In [135]:
# Various imports, setup
import pandas as pd
import numpy as np
import altair as alt
import seaborn as sns

In [136]:
# ------------------------------------------------------------------
# Load the raw data (provenance notes live in '../src/data_sources')
# ------------------------------------------------------------------

# NOAA earthquake table, stored tab-separated.
# NOTE(review): this was labelled "1980 to today", but the rows displayed
# below start at Year 10 -- confirm which range the export is meant to cover.
noaa_quakes = (
    pd.read_csv('../data/raw/noaa_quakes.tsv', sep='\t')
    # Remove the 'Search Parameters' column and the row labelled 0
    # (presumably the row that carries the search-parameter text -- verify).
    .drop(columns="Search Parameters", index=0)
)
noaa_quakes

Unnamed: 0,Year,Location Name,Latitude,Longitude,Focal Depth (km),Mag,Deaths,Missing,Missing Description,Total Deaths,Total Missing,Total Missing Description
1,10.0,TURKMENISTAN: NISA,38.000,58.300,18.0,7.1,,,,,,
2,11.0,TURKEY,37.800,27.400,,,,,,,,
3,17.0,"TURKEY: IZMIR, EFES, AYDIN, MANISA, ALASEHIR,...",37.850,27.300,,,,,,,,
4,23.0,GREECE,38.200,22.200,,,,,,,,
5,25.0,PAKISTAN,33.000,72.000,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
6175,2021.0,NEW ZEALAND: GISBORNE,-37.563,179.444,20.0,7.3,,,,,,
6176,2021.0,"KERMADEC ISLANDS: S OF, RAOUL",-29.613,-177.843,55.0,7.4,,,,,,
6177,2021.0,KERMADEC ISLANDS: SSE OF RAOUL ISLAND,-29.740,-177.267,19.0,8.1,,,,,,
6178,2021.0,ALGERIA: BEJAIA,36.915,5.199,8.0,6.0,,,,,,


In [137]:
# Inspect which features the NOAA table offers: one column name per line

for col in noaa_quakes.columns.tolist():
    print(col)

Year
Location Name
Latitude
Longitude
Focal Depth (km)
Mag
Deaths
Missing
Missing Description
Total Deaths
Total Missing
Total Missing Description


## Convert to Log Scale for outcomes

In [138]:
# (rows, columns) of the NOAA table as loaded above, before any filtering
noaa_quakes.shape

(6179, 12)

In [156]:
# Keep only the columns needed for the deaths outcome and discard every row
# that has a missing value in any of them.
quake_deaths = (
    noaa_quakes
    .loc[:, ['Year', 'Mag', 'Total Deaths', 'Location Name']]
    .dropna(axis='index')
)

# Rows with a known death toll. After the dropna above no 'Total Deaths' value
# is missing, so this keeps every row of quake_deaths.
data_h1 = quake_deaths.loc[quake_deaths['Total Deaths'].notna()]
data_h1.shape

(1620, 4)

In [158]:
# (rows, columns) of the NA-free deaths subset
quake_deaths.shape

(1620, 4)

In [141]:
# Natural log of the death toll, taken from the filtered frame's own column.
# The previous version logged the unfiltered noaa_quakes column and relied on
# pandas index alignment to pick out the matching rows; reading from
# quake_deaths gives the same values without that hidden dependency.
# Caveat: a death toll of 0, if any exists, would come out as -inf.
quake_deaths["Log Total Deaths"] = np.log(quake_deaths["Total Deaths"])

In [142]:
# Preview the first rows, now including the 'Log Total Deaths' column
quake_deaths.head()

Unnamed: 0,Year,Mag,Total Deaths,Location Name,Log Total Deaths
26,115.0,7.5,260000.0,TURKEY: ANTAKYA (ANTIOCH),12.468437
45,294.0,5.5,101.0,CHINA: BEIJING,4.615121
67,365.0,8.0,5000.0,GREECE: CRETE: KNOSSOS,8.517193
96,512.0,7.5,5130.0,CHINA: SHAANXI PROVINCE: YINGXIAN,8.542861
101,525.0,7.0,250000.0,"TURKEY: ANTAKYA (ANTIOCH), SAMANDAG",12.429216


## Bin Magnitudes and Deaths

In [147]:
# Half-magnitude bin edges: 0.5, 1.0, ..., 9.5
magbins = np.arange(start=0.5, stop=10, step=0.5)

# For each magnitude, find the position of the first edge that is >= Mag
# (searchsorted with side='left' is what digitize(..., right=True) computes for
# increasing edges). Because the edges are 0.5 apart and start at 0.5,
# (position + 1) / 2 is that edge's value, so every quake is labelled with the
# upper edge of its half-unit bin (e.g. 7.1 -> 7.5, 7.5 -> 7.5).
quake_deaths['Magnitude Bin'] = (
    np.searchsorted(magbins, quake_deaths['Mag'].to_numpy(), side='left') + 1
) / 2

quake_deaths.head()

Unnamed: 0,Year,Mag,Total Deaths,Location Name,Log Total Deaths,Magnitude Bin
26,115.0,7.5,260000.0,TURKEY: ANTAKYA (ANTIOCH),12.468437,7.5
45,294.0,5.5,101.0,CHINA: BEIJING,4.615121,5.5
67,365.0,8.0,5000.0,GREECE: CRETE: KNOSSOS,8.517193,8.0
96,512.0,7.5,5130.0,CHINA: SHAANXI PROVINCE: YINGXIAN,8.542861,7.5
101,525.0,7.0,250000.0,"TURKEY: ANTAKYA (ANTIOCH), SAMANDAG",12.429216,7.0


In [153]:
# Show the magnitude-bin column (pandas abbreviates a long Series when displayed)
quake_deaths['Magnitude Bin']

# NOTE(review): disabled draft below -- it looks like an attempt to bin
# 'Total Deaths' in steps of 1000, mirroring the magnitude binning. It is
# commented out and never runs; finish it or remove it.
# deathbins = np.arange(start = 1000, stop = 830000, step = 1000)

# quake_deaths['Total Deaths Under (Bin)'] = (np.digitize(quake_deaths['Total Deaths'], deathbins, right=True))

# quake_deaths.head()

26      7.5
45      5.5
67      8.0
96      7.5
101     7.0
       ... 
6165    6.5
6166    6.5
6171    6.0
6173    5.5
6179    5.5
Name: Magnitude Bin, Length: 1620, dtype: float64

In [145]:
# Select Altair's 'default' data transformer with max_rows=None, i.e. no cap on
# how many rows a chart may embed (see the Altair docs on data transformers).
# NOTE(review): this is notebook-wide configuration -- consider moving it next to the imports.
alt.data_transformers.enable('default', max_rows=None)


DataTransformerRegistry.enable('default')

In [162]:
# Heatmap of earthquake deaths by year (x) and magnitude bin (y, largest at the bottom
# of the reversed scale as in the original encoding).
# Several quakes can share the same (Year, Magnitude Bin) cell. Without an
# aggregate each of them draws its own rectangle on top of the others, so the
# colour seen is only that of the last one drawn. Summing 'Total Deaths' yields
# exactly one rectangle per cell whose colour reflects all quakes in it.
alt.Chart(quake_deaths).mark_rect().encode(
    x='Year:N',
    y=alt.Y('Magnitude Bin:N', scale=alt.Scale(reverse=True)),
    color=alt.Color(
        'Total Deaths:Q',
        aggregate='sum',
        scale=alt.Scale(scheme='reds'),
        title='Total Deaths (sum)',
    ),
).properties(
    title='Earthquake deaths by year and magnitude bin'
)