## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

## Read-In Data

### North Atlantic Hurricane Data for the USA (1851-2020)
Source: [IBTrACS Version 4 from the National Centers for Environmental Information](https://www.ncdc.noaa.gov/ibtracs/)

In [2]:
# Load the USA hurricane table (IBTrACS-derived, per the section header) from the
# project's relative data directory.
geo_df_usa = pd.read_csv('../data/geo_usa.csv')

In [3]:
# Remove the 'Unnamed: 0' column (presumably an index saved with the CSV — confirm).
# Rebinding with errors='ignore' instead of inplace=True keeps this cell idempotent:
# running it a second time is a no-op rather than a KeyError.
geo_df_usa = geo_df_usa.drop(columns='Unnamed: 0', errors='ignore')

In [4]:
# Preview the first two rows to confirm the load and the column drop.
geo_df_usa.head(2)

Unnamed: 0,storm_id,year,number,name,iso_time,nature,latitude,longitude,wmo_wind(knots),wmo_pressure(mb),...,usa_status,usa_wind,usa_pres,storm_category,sea_height(ft),storm_speed(knots),storm_direction(degrees),geometry,state,county
0,1851175N26270,1851,5.0,NOT_NAMED,1851-06-26 00:00:00,TS,28.1667,-96.8,70.0,,...,HU,70.0,,1.0,,5.0,282.0,POINT (-96.8 28.1667),Texas,Calhoun
1,1851175N26270,1851,5.0,NOT_NAMED,1851-06-26 03:00:00,TS,28.2265,-97.0793,,,...,HU,65.0,,1.0,,5.0,284.0,POINT (-97.0793 28.2265),Texas,Refugio


### COVID-19 DATA
Source: [New York Times COVID-19 Data Hub](https://github.com/nytimes/covid-19-data)

#### Cases and Deaths for Entire USA

In [5]:
# Load the COVID-19 cases-and-deaths table (NYT data, per the section header).
covid_current = pd.read_csv('../data/covid_current.csv')

In [6]:
# Rename the 'date_x' column to plain 'date' (rebinding instead of mutating in place).
covid_current = covid_current.rename(columns={'date_x': 'date'})

In [7]:
# Keep just the fields that are fully populated for every state and county;
# .copy() detaches the result from the originally loaded frame.
covid_current = covid_current.loc[
    :, ['date', 'county', 'state', 'fips', 'cases', 'deaths']
].copy()

In [8]:
# Preview the first three rows of the trimmed COVID-19 table.
covid_current.head(3)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-08-11,Autauga,Alabama,1001.0,1215,23
1,2020-08-11,Baldwin,Alabama,1003.0,3736,32
2,2020-08-11,Barbour,Alabama,1005.0,643,6


#### Cases, Deaths, and Change in Cases per 100,000 People for States Impacted by Tropical Storms

In [9]:
# Load the COVID-19 table restricted to storm-impacted states (per the section header).
covid_hurricane_states = pd.read_csv('../data/covid_hurricane_states.csv')

In [10]:
# Give the snapshot columns explicit 'current_' / 'one_week_ago_' names so they
# read unambiguously next to the 'previous_*' columns.
covid_hurricane_states = covid_hurricane_states.rename(
    columns={
        'date_x': 'current_date',
        'cases': 'current_cases',
        'deaths': 'current_deaths',
        'cases_per_100000': 'current_cases_per_100000',
        'date_y': 'one_week_ago_date',
    }
)

In [11]:
# Preview the first three rows after renaming.
covid_hurricane_states.head(3)

Unnamed: 0,current_date,county,state,fips,current_cases,current_deaths,2019_population,current_cases_per_100000,one_week_ago_date,previous_cases,previous_cases_per_100000,change_in_cases,change_in_case_ratio
0,2020-08-11,Autauga,Alabama,1001.0,1215,23,55869.0,2174.7,2020-08-04,1073.0,1920.6,142.0,254.1
1,2020-08-11,Baldwin,Alabama,1003.0,3736,32,223234.0,1673.6,2020-08-04,3320.0,1487.2,416.0,186.4
2,2020-08-11,Barbour,Alabama,1005.0,643,6,24686.0,2604.7,2020-08-04,614.0,2487.2,29.0,117.5


### Hurricane and COVID-19 Data
Source: Created by aggregating historical hurricane data and current COVID-19 data

In [12]:
# Load the combined per-county table of historical storm counts and COVID-19 figures.
hurricanes_and_covid = pd.read_csv('../data/covid_and_hurricanes.csv')

In [13]:
# Preview the first three rows of the combined table.
hurricanes_and_covid.head(3)

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio
0,Texas,Calhoun,2,3,1,0,0,6,2,0,5,0,0,0,546.0,493.0,21290.0,249.0
1,Texas,Refugio,3,1,2,0,0,6,5,0,1,0,0,0,234.0,214.0,6948.0,287.9
2,Texas,Bee,1,0,0,0,0,1,8,0,2,0,0,0,1246.0,669.0,32565.0,1771.8


In [14]:
# Show only the Florida rows of the combined table (boolean mask on 'state').
hurricanes_and_covid[hurricanes_and_covid['state'] == 'Florida']

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio
183,Florida,Calhoun,0,1,0,0,0,1,9,0,0,1,0,0,481.0,333.0,14105.0,1049.2
184,Florida,Pinellas,1,1,0,0,0,2,4,0,4,1,0,0,18103.0,17047.0,974996.0,108.3
185,Florida,Hillsborough,2,2,0,0,0,4,8,0,7,0,0,0,32996.0,30798.0,1471968.0,149.3
186,Florida,Polk,10,1,0,0,0,13,26,0,25,0,1,0,14645.0,13231.0,724777.0,195.1
187,Florida,Osceola,8,0,0,0,0,8,11,1,5,0,1,1,9858.0,9018.0,375751.0,223.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,Florida,Wakulla,3,0,0,0,0,3,10,0,2,0,0,0,726.0,627.0,33739.0,293.4
246,Florida,Taylor,1,1,0,0,0,2,10,0,2,0,1,0,981.0,551.0,21569.0,1993.6
247,Florida,Manatee,1,0,0,0,0,1,5,0,3,0,0,0,9395.0,8887.0,403253.0,126.0
248,Florida,Hardee,4,1,0,0,0,5,3,0,3,0,0,0,986.0,857.0,26937.0,478.9


## Read-In Geopandas Data

### USA - States

In [15]:
# Read the state-boundary shapefile into a GeoDataFrame and preview three rows.
usa_states = gpd.read_file('../maps/states_21basic/states.shp')
usa_states.head(3)

Unnamed: 0,STATE_NAME,DRAWSEQ,STATE_FIPS,SUB_REGION,STATE_ABBR,geometry
0,Hawaii,1,15,Pacific,HI,"MULTIPOLYGON (((-160.07380 22.00418, -160.0497..."
1,Washington,2,53,Pacific,WA,"MULTIPOLYGON (((-122.40202 48.22522, -122.4628..."
2,Montana,3,30,Mountain,MT,"POLYGON ((-111.47543 44.70216, -111.48080 44.6..."


### USA - Counties

In [16]:
# Read the county-boundary shapefile (cb_2018_us_county_20m) and preview ten rows.
usa_counties = gpd.read_file('../maps/cb_2018_us_county_20m/cb_2018_us_county_20m.shp')
usa_counties.head(10)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,37,17,1026336,0500000US37017,37017,Bladen,6,2265887723,33010866,"POLYGON ((-78.90200 34.83527, -78.79960 34.850..."
1,37,167,1025844,0500000US37167,37167,Stanly,6,1023370459,25242751,"POLYGON ((-80.49737 35.20210, -80.29542 35.502..."
2,39,153,1074088,0500000US39153,39153,Summit,6,1069181981,18958267,"POLYGON ((-81.68699 41.13596, -81.68495 41.277..."
3,42,113,1213687,0500000US42113,42113,Sullivan,6,1165338428,6617028,"POLYGON ((-76.81373 41.59003, -76.22014 41.541..."
4,48,459,1384015,0500000US48459,48459,Upshur,6,1509910100,24878888,"POLYGON ((-95.15274 32.66095, -95.15211 32.902..."
5,48,49,1383810,0500000US48049,48049,Brown,6,2446120250,32375524,"POLYGON ((-99.19587 32.07923, -99.11866 32.079..."
6,45,21,1247987,0500000US45021,45021,Cherokee,6,1018732778,11714267,"POLYGON ((-81.87441 35.18352, -81.76813 35.179..."
7,1,43,161547,0500000US01043,1043,Cullman,6,1903073019,52422496,"POLYGON ((-87.11199 33.99238, -87.10991 34.299..."
8,54,23,1697238,0500000US54023,54023,Grant,6,1236390677,7545525,"POLYGON ((-79.48687 39.20596, -79.42441 39.228..."
9,46,55,1266988,0500000US46055,46055,Haakon,6,4689282658,42449843,"POLYGON ((-102.00107 44.51093, -102.00043 44.5..."


## Modeling

In [17]:
# Re-display the modelling input so the feature columns are visible in this section.
hurricanes_and_covid.head(3)

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio
0,Texas,Calhoun,2,3,1,0,0,6,2,0,5,0,0,0,546.0,493.0,21290.0,249.0
1,Texas,Refugio,3,1,2,0,0,6,5,0,1,0,0,0,234.0,214.0,6948.0,287.9
2,Texas,Bee,1,0,0,0,0,1,8,0,2,0,0,0,1246.0,669.0,32565.0,1771.8


### Define Features to Be Used in Model

In [18]:
# Feature matrix: every column except the identifiers and raw COVID-19 counts,
# with incomplete rows removed.
# NOTE(review): the visible cells go on to model X_hurricanes rather than X —
# confirm whether X is still needed.
X = (
    hurricanes_and_covid
    .drop(columns=['state', 'county', 'cases', 'previous_cases', '2019_population'])
    .dropna()
)

In [91]:
# List the column names to pick the storm-count features from.
# NOTE(review): the saved output of this cell already contains 'cluster', a column
# that is only added further down — the output was captured after out-of-order
# execution and will differ on a fresh Restart & Run All.
hurricanes_and_covid.columns

Index(['state', 'county', 'cat_1_count', 'cat_2_count', 'cat_3_count',
       'cat_4_count', 'cat_5_count', 'hurricane_count', 'tropical_storm_count',
       'extratropical_system_count', 'tropical_depression_count', 'low_count',
       'subtropical_depression_count', 'dissipating_storm_count', 'cases',
       'previous_cases', '2019_population', 'change_in_case_ratio', 'cluster'],
      dtype='object')

In [92]:
# Storm-count features only (no COVID-19 columns); rows with a missing count are
# dropped, and the surviving rows keep their original index labels.
X_hurricanes = hurricanes_and_covid.loc[
    :,
    [
        'cat_1_count',
        'cat_2_count',
        'cat_3_count',
        'cat_4_count',
        'cat_5_count',
        'hurricane_count',
        'tropical_storm_count',
        'extratropical_system_count',
        'tropical_depression_count',
        'low_count',
        'subtropical_depression_count',
        'dissipating_storm_count',
    ],
].dropna()

### Scale Data

In [93]:
# Standardise the storm-count features; the fitted scaler stays available as `ss`.
ss = StandardScaler()
X_scaled = ss.fit(X_hurricanes).transform(X_hurricanes)

### K-Means

In [94]:
# Candidate cluster counts to evaluate: 2 through 9 (range end is exclusive).
k_list = range(2,10)

In [95]:
# Fit one K-Means model per candidate k. For each, print k, a caption, the
# silhouette score, and finally the inertia (the unlabeled last number).
for k in k_list:
    km = KMeans(n_clusters=k, random_state=42).fit(X_scaled)
    print(
        k,
        f'For k = {k}, the silhouette score is:',
        silhouette_score(X_scaled, km.labels_),
        km.inertia_,
        sep='\n',
    )

2
For k = 2, the silhouette score is:
0.7923402349921117
14143.751001715638
3
For k = 3, the silhouette score is:
0.6321125978140619
12486.813953973957
4
For k = 4, the silhouette score is:
0.6353705670121851
10958.665439196408
5
For k = 5, the silhouette score is:
0.6341102844297157
9676.483559866983
6
For k = 6, the silhouette score is:
0.6069307803384496
8739.130203145232
7
For k = 7, the silhouette score is:
0.4583062112087441
7951.0137686110775
8
For k = 8, the silhouette score is:
0.4686647346379312
7010.926092332411
9
For k = 9, the silhouette score is:
0.4707147869632949
6412.424434547318


In [96]:
# Final model with six clusters; fit() returns the estimator, so the assignment
# also suppresses the cell output.
# NOTE(review): the silhouette scores printed by the sweep peak at k=2 and the
# reason for choosing k=6 is not recorded here — document the rationale.
km1 = KMeans(n_clusters=6, random_state=42).fit(X_scaled)

In [97]:
# Cluster index assigned to each row of X_scaled, in row order.
km1.labels_

array([1, 1, 0, ..., 0, 0, 0], dtype=int32)

In [98]:
# Drop, in place, every row that has a missing value in ANY column.
# NOTE(review): X_hurricanes was built with dropna() over the storm-count columns
# only, so the rows kept here are not guaranteed to be the same rows that were
# clustered — confirm the two row sets match before cluster labels are attached.
hurricanes_and_covid.dropna(inplace=True)

In [99]:
# Attach cluster labels by index label rather than by position.
# km1 was fitted on the rows of X_hurricanes (dropna over the storm-count columns
# only), whereas hurricanes_and_covid has had rows with a NaN in *any* column
# removed. Assigning the bare label array therefore fails with a length mismatch
# on a fresh run (or mislabels rows if the lengths happen to agree). Wrapping the
# labels in a Series keyed by X_hurricanes.index lets pandas align each label to
# the row it was computed for; rows of X_hurricanes no longer present are ignored,
# and any row here that was not clustered would receive NaN.
hurricanes_and_covid['cluster'] = pd.Series(km1.labels_, index=X_hurricanes.index)

In [100]:
# Number of rows assigned to each cluster, largest first.
hurricanes_and_covid['cluster'].value_counts()

0    1360
1      79
5      54
2      12
3       4
4       1
Name: cluster, dtype: int64

In [101]:
# Cluster centres in the standardised feature space: one row per cluster, one
# column per feature of X_hurricanes (values are in scaled units, not raw counts).
km1.cluster_centers_

array([[-2.19190721e-01, -1.78162984e-01, -1.03095883e-01,
        -8.89069404e-02, -2.57427761e-02, -2.30207347e-01,
        -1.98082432e-01, -2.84793755e-02, -1.07159651e-01,
        -1.05494412e-02, -1.79509243e-01, -5.15368072e-02],
       [ 2.53394662e+00,  1.94760582e+00,  8.33676981e-01,
        -8.89069404e-02, -2.57427761e-02,  2.49886142e+00,
         2.14285546e+00,  2.77453706e-01,  1.22347542e+00,
         1.33130472e-01, -3.03298501e-02, -5.15368072e-02],
       [ 3.89361368e+00,  4.68699526e+00,  4.58763843e+00,
         1.03527193e+01, -2.57427761e-02,  5.53663409e+00,
         2.56232708e+00,  6.37924239e-02,  2.17636610e+00,
         1.02476713e+00,  1.47856647e-01, -5.15368072e-02],
       [ 4.14054172e+00,  4.51246964e+00,  5.52910813e+00,
         2.14858439e+00, -2.57427761e-02,  5.02786996e+00,
         4.64687987e+00, -2.89699540e-01,  2.09350604e+00,
        -2.97149950e-01,  8.02588427e-01,  1.94036079e+01],
       [-3.04163017e-01, -1.99722031e-01,  3.6461687

In [102]:
def describe_cluster(cluster_num, df = hurricanes_and_covid, col = 'cluster', outcome1 = 'hurricane_count', outcome2 = 'tropical_storm_count', outcome3 = 'change_in_case_ratio'):
    print(f'The mean hurricane count is: {df[df[col] == cluster_num][outcome1].mean()}.')
    print(f'The mean tropical storm count is: {df[df[col] == cluster_num][outcome2].mean()}.')
    print(f'The mean change in case ratio is: {df[df[col] == cluster_num][outcome3].mean()}.')

In [103]:
# Summarise every cluster in label order; labels are taken to run 0..n-1, where
# n is the number of distinct values in the 'cluster' column.
for i in range(hurricanes_and_covid['cluster'].nunique()):
    print(f'Cluster {i}:')
    describe_cluster(i)
    print('')

Cluster 0:
The mean hurricane count is: 0.1375.
The mean tropical storm count is: 1.0794117647058823.
The mean change in case ratio is: 139.2936029411765.

Cluster 1:
The mean hurricane count is: 4.6075949367088604.
The mean tropical storm count is: 7.443037974683544.
The mean change in case ratio is: 269.746835443038.

Cluster 2:
The mean hurricane count is: 9.583333333333334.
The mean tropical storm count is: 8.583333333333334.
The mean change in case ratio is: 220.12500000000003.

Cluster 3:
The mean hurricane count is: 8.75.
The mean tropical storm count is: 14.25.
The mean change in case ratio is: 196.52499999999998.

Cluster 4:
The mean hurricane count is: 2.0.
The mean tropical storm count is: 5.0.
The mean change in case ratio is: 121.79999999999997.

Cluster 5:
The mean hurricane count is: 1.3703703703703705.
The mean tropical storm count is: 4.111111111111111.
The mean change in case ratio is: 242.7907407407408.



In [69]:
# Rows whose change_in_case_ratio exceeds 194.775.
# NOTE(review): 194.775 is an unexplained literal — presumably a summary statistic
# of change_in_case_ratio; confirm its origin and name it as a constant.
# NOTE(review): the saved output's 'cluster' values do not match the cluster sizes
# reported earlier in the notebook, so this output looks stale — re-run to refresh.
hurricanes_and_covid[hurricanes_and_covid['change_in_case_ratio'] > 194.775]

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio,cluster
0,Texas,Calhoun,2,3,1,0,0,6,2,0,5,0,0,0,546.0,493.0,21290.0,249.0,3
1,Texas,Refugio,3,1,2,0,0,6,5,0,1,0,0,0,234.0,214.0,6948.0,287.9,3
2,Texas,Bee,1,0,0,0,0,1,8,0,2,0,0,0,1246.0,669.0,32565.0,1771.8,2
6,Texas,Uvalde,0,0,0,0,0,0,2,0,0,0,0,0,555.0,501.0,26741.0,202.0,4
7,Texas,Real,0,0,0,0,0,0,2,0,0,0,0,0,88.0,77.0,3452.0,318.6,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1467,Oklahoma,Le Flore,0,0,0,0,0,0,0,0,2,0,0,0,360.0,261.0,49853.0,198.6,0
1477,Oklahoma,Hughes,0,0,0,0,0,0,1,0,2,0,0,0,141.0,115.0,13279.0,195.8,4
1480,Oklahoma,Pittsburg,0,0,0,0,0,0,0,0,2,1,0,0,377.0,214.0,43654.0,373.4,4
1519,Iowa,Clinton,0,0,0,0,0,0,0,0,0,0,0,0,416.0,321.0,46429.0,204.6,4


In [80]:
# Preview the first five rows (head() default) of the hurricane table.
geo_df_usa.head()

Unnamed: 0,storm_id,year,number,name,iso_time,nature,latitude,longitude,wmo_wind(knots),wmo_pressure(mb),...,usa_status,usa_wind,usa_pres,storm_category,sea_height(ft),storm_speed(knots),storm_direction(degrees),geometry,state,county
0,1851175N26270,1851,5.0,NOT_NAMED,1851-06-26 00:00:00,TS,28.1667,-96.8,70.0,,...,HU,70.0,,1.0,,5.0,282.0,POINT (-96.8 28.1667),Texas,Calhoun
1,1851175N26270,1851,5.0,NOT_NAMED,1851-06-26 03:00:00,TS,28.2265,-97.0793,,,...,HU,65.0,,1.0,,5.0,284.0,POINT (-97.0793 28.2265),Texas,Refugio
2,1851175N26270,1851,5.0,NOT_NAMED,1851-06-26 06:00:00,TS,28.3,-97.4,60.0,,...,TS,60.0,,0.0,,6.0,283.0,POINT (-97.40000000000001 28.3),Texas,Refugio
3,1851175N26270,1851,5.0,NOT_NAMED,1851-06-26 09:00:00,TS,28.3617,-97.7476,,,...,TS,60.0,,0.0,,6.0,282.0,POINT (-97.74760000000001 28.3617),Texas,Bee
4,1851175N26270,1851,5.0,NOT_NAMED,1851-06-26 12:00:00,TS,28.4333,-98.1,60.0,,...,TS,60.0,,0.0,,6.0,284.0,POINT (-98.09999999999999 28.4333),Texas,Live Oak


In [77]:
#df = pd.read_csv('../../../../../../../Downloads/ibtracs.NA.list.v04r00.csv')

  interactivity=interactivity, compiler=compiler, result=result)
