## Imports

In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

## Read-In Data

### North Atlantic Hurricane Data for the USA (1851-2020)
Source: [IBTrACS Version 4 from the National Centers for Environmental Information](https://www.ncdc.noaa.gov/ibtracs/)

In [2]:
geo_df_usa = pd.read_csv('../data/geo_usa.csv')

In [3]:
geo_df_usa.drop(columns = 'Unnamed: 0', inplace = True)

In [4]:
geo_df_usa.head(2)

Unnamed: 0,storm_id,year,number,name,iso_time,nature,latitude,longitude,wmo_wind(knots),wmo_pressure(mb),...,usa_status,usa_wind,usa_pres,storm_category,sea_height(ft),storm_speed(knots),storm_direction(degrees),geometry,state,county
0,1851175N26270,1851,5.0,NOT_NAMED,1851-06-26 00:00:00,TS,28.1667,-96.8,70.0,,...,HU,70.0,,1.0,,5.0,282.0,POINT (-96.8 28.1667),Texas,Calhoun
1,1851175N26270,1851,5.0,NOT_NAMED,1851-06-26 03:00:00,TS,28.2265,-97.0793,,,...,HU,65.0,,1.0,,5.0,284.0,POINT (-97.0793 28.2265),Texas,Refugio


### COVID-19 DATA
Source: [New York Times COVID-19 Data Hub](https://github.com/nytimes/covid-19-data)

#### Cases and Deaths for Entire USA

In [5]:
covid_current = pd.read_csv('../data/covid_current.csv')

In [6]:
covid_current.rename(columns = {'date_x':'date'}, inplace = True)

In [7]:
# Only include columns that have full data for all states and counties
covid_current = covid_current[['date', 'county', 'state', 'fips', 'cases', 'deaths']].copy()

In [8]:
covid_current.head(3)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-08-11,Autauga,Alabama,1001.0,1215,23
1,2020-08-11,Baldwin,Alabama,1003.0,3736,32
2,2020-08-11,Barbour,Alabama,1005.0,643,6


#### Cases, Deaths, and Change in Cases per 100,000 People for States Impacted by Tropical Storms

In [9]:
covid_hurricane_states = pd.read_csv('../data/covid_hurricane_states.csv')

In [10]:
covid_hurricane_states.rename(columns = {'date_x':'current_date', 
                                         'cases':'current_cases',
                                         'deaths': 'current_deaths',
                                         'cases_per_100000': 'current_cases_per_100000',
                                         'date_y':'one_week_ago_date'}, inplace = True)

In [11]:
covid_hurricane_states.head(3)

Unnamed: 0,current_date,county,state,fips,current_cases,current_deaths,2019_population,current_cases_per_100000,one_week_ago_date,previous_cases,previous_cases_per_100000,change_in_cases,change_in_case_ratio
0,2020-08-11,Autauga,Alabama,1001.0,1215,23,55869.0,2174.7,2020-08-04,1073.0,1920.6,142.0,254.1
1,2020-08-11,Baldwin,Alabama,1003.0,3736,32,223234.0,1673.6,2020-08-04,3320.0,1487.2,416.0,186.4
2,2020-08-11,Barbour,Alabama,1005.0,643,6,24686.0,2604.7,2020-08-04,614.0,2487.2,29.0,117.5


### Hurricane and COVID-19 Data
Source: Created by aggregating historical hurricane data and current COVID-19 data

In [12]:
hurricanes_and_covid = pd.read_csv('../data/covid_and_hurricanes.csv')

In [13]:
hurricanes_and_covid.head(3)

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio
0,Texas,Calhoun,2,3,1,0,0,6,2,0,5,0,0,0,546.0,493.0,21290.0,249.0
1,Texas,Refugio,3,1,2,0,0,6,5,0,1,0,0,0,234.0,214.0,6948.0,287.9
2,Texas,Bee,1,0,0,0,0,1,8,0,2,0,0,0,1246.0,669.0,32565.0,1771.8


In [14]:
hurricanes_and_covid[hurricanes_and_covid['state'] == 'Florida']

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio
183,Florida,Calhoun,0,1,0,0,0,1,9,0,0,1,0,0,481.0,333.0,14105.0,1049.2
184,Florida,Pinellas,1,1,0,0,0,2,4,0,4,1,0,0,18103.0,17047.0,974996.0,108.3
185,Florida,Hillsborough,2,2,0,0,0,4,8,0,7,0,0,0,32996.0,30798.0,1471968.0,149.3
186,Florida,Polk,10,1,0,0,0,13,26,0,25,0,1,0,14645.0,13231.0,724777.0,195.1
187,Florida,Osceola,8,0,0,0,0,8,11,1,5,0,1,1,9858.0,9018.0,375751.0,223.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,Florida,Wakulla,3,0,0,0,0,3,10,0,2,0,0,0,726.0,627.0,33739.0,293.4
246,Florida,Taylor,1,1,0,0,0,2,10,0,2,0,1,0,981.0,551.0,21569.0,1993.6
247,Florida,Manatee,1,0,0,0,0,1,5,0,3,0,0,0,9395.0,8887.0,403253.0,126.0
248,Florida,Hardee,4,1,0,0,0,5,3,0,3,0,0,0,986.0,857.0,26937.0,478.9


## Read-In Geopandas Data

### USA - States

In [15]:
usa_states = gpd.read_file('../maps/states_21basic/states.shp')
usa_states.head(3)

Unnamed: 0,STATE_NAME,DRAWSEQ,STATE_FIPS,SUB_REGION,STATE_ABBR,geometry
0,Hawaii,1,15,Pacific,HI,"MULTIPOLYGON (((-160.07380 22.00418, -160.0497..."
1,Washington,2,53,Pacific,WA,"MULTIPOLYGON (((-122.40202 48.22522, -122.4628..."
2,Montana,3,30,Mountain,MT,"POLYGON ((-111.47543 44.70216, -111.48080 44.6..."


### USA - Counties

In [94]:
usa_counties = gpd.read_file('../maps/cb_2018_us_county_20m/cb_2018_us_county_20m.shp')
usa_counties.head(10)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,37,17,1026336,0500000US37017,37017,Bladen,6,2265887723,33010866,"POLYGON ((-78.90200 34.83527, -78.79960 34.850..."
1,37,167,1025844,0500000US37167,37167,Stanly,6,1023370459,25242751,"POLYGON ((-80.49737 35.20210, -80.29542 35.502..."
2,39,153,1074088,0500000US39153,39153,Summit,6,1069181981,18958267,"POLYGON ((-81.68699 41.13596, -81.68495 41.277..."
3,42,113,1213687,0500000US42113,42113,Sullivan,6,1165338428,6617028,"POLYGON ((-76.81373 41.59003, -76.22014 41.541..."
4,48,459,1384015,0500000US48459,48459,Upshur,6,1509910100,24878888,"POLYGON ((-95.15274 32.66095, -95.15211 32.902..."
5,48,49,1383810,0500000US48049,48049,Brown,6,2446120250,32375524,"POLYGON ((-99.19587 32.07923, -99.11866 32.079..."
6,45,21,1247987,0500000US45021,45021,Cherokee,6,1018732778,11714267,"POLYGON ((-81.87441 35.18352, -81.76813 35.179..."
7,1,43,161547,0500000US01043,1043,Cullman,6,1903073019,52422496,"POLYGON ((-87.11199 33.99238, -87.10991 34.299..."
8,54,23,1697238,0500000US54023,54023,Grant,6,1236390677,7545525,"POLYGON ((-79.48687 39.20596, -79.42441 39.228..."
9,46,55,1266988,0500000US46055,46055,Haakon,6,4689282658,42449843,"POLYGON ((-102.00107 44.51093, -102.00043 44.5..."


## Modeling

In [17]:
hurricanes_and_covid.head(3)

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio
0,Texas,Calhoun,2,3,1,0,0,6,2,0,5,0,0,0,546.0,493.0,21290.0,249.0
1,Texas,Refugio,3,1,2,0,0,6,5,0,1,0,0,0,234.0,214.0,6948.0,287.9
2,Texas,Bee,1,0,0,0,0,1,8,0,2,0,0,0,1246.0,669.0,32565.0,1771.8


### Define Features to Be Used in Model

In [None]:
X = hurricanes_and_covid.drop(columns = ['state', 'county', 'cases', 'previous_cases', '2019_population']).dropna()

In [47]:
X_abbrev = hurricanes_and_covid[['hurricane_count', 'tropical_storm_count', 'change_in_case_ratio']].dropna()

### Scale Data

In [48]:
ss = StandardScaler()
X_scaled = ss.fit_transform(X_abbrev)

### K-Means

In [49]:
k_list = range(2,10)

In [50]:
for k in k_list:
    km = KMeans(n_clusters = k, random_state = 42)
    km.fit(X_scaled)
    print(k)
    print(f'For k = {k}, the silhouette score is:')
    print(silhouette_score(X_scaled, km.labels_))
    print(km.inertia_)

2
For k = 2, the silhouette score is:
0.7680624645268908
2784.6716329847563
3
For k = 3, the silhouette score is:
0.7716114482045713
2063.3658843050293
4
For k = 4, the silhouette score is:
0.7781218381347527
1394.8584423705718
5
For k = 5, the silhouette score is:
0.7345111711485333
859.0026998359557
6
For k = 6, the silhouette score is:
0.6806555593201773
671.6222942251882
7
For k = 7, the silhouette score is:
0.637774147249599
537.511172838637
8
For k = 8, the silhouette score is:
0.6433430992183075
447.73068857462636
9
For k = 9, the silhouette score is:
0.6405412723641776
390.92329047986385


In [61]:
km1 = KMeans(n_clusters = 5, random_state = 42)
km1.fit(X_scaled)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=42, tol=0.0001, verbose=0)

In [62]:
km1.labels_

array([4, 4, 4, ..., 0, 0, 0], dtype=int32)

In [63]:
hurricanes_and_covid.dropna(inplace=True)

In [64]:
hurricanes_and_covid['cluster'] = km1.labels_

In [65]:
hurricanes_and_covid['cluster'].value_counts()

0    1239
4     177
1      31
3       2
2       2
Name: cluster, dtype: int64

In [68]:
km1.cluster_centers_

array([[-2.53734555e-01, -3.32070273e-01, -7.93518104e-03],
       [ 5.31963246e+00,  3.42102319e+00,  3.13045132e-02],
       [ 5.83852912e-03,  3.39253559e-01,  1.89617875e+01],
       [ 5.83852912e-03,  3.39253559e-01, -1.82915702e+01],
       [ 8.44322903e-01,  1.71766291e+00,  4.24904781e-02]])

In [79]:
# def describe_cluster(cluster_num, df = hurricanes_and_covid):
#     print(f'The mean hurricane count is: {df[df['cluster'] == cluster_num]['hurricane_count'].mean()}.')
#     print(f'The mean tropical storm count is: {df[df['cluster'] == cluster_num]['tropical_storm_count'].mean()}.')
#     print(f'The mean change in case ratio is: {df[df['cluster'] == cluster_num]['change_in_case_ratio'].mean()}.')
    

SyntaxError: invalid syntax (<ipython-input-79-a074a28ffdfe>, line 2)

In [80]:
print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 0]['hurricane_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 0]['tropical_storm_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 0]['change_in_case_ratio'].mean())

0.08635996771589992
0.7046004842615012
131.17530266343817


In [81]:
print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 1]['hurricane_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 1]['tropical_storm_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 1]['change_in_case_ratio'].mean())

8.96774193548387
10.741935483870968
248.1129032258065


In [82]:
print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 2]['hurricane_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 2]['tropical_storm_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 2]['change_in_case_ratio'].mean())

0.5
2.5
56662.55


In [83]:
print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 3]['hurricane_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 3]['tropical_storm_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 3]['change_in_case_ratio'].mean())

0.5
2.5
-54355.600000000006


In [84]:
print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 4]['hurricane_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 4]['tropical_storm_count'].mean())

print(hurricanes_and_covid[hurricanes_and_covid['cluster'] == 4]['change_in_case_ratio'].mean())

1.8361581920903955
6.186440677966102
281.44802259887


In [85]:
hurricanes_and_covid[hurricanes_and_covid['cluster'] == 2]

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio,cluster
739,Virginia,Fairfax,0,0,0,0,0,0,1,0,0,0,0,0,16544.0,15987.0,24019.0,67485.6,2
881,Louisiana,Jefferson,1,0,0,0,0,1,4,0,1,1,0,0,15452.0,14795.0,31368.0,45839.5,2


In [86]:
hurricanes_and_covid[hurricanes_and_covid['cluster'] == 3]

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio,cluster
738,Virginia,Fairfax,0,0,0,0,0,0,1,0,0,0,0,0,16544.0,15987.0,1147532.0,-65118.1,3
880,Louisiana,Jefferson,1,0,0,0,0,1,4,0,1,1,0,0,15452.0,14795.0,432493.0,-43593.1,3


In [88]:
covid_current[(covid_current['state'] == 'Virginia') & (covid_current['county'] == 'Fairfax')]

Unnamed: 0,date,county,state,fips,cases,deaths
2943,2020-08-11,Fairfax,Virginia,51059.0,16544,529
2944,2020-08-11,Fairfax,Virginia,51059.0,16544,529
2945,2020-08-11,Fairfax,Virginia,51059.0,16544,529
2946,2020-08-11,Fairfax,Virginia,51059.0,16544,529


In [87]:
covid_current[(covid_current['state'] == 'Louisiana') & (covid_current['county'] == 'Jefferson')]

Unnamed: 0,date,county,state,fips,cases,deaths
1140,2020-08-11,Jefferson,Louisiana,22051.0,15452,529
1141,2020-08-11,Jefferson,Louisiana,22051.0,15452,529
1142,2020-08-11,Jefferson,Louisiana,22051.0,15452,529
1143,2020-08-11,Jefferson,Louisiana,22051.0,15452,529


In [91]:
hurricanes_and_covid[(hurricanes_and_covid['state'] == 'Louisiana') & (hurricanes_and_covid['county'] == 'Jefferson')]

Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio,cluster
879,Louisiana,Jefferson,1,0,0,0,0,1,4,0,1,1,0,0,15452.0,14795.0,432493.0,151.9,4
880,Louisiana,Jefferson,1,0,0,0,0,1,4,0,1,1,0,0,15452.0,14795.0,432493.0,-43593.1,3
881,Louisiana,Jefferson,1,0,0,0,0,1,4,0,1,1,0,0,15452.0,14795.0,31368.0,45839.5,2
882,Louisiana,Jefferson,1,0,0,0,0,1,4,0,1,1,0,0,15452.0,14795.0,31368.0,2094.5,4


Unnamed: 0,state,county,cat_1_count,cat_2_count,cat_3_count,cat_4_count,cat_5_count,hurricane_count,tropical_storm_count,extratropical_system_count,tropical_depression_count,low_count,subtropical_depression_count,dissipating_storm_count,cases,previous_cases,2019_population,change_in_case_ratio,cluster
0,Texas,Calhoun,2,3,1,0,0,6,2,0,5,0,0,0,546.0,493.0,21290.0,249.0,4
1,Texas,Refugio,3,1,2,0,0,6,5,0,1,0,0,0,234.0,214.0,6948.0,287.9,4
2,Texas,Bee,1,0,0,0,0,1,8,0,2,0,0,0,1246.0,669.0,32565.0,1771.8,4
4,Texas,McMullen,0,0,0,0,0,1,7,0,7,0,0,0,9.0,8.0,743.0,134.6,4
5,Texas,Frio,0,0,0,0,0,0,3,0,5,4,0,0,514.0,476.0,20306.0,187.2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1549,Wisconsin,Juneau,0,0,0,0,0,0,0,1,0,0,0,0,138.0,129.0,26687.0,33.7,0
1550,New Mexico,Luna,0,0,0,0,0,0,1,0,0,0,0,0,255.0,238.0,23709.0,71.7,0
1551,New Mexico,Lea,0,0,0,0,0,0,1,0,0,0,0,0,834.0,683.0,71070.0,212.5,0
1552,New Mexico,Eddy,0,0,0,0,0,0,1,0,4,0,0,0,318.0,267.0,58460.0,87.3,0


Unnamed: 0,storm_id,year,number,name,iso_time,nature,latitude,longitude,wmo_wind(knots),wmo_pressure(mb),...,usa_status,usa_wind,usa_pres,storm_category,sea_height(ft),storm_speed(knots),storm_direction(degrees),geometry,state,county
2281,1911221N26278,1911,49.0,NOT_NAMED,1911-08-13 06:00:00,TS,31.0,-91.6,30.0,,...,TD,30.0,,-1.0,,8.0,267.0,POINT (-91.59999999999999 31),Louisiana,Wilkinson
3018,1932262N21265,1932,53.0,NOT_NAMED,1932-09-20 00:00:00,TS,31.0,-91.5,40.0,,...,TS,40.0,,0.0,,22.0,20.0,POINT (-91.5 31),Louisiana,Wilkinson
