# Process US State-level Data from NYT

In [20]:
# Import packages
import pandas as pd
import numpy as np
from datetime import datetime
import geopandas as gpd

### Step0: Read data and get basic state groups

#### 0.0 Handle Report Data

Read updated CSV file

In [21]:
# Load the NYT state-level report.
# date/state/fips are read as plain strings (the FIPS code is an identifier,
# not a number); cases/deaths are read as 32-bit integers.
# `str` is used instead of `np.string_`, which was removed in NumPy 2.0;
# pandas parses both as ordinary Python strings.
df_states_original = pd.read_csv(
    "./us-states.csv",
    dtype={
        "date": str,
        "state": str,
        "fips": str,
        "cases": np.int32,
        "deaths": np.int32,
    },
)
# Display only: sort_values returns a new frame, df_states_original keeps file order.
df_states_original.sort_values("fips", ascending=True)

Unnamed: 0,date,state,fips,cases,deaths
2489,2020-04-17,Alabama,01,4572,151
1173,2020-03-24,Alabama,01,242,0
1994,2020-04-08,Alabama,01,2498,67
7824,2020-07-23,Alabama,01,74212,1397
1554,2020-03-31,Alabama,01,999,14
...,...,...,...,...,...
3363,2020-05-02,Virgin Islands,78,66,4
6553,2020-06-29,Virgin Islands,78,84,6
1059,2020-03-21,Virgin Islands,78,6,0
2153,2020-04-10,Virgin Islands,78,51,1


#### 0.1 Group Data to States

Pivot the cases/deaths table so that each row is a state (indexed by state name and FIPS code) and each column is a date

In [22]:
# One row per (state, fips); columns are a two-level index of
# (measure, date) where measure is 'cases' or 'deaths'.
pivot_states = pd.pivot_table(df_states_original, index=['state', 'fips'],
                              columns=['date'])
# (state, date) pairs that are absent from the report come out as NaN; count
# them as 0. The fill and the integer cast are applied unconditionally:
# pivot_table's default mean aggregation yields floats, so casting only when
# NaNs happen to be present would make the dtype (and the "1.0"-vs-"1" text of
# the time-series strings built later) depend on the data.
pivot_states = pivot_states.fillna(0).astype(int)
pivot_states.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,...,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
Unnamed: 0_level_1,date,2020-01-21,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,...,2020-08-04,2020-08-05,2020-08-06,2020-08-07,2020-08-08,2020-08-09,2020-08-10,2020-08-11,2020-08-12,2020-08-13
state,fips,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Alabama,1,0,0,0,0,0,0,0,0,0,0,...,1666,1695,1714,1735,1755,1768,1797,1847,1882,1890
Alaska,2,0,0,0,0,0,0,0,0,0,0,...,23,23,23,23,24,24,24,24,25,25
Arizona,4,0,0,0,0,0,1,1,1,1,1,...,3850,3933,4005,4085,4140,4152,4157,4205,4348,4385
Arkansas,5,0,0,0,0,0,0,0,0,0,0,...,490,508,515,521,535,544,555,566,573,582
California,6,0,0,0,0,1,2,2,2,2,2,...,9696,9866,10014,10197,10299,10365,10460,10654,10808,10995


#### 0.2 Get Date Info

Get dates from data table

In [23]:
# The date labels are the column index of the 'cases' half of the pivot table.
cases_by_date = pivot_states['cases']
date = cases_by_date.columns
date

Index(['2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24', '2020-01-25',
       '2020-01-26', '2020-01-27', '2020-01-28', '2020-01-29', '2020-01-30',
       ...
       '2020-08-04', '2020-08-05', '2020-08-06', '2020-08-07', '2020-08-08',
       '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13'],
      dtype='object', name='date', length=206)

Sort dates and get the start/end

In [24]:
# Sorted copy of the date labels (ISO "YYYY-MM-DD" strings sort chronologically),
# plus the earliest and latest label.
date_str = np.sort(date)
dt_str_start, dt_str_end = date_str.min(), date_str.max()

Format dates and create a list

In [25]:
# Parse the first/last date labels and enumerate every calendar day between them.
dt_start, dt_end = (
    datetime.strptime(bound, "%Y-%m-%d") for bound in (dt_str_start, dt_str_end)
)
dt_range = pd.date_range(start=dt_start, end=dt_end)
print(len(dt_range), dt_range)
# Same range, rendered back to "YYYY-MM-DD" strings.
dt_range_str = [day.strftime("%Y-%m-%d") for day in dt_range.tolist()]
print(len(dt_range_str), dt_range_str)

206 DatetimeIndex(['2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',
               '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',
               '2020-01-29', '2020-01-30',
               ...
               '2020-08-04', '2020-08-05', '2020-08-06', '2020-08-07',
               '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11',
               '2020-08-12', '2020-08-13'],
              dtype='datetime64[ns]', length=206, freq='D')
206 ['2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24', '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28', '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-01', '2020-02-02', '2020-02-03', '2020-02-04', '2020-02-05', '2020-02-06', '2020-02-07', '2020-02-08', '2020-02-09', '2020-02-10', '2020-02-11', '2020-02-12', '2020-02-13', '2020-02-14', '2020-02-15', '2020-02-16', '2020-02-17', '2020-02-18', '2020-02-19', '2020-02-20', '2020-02-21', '2020-02-22', '2020-02-23', '2020-02-24', '2020-02-25', '2020-02-26', '2020-02-27', '2020-02-

### Step1: Add Cases Time Series, First Case Date, Death Time Series, First Death Date

Create time series of cases/deaths

In [26]:
# Flatten each state's row of daily counts into one comma-separated string,
# in column (date) order.
pivot_states['cases_ts'] = [
    ','.join(map(str, daily_counts))
    for daily_counts in pivot_states['cases'].values.tolist()
]
pivot_states['deaths_ts'] = [
    ','.join(map(str, daily_counts))
    for daily_counts in pivot_states['deaths'].values.tolist()
]

Get the date of first case/death

In [27]:
# idxmax over a boolean row returns the label of the first True column,
# i.e. the first date on which the count is positive.
has_cases = pivot_states['cases'].gt(0)
pivot_states['dt_first_case'] = has_cases.idxmax(axis=1)
has_deaths = pivot_states['deaths'].gt(0)
pivot_states['dt_first_death'] = has_deaths.idxmax(axis=1)
# For an all-False row idxmax still returns the first date, so blank out the
# first-death date for states whose latest death count is not positive.
no_deaths_reported = pivot_states['deaths'].iloc[:, -1].le(0)
pivot_states.loc[no_deaths_reported, 'dt_first_death'] = np.nan
pivot_states.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,...,deaths,deaths,deaths,deaths,deaths,deaths,cases_ts,deaths_ts,dt_first_case,dt_first_death
Unnamed: 0_level_1,date,2020-01-21,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,...,2020-08-08,2020-08-09,2020-08-10,2020-08-11,2020-08-12,2020-08-13,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
state,fips,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Alabama,1,0,0,0,0,0,0,0,0,0,0,...,1755,1768,1797,1847,1882,1890,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-13,2020-03-25
Alaska,2,0,0,0,0,0,0,0,0,0,0,...,24,24,24,24,25,25,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-12,2020-03-27
Arizona,4,0,0,0,0,0,1,1,1,1,1,...,4140,4152,4157,4205,4348,4385,"0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-26,2020-03-20
Arkansas,5,0,0,0,0,0,0,0,0,0,0,...,535,544,555,566,573,582,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-11,2020-03-24
California,6,0,0,0,0,1,2,2,2,2,2,...,10299,10365,10460,10654,10808,10995,"0,0,0,0,1,2,2,2,2,2,3,3,6,6,6,6,6,6,6,6,7,7,7,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-25,2020-03-04


### Step2: Add today_case, today_new_case, today_death, today_new_death

Get the latest case/death

In [28]:
# "Yesterday" is the second-to-last date column.
# NOTE(review): this assumes the date columns are in chronological order with
# no missing day before the latest one - confirm against the source data.
yesterday = date.values[-2]
cases_by_day = pivot_states['cases']
deaths_by_day = pivot_states['deaths']
# Latest cumulative totals and their one-column increase.
pivot_states['today_case'] = cases_by_day[dt_str_end]
pivot_states['today_new_case'] = cases_by_day[dt_str_end] - cases_by_day[yesterday]
pivot_states['today_death'] = deaths_by_day[dt_str_end]
pivot_states['today_new_death'] = deaths_by_day[dt_str_end] - deaths_by_day[yesterday]

### Step3: Read State Boundary GeoJSON file

Read GeoJson file

In [29]:
# Load the previously published state layer; only NAME, population and
# geometry are reused further down.
old_states_geojson_df = gpd.read_file(
    r"./states_update.geojson"
)
old_states_geojson_df.head()

Unnamed: 0,REGION,DIVISION,STATEFP,STATENS,GEOID,STUSPS,NAME,LSAD,MTFCC,FUNCSTAT,...,d2020-03-19,d2020-03-20,d2020-03-21,d2020-03-22,d2020-03-23,d2020-03-24,d2020-03-25,d2020-03-26,population,geometry
0,3,5,54,1779805,54,WV,West Virginia,0,G4000,A,...,0,0,0,0,0,0,0,0,1829054,"POLYGON ((-80.51899 40.63880, -80.51934 39.721..."
1,3,5,12,294478,12,FL,Florida,0,G4000,A,...,1,1,2,2,4,3,0,6,20598139,"MULTIPOLYGON (((-82.98748 24.62538, -82.89993 ..."
2,2,3,17,1779784,17,IL,Illinois,0,G4000,A,...,3,1,1,1,5,4,3,7,12821497,"POLYGON ((-90.64284 42.50848, -87.01994 42.493..."
3,2,4,27,662849,27,MN,Minnesota,0,G4000,A,...,0,0,1,0,0,0,0,1,5527358,"POLYGON ((-97.22872 49.00056, -95.15371 48.998..."
4,3,5,24,1714934,24,MD,Maryland,0,G4000,A,...,0,1,1,0,0,1,0,0,6003435,"POLYGON ((-77.71952 39.32131, -77.76086 39.340..."


### Step4: Extract necessary columns

Only keep the necessary columns

In [30]:
# Summary columns to carry over; the per-date cases/deaths columns are dropped.
summary_columns = ['cases_ts','deaths_ts','dt_first_case','dt_first_death','today_case','today_new_case', 'today_death','today_new_death']
# reset_index turns the (state, fips) row index into ordinary columns; the
# two-level column labels are then replaced by flat names.
report_df = pivot_states[summary_columns].reset_index()
report_df.columns = ['state', 'fips'] + summary_columns
report_df.head(5)

Unnamed: 0,state,fips,cases_ts,deaths_ts,dt_first_case,dt_first_death,today_case,today_new_case,today_death,today_new_death
0,Alabama,1,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-13,2020-03-25,105557,771,1890,8
1,Alaska,2,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-12,2020-03-27,4748,95,25,0
2,Arizona,4,"0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-26,2020-03-20,190850,1328,4385,37
3,Arkansas,5,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-11,2020-03-24,51766,652,582,9
4,California,6,"0,0,0,0,1,2,2,2,2,2,3,3,6,6,6,6,6,6,6,6,7,7,7,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-25,2020-03-04,603212,8115,10995,187


### Step5: Assign geometries to states

Rename states so that their names are consistent with the GeoJSON state names

In [31]:
# The GeoJSON layer uses the long official name for this territory; align the
# report with it so the name-based merge below can match the row.
report_df = report_df.replace(
    to_replace='Northern Mariana Islands',
    value='Commonwealth of the Northern Mariana Islands',
)

Merge case dataframe and geometry

In [32]:
# Left join on the state name: every geometry row is kept, and rows with no
# matching report get NaN in the report columns.
state_shapes = old_states_geojson_df[["NAME", "population", "geometry"]]
final_df = pd.merge(
    state_shapes,
    report_df,
    how='left',
    left_on='NAME',
    right_on='state',
)
final_df.columns

Index(['NAME', 'population', 'geometry', 'state', 'fips', 'cases_ts',
       'deaths_ts', 'dt_first_case', 'dt_first_death', 'today_case',
       'today_new_case', 'today_death', 'today_new_death'],
      dtype='object')

### Step6: Finalize the dataframe

Keep and add essential columns

In [33]:
# Keep only the columns that are exported, in output order.
# .copy() makes the selection an independent frame, so the column assignments
# below do not write into a slice of the merged frame (which triggers pandas'
# SettingWithCopyWarning and is not guaranteed to take effect).
final_df = final_df[["NAME", "population", "fips", "dt_first_case", "dt_first_death", "cases_ts", "deaths_ts", 'today_case','today_new_case', 'today_death','today_new_death', "geometry"]].copy()
# Constant metadata describing the time series: first date, last date, step.
final_df['start'] = dt_str_start
final_df['end'] = dt_str_end
final_df['dt_unit'] = "day"
final_df.head(5)

Unnamed: 0,NAME,population,fips,dt_first_case,dt_first_death,cases_ts,deaths_ts,today_case,today_new_case,today_death,today_new_death,geometry,start,end,dt_unit
0,West Virginia,1829054,54,2020-03-17,2020-03-29,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",8151.0,143.0,153.0,0.0,"POLYGON ((-80.51899 40.63880, -80.51934 39.721...",2020-01-21,2020-08-13,day
1,Florida,20598139,12,2020-03-01,2020-03-06,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",557129.0,6236.0,8912.0,148.0,"MULTIPOLYGON (((-82.98748 24.62538, -82.89993 ...",2020-01-21,2020-08-13,day
2,Illinois,12821497,17,2020-01-24,2020-03-17,"0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",202577.0,1929.0,7934.0,27.0,"POLYGON ((-90.64284 42.50848, -87.01994 42.493...",2020-01-21,2020-08-13,day
3,Minnesota,5527358,27,2020-03-06,2020-03-21,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",63039.0,690.0,1731.0,7.0,"POLYGON ((-97.22872 49.00056, -95.15371 48.998...",2020-01-21,2020-08-13,day
4,Maryland,6003435,24,2020-03-05,2020-03-18,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",98728.0,769.0,3620.0,8.0,"POLYGON ((-77.71952 39.32131, -77.76086 39.340...",2020-01-21,2020-08-13,day


Check the American Samoa row

In [34]:
# Show the row(s) whose NAME is exactly 'American Samoa'.
final_df.loc[final_df['NAME'].eq('American Samoa')]

Unnamed: 0,NAME,population,fips,dt_first_case,dt_first_death,cases_ts,deaths_ts,today_case,today_new_case,today_death,today_new_death,geometry,start,end,dt_unit
41,American Samoa,0,,,,,,,,,,"MULTIPOLYGON (((-171.14118 -11.04661, -171.111...",2020-01-21,2020-08-13,day


The New York Times data has not included American Samoa since 4/27, so drop that row

In [35]:
# Remove the American Samoa row(s), then renumber the rows from 0.
# NOTE(review): reset_index() without drop=True keeps the old row labels as an
# extra 'index' column, which is then carried into the exported file - confirm
# that this is intended.
american_samoa_rows = final_df.index[final_df['NAME'] == 'American Samoa']
final_df = final_df.drop(american_samoa_rows).reset_index()
final_df

Unnamed: 0,index,NAME,population,fips,dt_first_case,dt_first_death,cases_ts,deaths_ts,today_case,today_new_case,today_death,today_new_death,geometry,start,end,dt_unit
0,0,West Virginia,1829054,54,2020-03-17,2020-03-29,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",8151.0,143.0,153.0,0.0,"POLYGON ((-80.51899 40.63880, -80.51934 39.721...",2020-01-21,2020-08-13,day
1,1,Florida,20598139,12,2020-03-01,2020-03-06,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",557129.0,6236.0,8912.0,148.0,"MULTIPOLYGON (((-82.98748 24.62538, -82.89993 ...",2020-01-21,2020-08-13,day
2,2,Illinois,12821497,17,2020-01-24,2020-03-17,"0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",202577.0,1929.0,7934.0,27.0,"POLYGON ((-90.64284 42.50848, -87.01994 42.493...",2020-01-21,2020-08-13,day
3,3,Minnesota,5527358,27,2020-03-06,2020-03-21,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",63039.0,690.0,1731.0,7.0,"POLYGON ((-97.22872 49.00056, -95.15371 48.998...",2020-01-21,2020-08-13,day
4,4,Maryland,6003435,24,2020-03-05,2020-03-18,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",98728.0,769.0,3620.0,8.0,"POLYGON ((-77.71952 39.32131, -77.76086 39.340...",2020-01-21,2020-08-13,day
5,5,Rhode Island,1056611,44,2020-03-01,2020-03-28,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",20240.0,111.0,1019.0,1.0,"MULTIPOLYGON (((-71.67881 41.15891, -71.63497 ...",2020-01-21,2020-08-13,day
6,6,Idaho,1687809,16,2020-03-13,2020-03-26,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",26814.0,534.0,256.0,5.0,"POLYGON ((-116.91607 45.99536, -116.98259 46.0...",2020-01-21,2020-08-13,day
7,7,New Hampshire,1343622,33,2020-03-02,2020-03-23,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",6921.0,34.0,422.0,2.0,"POLYGON ((-71.50108 45.01335, -71.49804 45.070...",2020-01-21,2020-08-13,day
8,8,North Carolina,10155624,37,2020-03-03,2020-03-25,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",141006.0,1593.0,2313.0,36.0,"POLYGON ((-81.67749 36.58814, -80.12217 36.542...",2020-01-21,2020-08-13,day
9,9,Vermont,624977,50,2020-03-07,2020-03-19,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",1484.0,6.0,58.0,0.0,"POLYGON ((-73.34312 45.01084, -71.46505 45.013...",2020-01-21,2020-08-13,day


Calculate weekly change rates of cases and form a time-series list

In [36]:
# Length, in days, of each of the two windows that are compared.
CHANGE_WINDOW_DAYS = 7


def weekly_change_rates(cases_ts):
    """Return the week-over-week change rates for one cumulative case series.

    Parameters
    ----------
    cases_ts : str
        Comma-separated cumulative case counts, one value per day.

    Returns
    -------
    str
        Comma-separated change rates, one per day of the input. For day ``i``:

        * ``0`` while fewer than two full windows (14 days) are available;
        * ``0`` when both the current and the previous window have no new cases;
        * the current window's new-case total when only the previous window
          is zero (a ratio would be undefined);
        * otherwise ``current / previous - 1`` rounded to 2 decimals.
    """
    cumulative = cases_ts.split(",")
    # Daily new cases; the first day has no predecessor and is counted as 0.
    new_cases = [0] + [
        float(cumulative[day]) - float(cumulative[day - 1])
        for day in range(1, len(cumulative))
    ]

    rates = []
    for day in range(len(new_cases)):
        if day < 2 * CHANGE_WINDOW_DAYS - 1:
            rate = 0
        else:
            # Current window: this day and the 6 before it.
            current_week = sum(new_cases[day - back]
                               for back in range(0, CHANGE_WINDOW_DAYS))
            # Previous window: the 7 days before the current window.
            previous_week = sum(new_cases[day - back]
                                for back in range(CHANGE_WINDOW_DAYS, 2 * CHANGE_WINDOW_DAYS))
            if previous_week == 0 and current_week == 0:
                rate = 0
            elif previous_week == 0:
                rate = current_week
            else:
                rate = round(current_week / previous_week - 1, 2)
        rates.append(rate)
    # Stringify once, after the loop, instead of rebuilding the list per day.
    return ','.join(map(str, rates))


# Fail with a readable message if a state has no case series (e.g. a geometry
# row that did not match any report row in the merge), rather than with an
# AttributeError from calling .split on NaN.
states_without_cases = final_df.loc[final_df['cases_ts'].isna(), 'NAME'].tolist()
if states_without_cases:
    raise ValueError(
        "No cases_ts available for: {}. Drop or fix these rows before "
        "computing change rates.".format(', '.join(map(str, states_without_cases)))
    )

final_df['change_ts'] = final_df['cases_ts'].apply(weekly_change_rates)

final_df

Unnamed: 0,index,NAME,population,fips,dt_first_case,dt_first_death,cases_ts,deaths_ts,today_case,today_new_case,today_death,today_new_death,geometry,start,end,dt_unit,change_ts
0,0,West Virginia,1829054,54,2020-03-17,2020-03-29,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",8151.0,143.0,153.0,0.0,"POLYGON ((-80.51899 40.63880, -80.51934 39.721...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
1,1,Florida,20598139,12,2020-03-01,2020-03-06,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",557129.0,6236.0,8912.0,148.0,"MULTIPOLYGON (((-82.98748 24.62538, -82.89993 ...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
2,2,Illinois,12821497,17,2020-01-24,2020-03-17,"0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",202577.0,1929.0,7934.0,27.0,"POLYGON ((-90.64284 42.50848, -87.01994 42.493...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,-1.0,-1...."
3,3,Minnesota,5527358,27,2020-03-06,2020-03-21,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",63039.0,690.0,1731.0,7.0,"POLYGON ((-97.22872 49.00056, -95.15371 48.998...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
4,4,Maryland,6003435,24,2020-03-05,2020-03-18,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",98728.0,769.0,3620.0,8.0,"POLYGON ((-77.71952 39.32131, -77.76086 39.340...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
5,5,Rhode Island,1056611,44,2020-03-01,2020-03-28,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",20240.0,111.0,1019.0,1.0,"MULTIPOLYGON (((-71.67881 41.15891, -71.63497 ...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
6,6,Idaho,1687809,16,2020-03-13,2020-03-26,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",26814.0,534.0,256.0,5.0,"POLYGON ((-116.91607 45.99536, -116.98259 46.0...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
7,7,New Hampshire,1343622,33,2020-03-02,2020-03-23,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",6921.0,34.0,422.0,2.0,"POLYGON ((-71.50108 45.01335, -71.49804 45.070...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
8,8,North Carolina,10155624,37,2020-03-03,2020-03-25,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",141006.0,1593.0,2313.0,36.0,"POLYGON ((-81.67749 36.58814, -80.12217 36.542...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
9,9,Vermont,624977,50,2020-03-07,2020-03-19,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",1484.0,6.0,58.0,0.0,"POLYGON ((-73.34312 45.01084, -71.46505 45.013...",2020-01-21,2020-08-13,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."


### Step 7: Output file

Save file

In [37]:
# Write the finished layer as UTF-8 GeoJSON next to the notebook, then print
# a completion marker.
final_df.to_file(
    r"./nyt_states_data.geojson",
    driver='GeoJSON',
    encoding='utf-8',
)
print("done")

done
