In [1]:
import pandas as pd
import os
import geopandas as gpd
import numpy as np

In [2]:
!dir CA_Counties

 Volume in drive C has no label.
 Volume Serial Number is 2E4D-F196

 Directory of c:\Users\Pranav Lodha\Documents\Github\mean-square-webapp\CA_Counties

04/10/2021  03:26 PM    <DIR>          .
04/10/2021  03:26 PM    <DIR>          ..
02/16/2021  07:45 AM                 5 CA_Counties_TIGER2016.cpg
02/16/2021  07:45 AM            17,572 CA_Counties_TIGER2016.dbf
02/16/2021  07:45 AM               425 CA_Counties_TIGER2016.prj
02/16/2021  07:45 AM               732 CA_Counties_TIGER2016.sbn
02/16/2021  07:45 AM               164 CA_Counties_TIGER2016.sbx
02/16/2021  07:45 AM         3,852,848 CA_Counties_TIGER2016.shp
02/16/2021  07:45 AM            24,198 CA_Counties_TIGER2016.shp.xml
02/16/2021  07:45 AM               564 CA_Counties_TIGER2016.shx
02/16/2021  07:45 AM         1,395,490 CA_CovidDataJan10.csv
               9 File(s)      5,291,998 bytes
               2 Dir(s)  883,305,431,040 bytes free


## Load data

### Prep Data

In [3]:
data = pd.read_csv('UScounties\data_labeled_Apr23.csv',low_memory=False)
data.head()

Unnamed: 0,Date,Province_State,FIPS,New Cases/100k population,daily_growth_range,growth_label_estimate
0,2021-02-01,Alabama,1001,25.058619,0.0,nochange
1,2021-02-02,Alabama,1001,71.596055,46.537436,widespread
2,2021-02-03,Alabama,1001,53.697041,-17.899014,minimal
3,2021-02-04,Alabama,1001,103.81428,50.117239,widespread
4,2021-02-05,Alabama,1001,23.268718,-80.545562,minimal


In [4]:
data.tail()

Unnamed: 0,Date,Province_State,FIPS,New Cases/100k population,daily_growth_range,growth_label_estimate
289421,2021-05-01,Wyoming,56045,0.295701,-0.525658,nochange
289422,2021-05-02,Wyoming,56045,0.297334,0.001633,nochange
289423,2021-05-03,Wyoming,56045,1.180997,0.883663,nochange
289424,2021-05-04,Wyoming,56045,1.341487,0.16049,nochange
289425,2021-05-05,Wyoming,56045,2.279992,0.938506,nochange


In [5]:
data['month'] = pd.DatetimeIndex(data['Date']).month
data['week'] = pd.DatetimeIndex(data['Date']).week
data['growth_label'] = pd.Categorical(data['growth_label_estimate']).codes
data.head()

  data['week'] = pd.DatetimeIndex(data['Date']).week


Unnamed: 0,Date,Province_State,FIPS,New Cases/100k population,daily_growth_range,growth_label_estimate,month,week,growth_label
0,2021-02-01,Alabama,1001,25.058619,0.0,nochange,2,5,1
1,2021-02-02,Alabama,1001,71.596055,46.537436,widespread,2,5,2
2,2021-02-03,Alabama,1001,53.697041,-17.899014,minimal,2,5,0
3,2021-02-04,Alabama,1001,103.81428,50.117239,widespread,2,5,2
4,2021-02-05,Alabama,1001,23.268718,-80.545562,minimal,2,5,0


In [6]:
df = data.pipe(lambda x: x.assign(month=x.week)).reset_index(drop=True).pivot_table(values='New Cases/100k population', columns='week', index='FIPS', aggfunc='mean').rename_axis(None, axis=1).reset_index()
weekly_data = (df.melt(id_vars='FIPS', value_vars=data['week'].unique()).rename(columns={"variable": "week", "value": "New Cases/100k population"}))
print(weekly_data.shape)
weekly_data.head()

(43106, 3)


Unnamed: 0,FIPS,week,New Cases/100k population
0,1001,5,51.140039
1,1003,5,38.204626
2,1005,5,32.985729
3,1007,5,20.413631
4,1009,5,49.903405


In [7]:
df_labeled = data.pipe(lambda x: x.assign(month=x.week)).reset_index(drop=True).pivot_table(values='growth_label', columns='week', index='FIPS', aggfunc='last').rename_axis(None, axis=1).reset_index()
weekly_data_labeled = (df_labeled.melt(id_vars='FIPS', value_vars=data['week'].unique()).rename(columns={"variable": "week", "value": "growth_label"}))
weekly_data_labeled.head()

Unnamed: 0,FIPS,week,growth_label
0,1001,5,0
1,1003,5,0
2,1005,5,1
3,1007,5,1
4,1009,5,0


## Load Shape Data

In [8]:
us_path = os.path.join("UScounties", "UScounties.shp")
us_places = gpd.read_file(us_path)

print("ca_places_us", us_places.crs)

us_places['FIPS'] = us_places['FIPS'].apply(lambda x: int(x))
print(us_places.dtypes)
us_places.head()

ca_places_us epsg:4326
NAME            object
STATE_NAME      object
STATE_FIPS      object
CNTY_FIPS       object
FIPS             int64
geometry      geometry
dtype: object


Unnamed: 0,NAME,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,geometry
0,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715..."
1,Ferry,Washington,53,19,53019,"POLYGON ((-118.85163 47.94956, -118.84846 48.4..."
2,Stevens,Washington,53,65,53065,"POLYGON ((-117.43883 48.04412, -117.54219 48.0..."
3,Okanogan,Washington,53,47,53047,"POLYGON ((-118.97209 47.93915, -118.97406 47.9..."
4,Pend Oreille,Washington,53,51,53051,"POLYGON ((-117.43858 48.99992, -117.03205 48.9..."


## Merge Datasets

In [9]:
weekly_data_merged = weekly_data.merge(weekly_data_labeled, left_on=["FIPS", "week"], right_on=["FIPS", "week"], how="left")
weekly_data_merged.head()

Unnamed: 0,FIPS,week,New Cases/100k population,growth_label
0,1001,5,51.140039,0
1,1003,5,38.204626,0
2,1005,5,32.985729,1
3,1007,5,20.413631,1
4,1009,5,49.903405,0


In [10]:
us_merged = us_places.merge(weekly_data_merged, left_on="FIPS", right_on="FIPS", how="left")
key = 'New Cases/100k population'
key_2 = 'growth_label'

us_merged[key] = us_merged[key].fillna(0)
us_merged[key_2] = us_merged[key_2].fillna(0.0)

print("Merged Shape:",us_merged.shape)
print("US Places Shape:", us_places.shape)
us_merged.head()

Merged Shape: (43155, 9)
US Places Shape: (3141, 6)


Unnamed: 0,NAME,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,geometry,week,New Cases/100k population,growth_label
0,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",5,19.098549,1.0
1,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",6,3.81971,1.0
2,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",7,7.639419,1.0
3,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",8,26.737968,1.0
4,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",9,15.278839,1.0


In [11]:
print(us_places['FIPS'].nunique())
print(us_merged['FIPS'].nunique())

3141
3141


In [12]:
us_merged_filtered = us_merged[(us_merged['STATE_NAME'] != 'Alaska') & (us_merged['STATE_NAME'] != 'Hawaii')]
print("US Merged shape:", us_merged.shape)
print("US Merged Filtered shape:", us_merged_filtered.shape)
us_merged_filtered.head()

US Merged shape: (43155, 9)
US Merged Filtered shape: (43123, 9)


Unnamed: 0,NAME,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,geometry,week,New Cases/100k population,growth_label
0,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",5,19.098549,1.0
1,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",6,3.81971,1.0
2,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",7,7.639419,1.0
3,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",8,26.737968,1.0
4,Lake of the Woods,Minnesota,27,77,27077,"POLYGON ((-95.34283 48.54668, -95.34105 48.715...",9,15.278839,1.0


In [13]:
'''
1a. Load up to date JHU new cases for all states
1b. Filter for the current month (convert to weekly data) <-- We will only be showing current month data
2. Sarimax will have 2 weeks of forecast, using last 4 weeks of records
3. Spread Category
'''

'\n1a. Load up to date JHU new cases for all states\n1b. Filter for the current month (convert to weekly data) <-- We will only be showing current month data\n2. Sarimax will have 2 weeks of forecast, using last 4 weeks of records\n3. Spread Category\n'

## Test

In [57]:
county_names = pd.read_csv('https://raw.githubusercontent.com/kjhealy/fips-codes/master/county_fips_master.csv', encoding='cp1252')
#county_names = county_names.set_index('fips')
county_names.head()

Unnamed: 0,fips,county_name,state_abbr,state_name,long_name,sumlev,region,division,state,county,crosswalk,region_name,division_name
0,1001,Autauga County,AL,Alabama,Autauga County AL,50.0,3.0,6.0,1.0,1.0,3-6-1-1,South,East South Central
1,1003,Baldwin County,AL,Alabama,Baldwin County AL,50.0,3.0,6.0,1.0,3.0,3-6-1-3,South,East South Central
2,1005,Barbour County,AL,Alabama,Barbour County AL,50.0,3.0,6.0,1.0,5.0,3-6-1-5,South,East South Central
3,1007,Bibb County,AL,Alabama,Bibb County AL,50.0,3.0,6.0,1.0,7.0,3-6-1-7,South,East South Central
4,1009,Blount County,AL,Alabama,Blount County AL,50.0,3.0,6.0,1.0,9.0,3-6-1-9,South,East South Central


In [52]:
county_names.shape

(3146, 12)

In [62]:
print(county_names[county_names['fips']==1009].county_name.values)

['Blount County']
