In [11]:
# !pip install geopandas==0.3.0
# !pip install pyshp==1.2.10
# !pip install shapely==1.6.3

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.figure_factory as ff

In [2]:
# Read data set from nytimes github
us_county_data = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')
us_county_data.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [3]:
# Extract only California county data
ca_county_data = us_county_data.loc[us_county_data['state'] == 'California']
ca_county_data = ca_county_data.fillna(0)
ca_county_data.head()

Unnamed: 0,date,county,state,fips,cases,deaths
5,2020-01-25,Orange,California,6059.0,1,0
9,2020-01-26,Los Angeles,California,6037.0,1,0
10,2020-01-26,Orange,California,6059.0,1,0
14,2020-01-27,Los Angeles,California,6037.0,1,0
15,2020-01-27,Orange,California,6059.0,1,0


In [4]:
# Create one table for cases and one table for deaths in California counties since 1/25/2020
ca_county_cases = pd.pivot_table(ca_county_data, values = 'cases',  index='county', columns = 'date').reset_index()
ca_county_deaths = pd.pivot_table(ca_county_data, values = 'deaths',  index='county', columns = 'date').reset_index()
ca_county_cases.head()

date,county,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,2020-01-31,2020-02-01,2020-02-02,...,2020-03-20,2020-03-21,2020-03-22,2020-03-23,2020-03-24,2020-03-25,2020-03-26,2020-03-27,2020-03-28,2020-03-29
0,Alameda,,,,,,,,,,...,67.0,87.0,127.0,139.0,151.0,156.0,199.0,241.0,277.0,291.0
1,Amador,,,,,,,,,,...,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
2,Butte,,,,,,,,,,...,,1.0,2.0,3.0,4.0,4.0,5.0,5.0,5.0,5.0
3,Calaveras,,,,,,,,,,...,,,,,2.0,2.0,3.0,3.0,3.0,3.0
4,Colusa,,,,,,,,,,...,,,,,,,,1.0,1.0,1.0


In [25]:
missing_counties = {'county':['Modoc', 'Del Norte', 'Trinity', 'Tehama', 'Lassen', 'Plumas', 'Lake', 'Alpine', 'Mariposa', 'Sierra'], 
                    'fips':[6049.0, 6015.0, 6105.0, 6103.0, 6035.0, 6063.0, 6033.0, 6003.0, 6043.0, 6091.0], 
                    'cases':[1,1,1,1,1,1,1,1,1,1],
                    'deaths':[0,0,0,0,0,0,0,0,0,0],
                   'state':['California', 'California', 'California', 'California', 'California', 'California', 'California', 'California', 'California', 'California']}
missing_counties = pd.DataFrame(missing_counties)
complete = ca_county_data.append(missing_counties, sort = 'False')
complete = complete.reset_index()
complete.tail()

Unnamed: 0,index,cases,county,date,deaths,fips,state
1052,5,1,Plumas,,0,6063.0,California
1053,6,1,Lake,,0,6033.0,California
1054,7,1,Alpine,,0,6003.0,California
1055,8,1,Mariposa,,0,6043.0,California
1056,9,1,Sierra,,0,6091.0,California


In [28]:
values = complete['cases'].tolist()
fips = complete['fips'].tolist()

#endpts = list(np.mgrid[min(values):max(values):4j])
colorscale = ["#030512","#1d1d3b","#323268","#3d4b94","#3e6ab0"]
fig = ff.create_choropleth(
    fips=fips, values=values, scope=['California'], show_state_data=True,
    colorscale=colorscale, binning_endpoints=[1, 100, 1000, 2000], round_legend_values=True,
    plot_bgcolor='rgb(229,229,229)',
    paper_bgcolor='rgb(229,229,229)',
    legend_title='Cases by County',
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
    exponent_format=True,
)
fig.layout.template = None
fig.show()