In [1]:
import pandas as pd
import plotly.express as px

### Nationwide Data

In [2]:
df = pd.read_csv("data/covid_19_data.csv")

In [3]:
df.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [4]:
df[df["Country/Region"] == "Uruguay"]

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
5571,5572,03/14/2020,,Uruguay,2020-03-14T16:33:03,4.0,0.0,0.0
5826,5827,03/15/2020,,Uruguay,2020-03-14T16:33:03,4.0,0.0,0.0
6074,6075,03/16/2020,,Uruguay,2020-03-16T14:38:45,8.0,0.0,0.0
6311,6312,03/17/2020,,Uruguay,2020-03-17T02:13:54,29.0,0.0,0.0
6571,6572,03/18/2020,,Uruguay,2020-03-18T11:33:04,50.0,0.0,0.0
...,...,...,...,...,...,...,...,...
168840,168841,12/02/2020,,Uruguay,2020-12-03 05:28:22,6225.0,80.0,4584.0
169600,169601,12/03/2020,,Uruguay,2020-12-04 05:27:27,6455.0,80.0,4707.0
170360,170361,12/04/2020,,Uruguay,2020-12-05 05:27:36,6731.0,80.0,4826.0
171120,171121,12/05/2020,,Uruguay,2020-12-06 05:26:18,6965.0,81.0,4946.0


### Aggregated data over regions (cummulative)

In [5]:
totals = df.groupby(["Country/Region", "Province/State"]).sum().reset_index().drop("SNo", axis=1)
totals.head()

Unnamed: 0,Country/Region,Province/State,Confirmed,Deaths,Recovered
0,Australia,Australian Capital Territory,28175.0,735.0,25809.0
1,Australia,Diamond Princess cruise ship,0.0,0.0,0.0
2,Australia,External territories,0.0,0.0,0.0
3,Australia,From Diamond Princess,55.0,0.0,0.0
4,Australia,Jervis Bay Territory,0.0,0.0,0.0


In [6]:
totals[totals["Country/Region"] == "US"]

Unnamed: 0,Country/Region,Province/State,Confirmed,Deaths,Recovered
532,US,"Norfolk County, MA",7.0,0.0,0.0
533,US,Alabama,24354457.0,417250.0,0.0
534,US,"Alameda County, CA",6.0,0.0,0.0
535,US,Alaska,1700943.0,9522.0,0.0
536,US,American Samoa,0.0,0.0,0.0
...,...,...,...,...,...
726,US,"Williamson County, TN",5.0,0.0,0.0
727,US,Wisconsin,25200400.0,291222.0,8.0
728,US,Wuhan Evacuee,4.0,0.0,0.0
729,US,Wyoming,1562150.0,12045.0,0.0


### US Specific confirmed cases

In [7]:
# CONFIRMED US CASES
df_usa = pd.read_csv("data/time_series_covid_19_confirmed_US.csv")
df_usa.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,11/27/20,11/28/20,11/29/20,11/30/20,12/1/20,12/2/20,12/3/20,12/4/20,12/5/20,12/6/20
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,2716,2735,2751,2780,2818,2873,2893,2945,2979,3005
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,...,8603,8733,8820,8890,9051,9163,9341,9501,9626,9728
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,1171,1173,1175,1178,1189,1206,1214,1217,1219,1223
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,1173,1179,1188,1196,1204,1239,1252,1270,1283,1293
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,2888,2922,2946,2997,3061,3100,3158,3231,3281,3299


### Covid confirmed cases over time for specific State

We can use this same logic to generate filtered graphs on selection (i.e. state, date)

In [111]:
onestate = df_usa.groupby("Province_State").sum()[df_usa.columns[11:]].loc["Alabama"]
onestate

1/22/20         0
1/23/20         0
1/24/20         0
1/25/20         0
1/26/20         0
            ...  
12/2/20    256828
12/3/20    260359
12/4/20    264199
12/5/20    267589
12/6/20    269877
Name: Alabama, Length: 320, dtype: int64

In [112]:
px.line(x=onestate.index, y=onestate)

In [10]:
px.set_mapbox_access_token(open(".mapbox_token").read())
fig = px.scatter_mapbox(df_usa,
                        lat="Lat",
                        lon="Long_",
                        zoom=1)

In [11]:
fig

In [113]:
with open("geojson_states.json") as f:
    states_geojson = json.load(f)

In [114]:
len(states_geojson["features"])

51

In [115]:
list(map(lambda x: x["properties"]["name"], states_geojson["features"]))

['Alabama',
 'Alaska',
 'Arizona',
 'Arkansas',
 'California',
 'Colorado',
 'Connecticut',
 'Delaware',
 'District of Columbia',
 'Florida',
 'Georgia',
 'Hawaii',
 'Idaho',
 'Illinois',
 'Indiana',
 'Iowa',
 'Kansas',
 'Kentucky',
 'Louisiana',
 'Maine',
 'Maryland',
 'Massachusetts',
 'Michigan',
 'Minnesota',
 'Mississippi',
 'Missouri',
 'Montana',
 'Nebraska',
 'Nevada',
 'New Hampshire',
 'New Jersey',
 'New Mexico',
 'New York',
 'North Carolina',
 'North Dakota',
 'Ohio',
 'Oklahoma',
 'Oregon',
 'Pennsylvania',
 'Rhode Island',
 'South Carolina',
 'South Dakota',
 'Tennessee',
 'Texas',
 'Utah',
 'Vermont',
 'Virginia',
 'Washington',
 'West Virginia',
 'Wisconsin',
 'Wyoming']

In [116]:
states = df_usa.groupby("Province_State").sum()

In [117]:
states = states.drop(["American Samoa", "Diamond Princess", "Grand Princess", "Guam", "Puerto Rico", "Northern Mariana Islands", "Virgin Islands"])

In [118]:
states = states.reset_index().rename(columns={"Province_State": "name"})
states

Unnamed: 0,name,UID,code3,FIPS,Lat,Long_,1/22/20,1/23/20,1/24/20,1/25/20,...,11/27/20,11/28/20,11/29/20,11/30/20,12/1/20,12/2/20,12/3/20,12/4/20,12/5/20,12/6/20
0,Alabama,5796241491,57960,241491.0,2203.246784,-5809.578199,0,0,0,0,...,242874,244993,247229,249524,252900,256828,260359,264199,267589,269877
1,Alaska,2604232344,26040,232344.0,1747.177765,-4293.070291,0,0,0,0,...,30776,31444,32065,32576,33115,33802,34563,35325,36271,37036
2,Arizona,1428230216,14280,230216.0,505.138555,-1671.948482,0,0,0,0,...,318638,322774,325995,326817,337139,340979,346421,352101,358900,364276
3,Arkansas,6468550635,64680,550635.0,2618.391704,-6932.54837,0,0,0,0,...,153677,155026,156247,157359,159309,161521,164310,167137,169382,170924
4,California,5040521376,50400,521376.0,2194.949775,-7002.258461,0,0,0,0,...,1185576,1200624,1215455,1230264,1246042,1265182,1286550,1310307,1337941,1366673
5,Colorado,5544685999,55440,685999.0,2491.870366,-6751.119357,0,0,0,0,...,220953,225283,228772,232905,237310,241172,247209,252222,257347,260581
6,Connecticut,840242082,8400,242082.0,332.909441,-581.254637,0,0,0,0,...,112581,112581,112581,117295,118754,121426,126177,127715,127715,127715
7,Delaware,420200029,4200,200029.0,117.32783,-226.599712,0,0,0,0,...,34170,34670,35251,35654,36343,36698,37456,38398,39096,39912
8,District of Columbia,252181023,2520,181023.0,38.904178,-77.01656,0,0,0,0,...,20937,21308,21448,21552,21685,21842,22164,22480,22872,23136
9,Florida,5796978574,57960,978574.0,1939.030577,-5540.949842,0,0,0,0,...,979020,985297,992660,999319,1008166,1018160,1029030,1039207,1049638,1058074


In [119]:
# use custom_data parameter to send over state specific info to callback when ready for dashboard interaction
fig = px.choropleth_mapbox(states, geojson=states_geojson, locations='name', color='1/22/20', featureidkey="properties.name",
                           color_continuous_scale="Viridis",
                           range_color=(0, 12),
                           mapbox_style="carto-positron",
                           zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
                           opacity=0.8,
                           labels={'unemp':'unemployment rate'})
fig.update_layout(clickmode='event+select')
fig