In [120]:
import pandas as pd
import matplotlib.pyplot as plt

Get Data 

In [121]:
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv'
data = pd.read_csv(url, error_bad_lines = False)
data.dtypes

date      object
state     object
fips       int64
cases      int64
deaths     int64
dtype: object

In [122]:
cases = data[['date', 'state', 'cases']]

In [123]:
#For sake of choro map we need to abbreviate choromap.
states = {
        'AK': 'Alaska',
        'AL': 'Alabama',
        'AR': 'Arkansas',
        'AS': 'American Samoa',
        'AZ': 'Arizona',
        'CA': 'California',
        'CO': 'Colorado',
        'CT': 'Connecticut',
        'DC': 'District of Columbia',
        'DE': 'Delaware',
        'FL': 'Florida',
        'GA': 'Georgia',
        'GU': 'Guam',
        'HI': 'Hawaii',
        'IA': 'Iowa',
        'ID': 'Idaho',
        'IL': 'Illinois',
        'IN': 'Indiana',
        'KS': 'Kansas',
        'KY': 'Kentucky',
        'LA': 'Louisiana',
        'MA': 'Massachusetts',
        'MD': 'Maryland',
        'ME': 'Maine',
        'MI': 'Michigan',
        'MN': 'Minnesota',
        'MO': 'Missouri',
        'MP': 'Northern Mariana Islands',
        'MS': 'Mississippi',
        'MT': 'Montana',
        'NA': 'National',
        'NC': 'North Carolina',
        'ND': 'North Dakota',
        'NE': 'Nebraska',
        'NH': 'New Hampshire',
        'NJ': 'New Jersey',
        'NM': 'New Mexico',
        'NV': 'Nevada',
        'NY': 'New York',
        'OH': 'Ohio',
        'OK': 'Oklahoma',
        'OR': 'Oregon',
        'PA': 'Pennsylvania',
        'PR': 'Puerto Rico',
        'RI': 'Rhode Island',
        'SC': 'South Carolina',
        'SD': 'South Dakota',
        'TN': 'Tennessee',
        'TX': 'Texas',
        'UT': 'Utah',
        'VA': 'Virginia',
        'VI': 'Virgin Islands',
        'VT': 'Vermont',
        'WA': 'Washington',
        'WI': 'Wisconsin',
        'WV': 'West Virginia',
        'WY': 'Wyoming'
}

In [124]:
#Map abbreviation to state.
states = {state: abbrev for abbrev, state in states.items()}
cases['abbrev'] = cases['state'].map(states)
cases



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,date,state,cases,abbrev
0,2020-01-21,Washington,1,WA
1,2020-01-22,Washington,1,WA
2,2020-01-23,Washington,1,WA
3,2020-01-24,Illinois,1,IL
4,2020-01-24,Washington,1,WA
...,...,...,...,...
8204,2020-07-29,Virginia,87993,VA
8205,2020-07-29,Washington,57300,WA
8206,2020-07-29,West Virginia,6326,WV
8207,2020-07-29,Wisconsin,55119,WI


Great, we have added abbreviations to the dataframe. We will keep the state name for ease of use when merging another dataframe. Now we can plot a geographical visual that tells us the prevalence of cases in each state over a certain time period.

In [125]:
import plotly.express as px
fig = px.choropleth(cases,
                   locations='abbrev',
                   color='cases',
                   hover_name = 'abbrev',
                   locationmode = 'USA-states',
                   animation_frame = 'date')

fig.update_layout(
title_text = 'Spread of Covid-19 in the United States',
title_x = 0.5,
geo_scope = 'usa',
geo=dict(
showframe = False,
showcoastlines = False))

fig.show()

We can see the severity of cases as the virus spreads across the United States with the above visual. However, a more helpful statistic will be to measure the virus spread versus the population for each state. Then we can get a more accurate estimate of the significance it holds in each state. 

In [126]:
#Importing population data.
pop_data = pd.read_csv(r"C:\Users\19712\Documents\Python Scripts\population estimates.txt", sep = '\t', thousands = ',', header = int())
pop_data.columns = ['rank', 'state', 'population', 'percent of Tot']
pop_data.drop(['percent of Tot', 'rank'] , axis=1, inplace=True)

In [127]:
#Merge population to dataframe.
cases = data.merge(pop_data,
          how = 'left',
          left_on = 'state',
          right_on = 'state')

Now that we have a more complete dataframe we will create another visual to account for the severity of cases in each state. To do this we will use the same as above for simplicity. First, we must determine a rolling percentage of cases in each state so that we can determine the severity of the virus. 

In [128]:
#Creating percentage column in dataframe.
cases['percent'] = cases['cases'] / cases['population'] * 100

Unnamed: 0,date,state,fips,cases,deaths,population,percent
0,2020-01-21,Washington,53,1,0,7614893.0,0.000013
1,2020-01-22,Washington,53,1,0,7614893.0,0.000013
2,2020-01-23,Washington,53,1,0,7614893.0,0.000013
3,2020-01-24,Illinois,17,1,0,12671821.0,0.000008
4,2020-01-24,Washington,53,1,0,7614893.0,0.000013
...,...,...,...,...,...,...,...
8204,2020-07-29,Virginia,51,87993,2125,8535519.0,1.030904
8205,2020-07-29,Washington,53,57300,1643,7614893.0,0.752473
8206,2020-07-29,West Virginia,54,6326,112,1792147.0,0.352984
8207,2020-07-29,Wisconsin,55,55119,920,5822434.0,0.946666


In [130]:
#Add abbreviation again.
states = {state: abbrev for abbrev, state in states.items()}
cases['abbrev'] = cases['state'].map(states)
cases

Unnamed: 0,date,state,fips,cases,deaths,population,percent,abbrev
0,2020-01-21,Washington,53,1,0,7614893.0,0.000013,WA
1,2020-01-22,Washington,53,1,0,7614893.0,0.000013,WA
2,2020-01-23,Washington,53,1,0,7614893.0,0.000013,WA
3,2020-01-24,Illinois,17,1,0,12671821.0,0.000008,IL
4,2020-01-24,Washington,53,1,0,7614893.0,0.000013,WA
...,...,...,...,...,...,...,...,...
8204,2020-07-29,Virginia,51,87993,2125,8535519.0,1.030904,VA
8205,2020-07-29,Washington,53,57300,1643,7614893.0,0.752473,WA
8206,2020-07-29,West Virginia,54,6326,112,1792147.0,0.352984,WV
8207,2020-07-29,Wisconsin,55,55119,920,5822434.0,0.946666,WI


In [132]:
import plotly.express as px
fig = px.choropleth(cases,
                   locations='abbrev',
                   color='percent',
                   hover_name = 'abbrev',
                   locationmode = 'USA-states',
                   animation_frame = 'date')

fig.update_layout(
title_text = 'Spread of Covid-19 in the United States (%)',
title_x = 0.5,
geo_scope = 'usa',
geo=dict(
showframe = False,
showcoastlines = False))

fig.show()