In [3]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import folium
import geopandas
import numpy as np
import os
# Apply seaborn's default theme to the matplotlib figures drawn below
sns.set()

## Import data into Data Frame

We're using covidtracking.com for the data.  
Make a GET request and convert the JSON response to a list.

In [4]:
# Download the per-state daily history from covidtracking.com.
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than as a confusing
# JSON-decoding failure on an error page.
stateResponse = requests.get('https://covidtracking.com/api/states/daily', timeout=30)
stateResponse.raise_for_status()

# Decoded JSON body; the cells below treat it as a list of per-day dictionaries
stateData = stateResponse.json()

Take a look at the data we are getting.  
stateData is a list of dictionaries containing daily information about state covid cases. 

In [5]:
# Peek at the payload, one line per check:
#   1. the container type (expected: list)
#   2. the first entry itself
#   3. the type of that entry (expected: dict)
#   4. a single value looked up by key
firstRecord = stateData[0]
for item in (type(stateData), firstRecord, type(firstRecord), firstRecord['date']):
    print(item)


<class 'list'>
{'date': 20200423, 'state': 'AK', 'positive': 337, 'negative': 11824, 'pending': None, 'hospitalizedCurrently': 42, 'hospitalizedCumulative': None, 'inIcuCurrently': None, 'inIcuCumulative': None, 'onVentilatorCurrently': None, 'onVentilatorCumulative': None, 'recovered': 209, 'hash': '59a03ea91067d205ddc33b0e77890986c467ae17', 'dateChecked': '2020-04-23T20:00:00Z', 'death': 9, 'hospitalized': None, 'total': 12161, 'totalTestResults': 12161, 'posNeg': 12161, 'fips': '02', 'deathIncrease': 0, 'hospitalizedIncrease': 0, 'negativeIncrease': 0, 'positiveIncrease': 2, 'totalTestResultsIncrease': 2}
<class 'dict'>
20200423


Turn the data into a Pandas Data Frame and take a look at the first few entries. 

In [6]:
# Untouched copy of the API payload as a frame (one row per list entry)
originalStateDF = pd.DataFrame(data=stateData)

# Show the first five rows
originalStateDF.head(n=5)


Unnamed: 0,date,state,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,hospitalized,total,totalTestResults,posNeg,fips,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease
0,20200423,AK,337.0,11824.0,,42.0,,,,,...,,12161.0,12161.0,12161.0,2,0.0,0.0,0.0,2.0,2.0
1,20200423,AL,5778.0,46863.0,,,768.0,,288.0,,...,768.0,52641.0,52641.0,52641.0,1,3.0,38.0,3568.0,313.0,3881.0
2,20200423,AR,2465.0,29125.0,,101.0,291.0,,,24.0,...,291.0,31590.0,31590.0,31590.0,5,3.0,0.0,1688.0,189.0,1877.0
3,20200423,AS,0.0,3.0,17.0,,,,,,...,,20.0,3.0,3.0,60,0.0,0.0,0.0,0.0,0.0
4,20200423,AZ,5769.0,52928.0,,699.0,,305.0,,201.0,...,,58697.0,58697.0,58697.0,4,20.0,0.0,1786.0,310.0,2096.0


In [185]:
# Build a second frame from the raw payload for cleaning, so originalStateDF
# stays untouched.
stateDF = pd.DataFrame(stateData)

# The API reports dates as integers such as 20200423. Go through str and state
# the format explicitly so pandas never has to guess how to read the digits.
stateDF['date'] = pd.to_datetime(stateDF['date'].astype(str), format='%Y%m%d')


In [8]:
# Display the cleaned frame; the notebook shows a truncated view (first and last rows)
stateDF

Unnamed: 0,date,state,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,hospitalized,total,totalTestResults,posNeg,fips,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease
0,2020-04-23,AK,337.0,11824.0,,42.0,,,,,...,,12161.0,12161.0,12161.0,02,0.0,0.0,0.0,2.0,2.0
1,2020-04-23,AL,5778.0,46863.0,,,768.0,,288.0,,...,768.0,52641.0,52641.0,52641.0,01,3.0,38.0,3568.0,313.0,3881.0
2,2020-04-23,AR,2465.0,29125.0,,101.0,291.0,,,24.0,...,291.0,31590.0,31590.0,31590.0,05,3.0,0.0,1688.0,189.0,1877.0
3,2020-04-23,AS,0.0,3.0,17.0,,,,,,...,,20.0,3.0,3.0,60,0.0,0.0,0.0,0.0,0.0
4,2020-04-23,AZ,5769.0,52928.0,,699.0,,305.0,,201.0,...,,58697.0,58697.0,58697.0,04,20.0,0.0,1786.0,310.0,2096.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2764,2020-01-26,WA,1.0,,,,,,,,...,,1.0,1.0,1.0,53,0.0,0.0,0.0,0.0,0.0
2765,2020-01-25,WA,1.0,,,,,,,,...,,1.0,1.0,1.0,53,0.0,0.0,0.0,0.0,0.0
2766,2020-01-24,WA,1.0,,,,,,,,...,,1.0,1.0,1.0,53,0.0,0.0,0.0,0.0,0.0
2767,2020-01-23,WA,1.0,,,,,,,,...,,1.0,1.0,1.0,53,0.0,0.0,0.0,0.0,0.0


## Create a dataframe with most recent data (yesterday)


In [28]:
# Midnight today as a pandas Timestamp, so it compares cleanly with stateDF['date']
today = pd.to_datetime(datetime.date.today())
yesterday = today - datetime.timedelta(days=1)

# The feed may have no rows stamped with yesterday's date (late publication,
# time-zone differences, or the feed no longer being updated). Falling back to
# the newest date actually present keeps yesterdayDF from being silently empty,
# which would break the quantile bins and maps built from it.
hasYesterday = (stateDF['date'] == yesterday).any()
reportDate = yesterday if hasYesterday else stateDF['date'].max()
yesterdayDF = stateDF[stateDF['date'] == reportDate]

## Create a map color-coding yesterday's cases
Doing it this way doesn't let us use data from the dataframe for tooltips.  But it's quick and easy.  

In [11]:
# Location of the US state outlines (GeoJSON) published with the folium examples
url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
state_geo = f'{url}/us-states.json'

# Fetch the file once to inspect its structure. The timeout avoids hanging on a
# stalled connection and raise_for_status() reports HTTP errors up front.
geoResponse = requests.get(state_geo, timeout=30)
geoResponse.raise_for_status()
r = geoResponse.json()

# Sanity check: print the name stored on the second feature
print(r['features'][1]['properties']['name'])


Alaska


In [186]:
# Colour breaks at the quartiles of the positive-test counts
bins = list(yesterdayDF['positive'].quantile([0, .25, .5, .75, 1]))
print(bins)

m = folium.Map(location=[48, -102], zoom_start=3)

# Shade each state by its positive-test count. key_on='feature.id' matches the
# GeoJSON feature id against the 'state' column of yesterdayDF.
positiveChoro = folium.Choropleth(
    name='Positive Tests',
    geo_data=state_geo,
    data=yesterdayDF,
    columns=['state', 'positive'],
    key_on='feature.id',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Positive Tests',
    bins=bins,
    reset=True
)

# Add the Choropleth layer to the map
positiveChoro.add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Save the map to an html file. Create the output folder first: m.save() does
# not create missing directories, so a fresh checkout would otherwise fail here.
os.makedirs('results', exist_ok=True)
m.save(os.path.join('results', 'map.html'))

# Display the map in Jupyter Lab by making its object the cell's last expression
m

[0.0, 1748.5, 4066.0, 13314.75, 263460.0]


## Add a map putting everything into a geopandas dataframe

This will give us a lot more customization options. 

First, make the geopandas dataframe by combining our state json and our covid data.  

In [63]:
# Load the state outlines into a GeoDataFrame
geostate = geopandas.read_file(state_geo, driver='GeoJSON')

# Index both dataframes on state abbreviations so their rows can be aligned
geostate = geostate.set_index('id')
yesterdayDFindexed = yesterdayDF.set_index('state')

# An inner join will only keep rows where we have data in both sets.
# This will automatically deal with things like Puerto Rico if it's in only one set.
# Fix: this line previously referenced `geostates`, a name that is never defined
# in the notebook; the outline frame is called `geostate`.
geostatedata = pd.concat([geostate, yesterdayDFindexed], axis=1, join='inner')

# The JSON serializer won't accept a datetime type, so store the date as text
geostatedata['date'] = geostatedata['date'].astype(str)



[0.0, 1748.5, 4066.0, 13314.75, 263460.0]


Make a colormap for our color 'scale'


In [206]:
import branca.colormap as cm

# Quantile cut points for the colour scale; 0.98 isolates the extreme top end
quantiles = [0, 0.25, 0.5, 0.75, 0.98, 1]
positiveCounts = geostatedata['positive']
bins = list(positiveCounts.quantile(quantiles))

# Orange-to-red ramp on [0, 1], sampled once per quantile below
colormap1 = cm.LinearColormap(colors=['orange', 'red'], vmin=0, vmax=1)

# The ramp hands back 8-character colour values but LinearColormap only accepts
# 6 characters, so the trailing two characters are trimmed off each sample
colors = [colormap1(q)[:-2] for q in quantiles]

# Paint the top band black because NY is so much worse than everywhere else
colors[-1] = '#000000'

# Final scale: one colour per quantile bin, spanning the observed range
colormap = cm.LinearColormap(
    colors=colors,
    index=bins,
    vmin=positiveCounts.min(),
    vmax=positiveCounts.max(),
)
colormap.caption = "Positive Covid Tests"

# Look-up of fill colours keyed by the frame's index, needed because 'id' is the
# only property of the feature available when styling
colordict = positiveCounts.apply(colormap)


Make colors for total tests

In [212]:
# Same recipe as the positive-test scale, applied to the 'total' column with a
# white-to-blue ramp
totalCounts = geostatedata['total']
totalbins = list(totalCounts.quantile(quantiles))

colormap1 = cm.LinearColormap(colors=['white', 'blue'], vmin=0, vmax=1)

# The ramp hands back 8-character colour values but LinearColormap only accepts
# 6 characters, so the trailing two characters are trimmed off each sample
colors = [colormap1(q)[:-2] for q in quantiles]

# Paint the top band black because NY is so much worse than everywhere else
colors[-1] = '#000000'

# Final scale: one colour per quantile bin, spanning the observed range
totalcolormap = cm.LinearColormap(
    colors=colors,
    index=totalbins,
    vmin=totalCounts.min(),
    vmax=totalCounts.max(),
)
totalcolormap.caption = "Total Covid Tests"

# Look-up of fill colours keyed by the frame's index, needed because 'id' is the
# only property of the feature available when styling
totalcolordict = totalCounts.apply(totalcolormap)


In [214]:

statemap = folium.Map(location=[48, -102], zoom_start=3)


def outlineStyle(feature):
    """Fully transparent fill with a thin black border: outlines only."""
    return {'fillColor': 'white', 'fillOpacity': 0, 'color': 'black', 'weight': 1}


def positiveStyle(feature):
    """Fill colour looked up from colordict by the feature's id."""
    return {
        'fillColor': colordict[feature['id']],
        'fillOpacity': 0.5,
        'color': 'black',
        'weight': 1,
    }


def totalStyle(feature):
    """Fill colour looked up from totalcolordict by the feature's id."""
    return {
        'fillColor': totalcolordict[feature['id']],
        'fillOpacity': 0.5,
        'color': 'black',
        'weight': 1,
    }


# Tooltip shown on the outline layer, listing the name and both test counts
stateTooltip = folium.GeoJsonTooltip(
    fields=['name', 'positive', 'total'],
    aliases=['State', 'Positive Tests', 'Total Tests'],
    localize=True,
)

statelayer = folium.GeoJson(
    geostatedata,
    name='States',
    style_function=outlineStyle,
    tooltip=stateTooltip,
)
positivelayer = folium.GeoJson(
    geostatedata,
    name='Positive Tests',
    style_function=positiveStyle,
)
totallayer = folium.GeoJson(
    geostatedata,
    name='Total Tests',
    style_function=totalStyle,
)

# Attach the layers in the original order: the two coloured layers first, then
# the outline layer carrying the tooltip
for layer in (positivelayer, totallayer, statelayer):
    layer.add_to(statemap)

# Attach both colour scales to the map
for legend in (colormap, totalcolormap):
    statemap.add_child(legend)

folium.LayerControl().add_to(statemap)

# This map is not written to disk; it is only displayed inline by making the
# map object the cell's last expression
statemap

In [184]:
# size is total number of entries
size = stateDF.size
# shape is (number of rows, number of columns)
shape = stateDF.shape
compSize = shape[0] * shape[1]
print(shape, size, compSize)
# row labels
print("row labels: ", stateDF.index)
print("column labels: ", stateDF.columns)
print("data types: ", stateDF.dtypes)


(2769, 25) 69225 69225
row labels:  RangeIndex(start=0, stop=2769, step=1)
column labels:  Index(['date', 'state', 'positive', 'negative', 'pending',
       'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently',
       'inIcuCumulative', 'onVentilatorCurrently', 'onVentilatorCumulative',
       'recovered', 'hash', 'dateChecked', 'death', 'hospitalized', 'total',
       'totalTestResults', 'posNeg', 'fips', 'deathIncrease',
       'hospitalizedIncrease', 'negativeIncrease', 'positiveIncrease',
       'totalTestResultsIncrease'],
      dtype='object')
data types:  date                        datetime64[ns]
state                               object
positive                           float64
negative                           float64
pending                            float64
hospitalizedCurrently              float64
hospitalizedCumulative             float64
inIcuCurrently                     float64
inIcuCumulative                    float64
onVentilatorCurrently        

## Exploratory data analysis

In [None]:
# make a new data frame for just values in new mexico
# Boolean mask picks out the rows whose state code is "NM"
isNewMexico = stateDF['state'].eq("NM")
nmdf = stateDF.loc[isNewMexico]


In [None]:
plt.figure(figsize=(8,4))

# Correlate numeric columns only. stateDF also holds text columns ('state',
# 'hash', 'dateChecked', 'fips'), and recent pandas versions raise on them
# instead of silently dropping them inside corr().
numericDF = stateDF.select_dtypes(include='number')
sns.heatmap(numericDF.corr(), cmap='Greens', annot = False)

In [None]:
# Positive tests over time: New Mexico only, then every state together.
# Both figures get a title so each stands on its own when skimmed
# (the first title previously read "verses").
nmdf.plot(kind = 'scatter', x='date', y='positive', title='date versus cases')
# Trailing semicolon keeps the Axes repr out of the cell output
stateDF.plot(kind = 'scatter', x='date', y='positive', title='date versus cases (all states)');

In [None]:
# Print a per-column summary of stateDF (non-null counts and dtypes)
stateDF.info()


## Compare testing and death rates

I want to explore differences between testing rates and death rates. Some states have similar numbers of positive tests, but very different numbers of deaths.  
I want to know if the states that have a higher death to positive test ratio are doing fewer tests.  

I want to look at the ratio of positive tests to total tests and the ratio of total tests to population.  

Let's see what we find...

In [None]:
# Rows where the positive-test count is exactly 10.
# The comparison has to sit outside the column lookup: the previous
# stateDF['positive' == 10] evaluated to stateDF[False] and raised a KeyError.
stateDF.loc[stateDF['positive'] == 10]


In [None]:
# `today` is a pandas Timestamp created in an earlier cell, not a function,
# so print the value itself; calling it raised a TypeError.
print(today)

In [None]:
# "March 26" carries no year, so parsing it on its own leaves the year to the
# parser's defaults. Supplying the year together with an explicit format makes
# the result unambiguous.
# NOTE(review): 2020 is assumed because the data shown in this notebook is from 2020 — confirm.
datestring= "March 26"
parsedDate = pd.to_datetime(f"{datestring} 2020", format="%B %d %Y")
print(parsedDate)