In [174]:
import pandas as pd
import datetime
import re
import numpy as np
import sys, os
from math import pi
import re

from bokeh.plotting import output_notebook, figure, show
from bokeh.palettes import BuPu9, GnBu9, Category20c, RdYlBu, OrRd, RdBu
from bokeh.models import HoverTool, CustomJS, ColumnDataSource, BoxSelectTool, Range1d, Rect, LabelSet, BooleanFilter, BoxAnnotation, DatetimeTickFormatter, CDSView, GroupFilter, NumeralTickFormatter, Label
from bokeh.layouts import row
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn
from bokeh.transform import linear_cmap

from bokeh.plotting import figure
from bokeh.transform import cumsum

#Send Bokeh output to the notebook
output_notebook()

#Keyword arguments unpacked into figure() when the plot is created
opts = {
    'width': 1200,
    'height': 600,
    'toolbar_location': "above",
    'tools': 'tap, box_zoom, pan, undo, crosshair, reset, wheel_zoom, box_select, save',
}


In [175]:
#Return list of cases per state and county
def populateCases(state, county, population=None):
    """Return the per-date case values for one state/county pair.

    Reads two module-level names: ``df`` (the case table) and ``dfDates``
    (the list of column labels in ``df`` to read, one per date).

    Parameters
    ----------
    state
        Compared, after ``str()``, against ``df['Province_State']``.
    county
        Compared, after ``str()``, against ``df['Admin2']``.
    population : number, optional
        When given, every value is divided by it and returned as ``float``.
        When ``None`` (the default) the values are returned as ``int``.

    Returns
    -------
    list
        One entry per label in ``dfDates``, in the same order.

    Raises
    ------
    IndexError
        If no row of ``df`` matches the state/county pair.
    """
    state = str(state)
    county = str(county)

    #Select the matching row(s) once instead of re-filtering df for every date
    isLocation = (df['Province_State'] == state) & (df['Admin2'] == county)
    matches = df.loc[isLocation, dfDates]
    if len(matches) == 0:
        #Same exception type as the old bare [0] lookup, but says what was missing
        raise IndexError(
            "No row found for state '{}' and county '{}'".format(state, county)
        )

    #As before, the first matching row is the one that is used
    values = matches.iloc[0].tolist()

    if population is None:
        return [int(value) for value in values]
    return [float(value / population) for value in values]

#Return Population per state and county
def getPopulation(state, county):
    """Return the ``POPESTIMATE2019`` value for one state/county pair.

    Reads the module-level ``dfPopulate`` table. The literal suffix
    ``' County'`` is appended to ``county`` before it is compared with the
    ``CTYNAME`` column, so callers pass the bare name (e.g. ``'Dallas'``).

    Parameters
    ----------
    state : str
        Compared against ``dfPopulate['STNAME']``.
    county : str
        County name without the ``' County'`` suffix.

    Returns
    -------
    The first matching ``POPESTIMATE2019`` entry, or the hard-coded
    override for New York / New York County.

    Raises
    ------
    IndexError
        If no row of ``dfPopulate`` matches the state/county pair.
    """
    county = county + ' County'
    isLocation = (dfPopulate['STNAME'] == state) & (dfPopulate['CTYNAME'] == county)
    popSeries = dfPopulate.loc[isLocation, 'POPESTIMATE2019']
    if len(popSeries) == 0:
        #Same exception type as the old bare [0] lookup, but says what was missing
        raise IndexError(
            "No population entry for state '{}' and county '{}'".format(state, county)
        )
    pop = popSeries.tolist()[0]

    #Adjust New York: replace the table value with a fixed figure.
    #NOTE(review): 8398748 is presumably a citywide population figure - TODO confirm
    #the source. The original note said this adjustment "has no effect".
    if state == 'New York' and county == 'New York County':
        pop = 8398748
    return pop

In [176]:
#Input files: Covid confirmed-cases time series and county population table
gitFile = 'COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
popFile = 'co-est2019-alldata.csv'

#[state, county] pairs to plot; each pair gets its population appended further down
dataToGrab = [
    ['Tennessee', 'Hamilton'],
    ['Florida', 'Palm Beach'],
    ['Texas', 'Dallas'],
    ['New York', 'New York'],
    ['California', 'Los Angeles'],
    ['Georgia', 'Fulton'],
    ['Georgia', 'DeKalb'],
    ['Florida', 'Miami-Dade'],
    ['Massachusetts', 'Suffolk'],
    ['New Jersey', 'Essex'],
]

#True -> plot cases divided by population, False -> plot raw case counts
divideByPopulation = True

#Load both tables
df = pd.read_csv(gitFile)
dfPopulate = pd.read_csv(popFile, engine='python')

#Extend every entry in place: [state, county] -> [state, county, population]
for entry in dataToGrab:
    entry.append(getPopulation(entry[0], entry[1]))

#The date labels are the column names themselves; the first 11 columns are skipped
dfDates = df.columns.tolist()[11:]

#One series of cases per location, stored under the key '<state>_<county>'
countyStateCases = {}
for stateName, countyName, countyPopulation in dataToGrab:
    if divideByPopulation:
        locationCases = populateCases(stateName, countyName, countyPopulation)
    else:
        locationCases = populateCases(stateName, countyName)
    countyStateCases[str(stateName) + '_' + str(countyName)] = locationCases

#Parse the column labels (month/day/two-digit year) into datetime objects
dataDates = [datetime.datetime.strptime(label, '%m/%d/%y') for label in dfDates]

#The dates share the dict with the case series so they end up in the same frame
countyStateCases['dates'] = dataDates

#Convert to DF
dfToPlot = pd.DataFrame.from_dict(countyStateCases)

In [177]:
#Plot data: one line + marker series per county on a shared datetime x-axis
pltA = figure(**opts, x_axis_type="datetime")

#Column names are the counties to plot ('dates' is the shared x column, not a series)
columnCounties = dfToPlot.columns.tolist()
columnCounties.remove('dates')

#RdBu is a dict keyed by palette size and only defines a limited range of sizes,
#so indexing it directly with the county count raises KeyError outside that range.
#Outside the range, use the nearest available palette and cycle through it so
#every county still gets a colour. Counts inside the range keep the exact palette.
numCounties = len(columnCounties)
if numCounties in RdBu:
    lineColors = list(RdBu[numCounties])
else:
    nearestSize = min(max(numCounties, min(RdBu)), max(RdBu))
    basePalette = RdBu[nearestSize]
    lineColors = [basePalette[i % len(basePalette)] for i in range(numCounties)]

#Add data from dfToPlot
for idx, location in enumerate(columnCounties, start=0):
    #Keys look like '<state>_<county>'; show them as '<state>, <county>'
    legendName = location.replace('_', ', ')
    #Get color
    lineColor = str(lineColors[idx])
    #Line and markers share the legend label, so one legend entry toggles both
    pltA.line( x=dfToPlot['dates'], y=dfToPlot[location], color=lineColor, legend_label=legendName, alpha=0.7, line_width=2)
    pltA.scatter( x=dfToPlot['dates'], y=dfToPlot[location], color=lineColor, legend_label=legendName, alpha=0.7, line_width=2)

#Update location of the legend; clicking an entry hides that series
pltA.legend.location = 'top_center'
pltA.legend.click_policy="hide"

#Update Title and y-axis label to match the chosen mode
if divideByPopulation:
    pltA.title.text = 'Cases in Select US Counties divided by Population'
    pltA.yaxis.axis_label = 'Number of Cases Divided by Population'
else:
    pltA.title.text = 'Cases in Select US Counties'
    pltA.yaxis.axis_label = 'Number of Cases'

#Add Hatch Pattern
pltA.xgrid.band_hatch_pattern = "\\"
pltA.xgrid.band_hatch_alpha = 0.4
pltA.xgrid.band_hatch_color = "lightgrey"
pltA.xgrid.band_hatch_weight = 0.4
pltA.xgrid.band_hatch_scale = 20

#Format to always show Month/Day
pltA.xaxis.formatter=DatetimeTickFormatter(days="%m/%d",
    months="%m/%d",
    hours="%m/%d",
    minutes="%m/%d")

#Add more ticks: aim for one per two rows, but never ask for fewer than one
pltA.xaxis.ticker.desired_num_ticks = max(1, len(dfToPlot) // 2)

show(pltA)