# Visualizing COVID-19 Growth Using A County-level Dataset

This notebook shows how to visualize the growth of COVID-19 in the United States using our county-level dataset, available [here]()

## Import Modules and Define Constants

In [1]:
import os
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from os.path import join, exists
from collections import OrderedDict

import plotly
import plotly.figure_factory as ff

In [7]:
# Input CSVs are read from data_dir; timeseries_dir is the output directory used
# when naming the per-date PNG frames.
data_dir = '../data'
timeseries_dir = 'elia_stuff'

# exist_ok=True makes this a no-op when the directory is already there, which avoids
# the check-then-create race of `if not exists(...): os.mkdir(...)` and also creates
# any missing parent directories.
os.makedirs(timeseries_dir, exist_ok=True)

In [8]:
# Load the full county-level table. dtype=str keeps every column as text, so
# identifier columns such as FIPS are not coerced to numbers.
df_demo = pd.read_csv(join(data_dir, 'counties.csv'), dtype=str)

In [11]:
# Show every available column name as a plain Python list.
df_demo.columns.tolist()

['FIPS',
 'State',
 'Area_Name',
 'Rural-urban_Continuum Code_2013',
 'Urban_Influence_Code_2013',
 'Economic_typology_2015',
 'POP_ESTIMATE_2018',
 'N_POP_CHG_2018',
 'Births_2018',
 'Deaths_2018',
 'NATURAL_INC_2018',
 'INTERNATIONAL_MIG_2018',
 'DOMESTIC_MIG_2018',
 'NET_MIG_2018',
 'RESIDUAL_2018',
 'GQ_ESTIMATES_2018',
 'R_birth_2018',
 'R_death_2018',
 'R_NATURAL_INC_2018',
 'R_INTERNATIONAL_MIG_2018',
 'R_DOMESTIC_MIG_2018',
 'R_NET_MIG_2018',
 'Less than a high school diploma 2014-18',
 'High school diploma only 2014-18',
 "Some college or associate's degree 2014-18",
 "Bachelor's degree or higher 2014-18",
 'Percent of adults with less than a high school diploma 2014-18',
 'Percent of adults with a high school diploma only 2014-18',
 "Percent of adults completing some college or associate's degree 2014-18",
 "Percent of adults with a bachelor's degree or higher 2014-18",
 'POVALL_2018',
 'CI90LBAll_2018',
 'CI90UBALL_2018',
 'PCTPOVALL_2018',
 'CI90LBALLP_2018',
 'CI90UBALLP_2

In [9]:
# Preview the first rows of the county table to sanity-check the load.
df_demo.head()

Unnamed: 0,FIPS,State,Area_Name,Rural-urban_Continuum Code_2013,Urban_Influence_Code_2013,Economic_typology_2015,POP_ESTIMATE_2018,N_POP_CHG_2018,Births_2018,Deaths_2018,...,Total number of UCR (Uniform Crime Report) Index crimes excluding arson.,Total number of UCR (Uniform Crime Report) index crimes reported including arson,MURDER,RAPE,ROBBERY,Number of AGGRAVATED ASSAULTS,BURGLRY,LARCENY,MOTOR VEHICLE THEFTS,ARSON
0,0,US,United States,,,,327167434,2020313,3855500,2814013,...,,,,,,,,,,
1,1000,AL,Alabama,,,,4887871,12751,57216,53425,...,,,,,,,,,,
2,1001,AL,Autauga County,2.0,2.0,0.0,55601,158,655,532,...,139.0,1765.0,3.0,15.0,34.0,87.0,447.0,1233.0,85.0,108.0
3,1003,AL,Baldwin County,3.0,2.0,5.0,218022,5403,2254,2228,...,446.0,4988.0,8.0,30.0,76.0,332.0,967.0,3829.0,192.0,31.0
4,1005,AL,Barbour County,6.0,6.0,3.0,24881,-277,261,324,...,48.0,474.0,0.0,4.0,8.0,36.0,90.0,362.0,21.0,0.0


## Load Population and Time-series Data

In [12]:
# Population lookup: FIPS code -> 2018 population estimate.
# Both keys and values stay strings because the CSV is read with dtype=str.
pops = pd.read_csv(join(data_dir, 'counties.csv'), dtype=str)
pops = {
    fips: population
    for fips, population in zip(pops['FIPS'], pops['POP_ESTIMATE_2018'])
}

# Per-county time series to plot (infections).
# To plot grocery visits instead: read
# join(data_dir, 'out_of_home_activity', 'grocery_visits.csv'), take the columns
# from index 1 onward as labels, and use a start date of March 1, 2020.
timeseries = pd.read_csv(join(data_dir, 'infections_timeseries.csv'), dtype=str)

# Skip the first two columns; each remaining column is treated as one date.
labels = timeseries.columns[2:]

# 0 plots the raw values; a non-zero value (e.g. 10000) rescales them to
# "per that many people" using the population lookup.
per_what = 0

# The infections time series begins on Jan 22, 2020.
start = dt.date(year=2020, month=1, day=22)

## Create a Data Dictionary for Each Date

In [13]:
# Inspect the FIPS -> population lookup. Values are strings because the CSV was
# read with dtype=str; they are converted with float() where they are used.
pops

{'00000': '327167434',
 '01000': '4887871',
 '01001': '55601',
 '01003': '218022',
 '01005': '24881',
 '01007': '22400',
 '01009': '57840',
 '01011': '10138',
 '01013': '19680',
 '01015': '114277',
 '01017': '33615',
 '01019': '26032',
 '01021': '44153',
 '01023': '12841',
 '01025': '23920',
 '01027': '13275',
 '01029': '14987',
 '01031': '51909',
 '01033': '54762',
 '01035': '12277',
 '01037': '10715',
 '01039': '36986',
 '01041': '13824',
 '01043': '83442',
 '01045': '48956',
 '01047': '38310',
 '01049': '71385',
 '01051': '81887',
 '01053': '36748',
 '01055': '102501',
 '01057': '16433',
 '01059': '31363',
 '01061': '26314',
 '01063': '8233',
 '01065': '14726',
 '01067': '17209',
 '01069': '104722',
 '01071': '51736',
 '01073': '659300',
 '01075': '13844',
 '01077': '92387',
 '01079': '32957',
 '01081': '163941',
 '01083': '96174',
 '01085': '9974',
 '01087': '18439',
 '01089': '366519',
 '01091': '19066',
 '01093': '29763',
 '01095': '96109',
 '01097': '413757',
 '01099': '21067',


In [14]:
# Check the scaling setting: a falsy value (0) means raw values are plotted,
# anything else means values are rescaled per `per_what` people.
per_what

0

In [15]:
# Build one ordered FIPS -> value mapping per date column, in column order.
datas = []
for label in labels:  # one pass per date
    daily = OrderedDict()
    for fips, raw in zip(timeseries['FIPS'], timeseries[label]):  # one pass per county
        if fips not in pops:
            # Counties without a population record are left out of the map.
            continue
        count = float(raw)
        # A non-zero per_what turns the raw value into a rate per `per_what` people;
        # otherwise the raw value is kept as-is.
        daily[fips] = count / float(pops[fips]) * per_what if per_what else count
    datas.append(daily)

## Define the Bins and Colormaps

In [8]:
# Pool every value from every date so the overall maximum can be compared with the
# bin edges below. A flat comprehension is linear in the number of values, whereas
# sum(list_of_lists, []) re-copies the growing list on every step.
values = np.array([value for data in datas for value in data.values()])
print(np.max(values))

# Hand-picked bin edges for the choropleth legend.
binning_endpoints = [1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]
# Alternative: derive the edges from the data instead, e.g.
# binning_endpoints = list(np.quantile(values, [0.5, 0.9, 0.95, 0.99, 0.999]))

# Get a color map.
cmap = plt.get_cmap('Reds')

# One light grey for the lowest bin, followed by progressively darker reds sampled
# from the colormap.
num_points = len(binning_endpoints) + 1
colors = [(0.9, 0.9, 0.9)] + [cmap((i+1) / num_points) for i in range(num_points)]

# matplotlib returns color channels as floats in [0, 1], but plotly's 'rgb(...)'
# strings use 0-255 channels, so scale before formatting. Without the scaling,
# values such as 'rgb(0.9, 0.9, 0.9)' are interpreted as nearly black.
colorscale = [
    'rgb({}, {}, {})'.format(*(int(round(255 * channel)) for channel in t[:3]))
    for t in colors
]

158258.0


## Save PNG Colormaps for Each Date

In [None]:
# Set to True to write every frame into timeseries_dir as a PNG (the GIF step needs
# those files). NOTE(review): fig.write_image needs a plotly static-image backend to be
# installed - confirm which one this environment provides before enabling.
save_images = False
# Number of leading frames that are also displayed inline as a preview.
num_previews = 5

for i, data in enumerate(datas):
    # The i-th date column is taken to be i days after `start`.
    date = start + dt.timedelta(days=i)
    datestring = f'{date:%B} {date.day} {date.year}'
    fig = ff.create_choropleth(
        fips=list(data.keys()),      # List of FIPS codes.
        values=list(data.values()),  # Corresponding value for each county.
        binning_endpoints=binning_endpoints,
        county_outline={'color': 'rgb(255,255,255)', 'width': 0.05},
        colorscale=colorscale,
        title=f'{datestring.rjust(15)} COVID-19 Infections'
    )
    # Clear the layout template so no default theme styling is applied.
    fig.layout.template = None

    # ISO dates in the filename make the frames sort chronologically.
    filename = join(timeseries_dir, f'{date.isoformat()}.png')

    if save_images:
        fig.write_image(filename)

    # Show only the first few frames inline.
    if i < num_previews:
        fig.show()

    # Report what actually happened: frames are only written when save_images is on.
    if save_images:
        print(f'Wrote image {i + 1}/{len(datas)}')
    else:
        print(f'Rendered image {i + 1}/{len(datas)} (not saved)')


## Convert the PNGs to a GIF using ImageMagick

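Use the command-line ImageMagick tool `convert` to combine the PNGs in the output directory (`timeseries_dir`) into an animated GIF. For example, from inside that directory:
```sh
convert -delay 50 -loop 0 *.png infections.gif
```
Here `-delay` sets the pause between frames (in hundredths of a second) and `-loop 0` makes the GIF repeat indefinitely.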

In [10]:
!convert -delay 50 -loop 0 infections_timeseries/*.png infections.gif