This notebook shows the COVID incidence rates over time for 
* Atlanta  
* Austin  
* Boston  
* Portland  
* Orlando

The COVID incidence data is by FIPS code, which is generally reported by county. So we will look at COVID incidence data for the following counties:
* Fulton
* Travis  
* Suffolk  
* Multnomah
* Orange

The COVID incidence data comes from Johns Hopkins University and can be found [here](https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data).

In [None]:
import datetime
import glob
import json
import os
import re

import pandas as pd
import plotly.express as px
import requests

import get_fips_codes

In [None]:
### Look up the FIPS codes for the five cities/counties above.
counties = ["Fulton", "Travis", "Suffolk", "Multnomah", "Orange"]
county_fips = get_fips_codes.get_fips(county_list=counties)

### Since at least one county name will return multiple entries, filter by state also.
### .copy() detaches the filtered rows from county_fips, so the column assignment
### below writes to an independent frame instead of a slice (no SettingWithCopyWarning).
states = ["GA", "TX", "MA", "OR", "FL"]
fips = county_fips[county_fips.State.isin(states)].copy()

### Convert FIPS to a numeric column so it can be compared against the
### FIPS column of the daily reports later on.
fips["FIPS"] = pd.to_numeric(fips["FIPS"])

## There is an Orange County in TX also (FIPS 48361); drop it.
## .copy() again so later cells can add columns to `fips` without warnings.
fips = fips[fips.FIPS != 48361].copy()
print(fips)

Some of the earlier files do not have FIPS codes. Since they are very early files and we only want to look at general trends, I'll keep only the files that do have FIPS codes.

In [None]:
## Glob pattern for the daily report CSVs.
## NOTE(review): this is an absolute path on one machine -- presumably a local clone
## of the JHU CSSE repository; adjust it before running elsewhere.
dpath = "/Users/christinabrady/Documents/data/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports/*.csv"
covid_files = glob.glob(dpath)

## Keep only the files whose header contains a FIPS column.
## nrows=0 parses just the header row, so each file is not loaded in full
## merely to inspect its column names.
has_fips = [fl for fl in covid_files if "FIPS" in pd.read_csv(fl, nrows=0).columns]

# check it
# set(covid_files) - set(has_fips)


In [None]:
## The early files don't have a calculated incident rate,
## so we'll pull the confirmed cases and calculate it ourselves.

daily_frames = []
for fl in has_fips:
    ## The report date appears in the file path as MM-DD-YYYY.
    dt = re.search(r"[0-9]{2}-[0-9]{2}-[0-9]{4}", fl).group(0)
    print("working on ", dt)
    ## Only FIPS and Confirmed are used below, so skip parsing the other columns.
    tmp = pd.read_csv(fl, usecols=["FIPS", "Confirmed"])
    ## .assign returns a new frame, so the date column is never written into a slice of tmp.
    ret = tmp.loc[tmp.FIPS.isin(fips.FIPS), ["FIPS", "Confirmed"]].assign(
        date=datetime.datetime.strptime(dt, "%m-%d-%Y").date()
    )
    daily_frames.append(ret)

## One row per (county, report date) for the counties of interest.
confirmed_cases = pd.concat(daily_frames)
confirmed_cases.FIPS = confirmed_cases.FIPS.astype("int") ### this is so we can merge on FIPS later

In [None]:
### Use the Census API to pull population data for each county
## in order to calculate the incidence rate for the missing dates.

## Placeholders are filled with (county code, state code), in that order.
base_api_call = "https://api.census.gov/data/2019/pep/population?get=DATE_DESC,POP,NAME&for=county:%s&in=state:%s"
pop_stats = []

for code in fips.FIPS.values:
    print(code)
    ## The code is split into a 2-digit state prefix and a 3-digit county suffix.
    ## FIPS was converted to a number earlier, which drops any leading zero
    ## (state codes below 10), so pad back to 5 digits before slicing.
    code_str = str(int(code)).zfill(5)
    url = base_api_call % (code_str[2:], code_str[:2])
    ## pandas fetches the URL and parses the JSON response into a DataFrame.
    response = pd.read_json(url)
    pop_stats.append(response)

    

In [None]:
## Stack the per-county API responses into one table and name its columns.
pop = pd.concat(pop_stats)
pop.columns = ['date_desc', 'population', 'county_name', 'state', 'county']

## Keep only the rows whose date description mentions 2019, then derive the
## numeric columns. .assign returns a new frame, so nothing is written into
## the filtered slice (avoids SettingWithCopyWarning).
pop = pop[pop['date_desc'].str.contains('2019')].assign(
    ## Full FIPS code = state code followed by county code, as an integer
    ## so it can be merged against confirmed_cases.FIPS.
    FIPS=lambda df: (df['state'] + df['county']).astype("int"),
    population=lambda df: df['population'].astype("int"),
)
pop.head()

In [None]:
## Attach each county's population to its confirmed-case rows, express the
## counts as cases per 100,000 people, and order the rows by date and county.
cases = (
    confirmed_cases
    .merge(pop, how='left', on='FIPS')
    .assign(incidence=lambda df: df['Confirmed'] / (df['population'] / 100000))
    .sort_values(['date', 'county_name'], ascending=True)
)
cases.head()

In [None]:
## Convert the date column to pandas datetimes before plotting.
cases["date"] = pd.to_datetime(cases["date"], format="%Y-%m-%d")

## Human-readable axis titles for the plotted columns.
axis_labels = {
    'date': 'Date',
    'incidence': 'Confirmed Cases per 100,000 people',
}

## One line per county, incidence over time.
covid_fig = px.line(
    cases,
    x='date',
    y='incidence',
    color='county_name',
    title="Incidence of COVID-19",
    labels=axis_labels,
)
covid_fig.show()

In [None]:
## Label each county with the city it represents. The labels are keyed by FIPS
## code rather than by row position, so they stay correct whatever order the
## rows of `fips` happen to be in.
city_by_fips = {
    12095: "Orlando",   # Orange County, FL
    13121: "Atlanta",   # Fulton County, GA
    25025: "Boston",    # Suffolk County, MA
    41051: "Portland",  # Multnomah County, OR
    48453: "Austin",    # Travis County, TX
}
fips = fips.assign(city=pd.to_numeric(fips["FIPS"]).map(city_by_fips))

## Snapshot of the incidence on a single day. `cases.date` is converted to pandas
## datetimes in the plotting cell, and a datetime64 column does not compare equal
## to a plain datetime.date in current pandas, so compare against a Timestamp.
## pd.to_datetime makes this work whether or not that conversion has run.
export_day = pd.Timestamp(2021, 3, 15)
for_export = cases.loc[pd.to_datetime(cases["date"]) == export_day, ["FIPS", "date", "incidence"]]
for_export = for_export.merge(fips, left_on="FIPS", right_on="FIPS")

In [None]:
## Display the rows that are about to be exported.
for_export

In [None]:
## Write the per-city incidence snapshot to disk. to_csv does not create missing
## directories, so make sure the output folder exists first.
## Note: the DataFrame index is written as the first (unnamed) column.
export_path = "processed_data/covid_incidence_by_city.csv"
os.makedirs(os.path.dirname(export_path), exist_ok=True)
for_export.to_csv(export_path)