In [1]:
import pandas as pd
from datetime import date, timedelta
import json

pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [11]:
state = 'Kentucky'
state_fips = '21'
today = str(date.today())
print("Today's date:", today)

cnty_join = pd.read_csv('../data/tl_2019_us_county.csv',dtype={'STATEFP':str,'COUNTYFP':str,'GEOID':str})
cnty_join = cnty_join.loc[cnty_join['STATEFP'] == state_fips]
cnty_join = cnty_join[['STATEFP','COUNTYFP','GEOID','NAMELSAD']]

case_data = pd.read_csv('../../data/case-data/20200328-nytime-us-counties.csv',dtype={'fips':str})
state_cases = case_data.loc[case_data['state'] == state]
state_cases['date'] = pd.to_datetime(state_cases['date'], format='%Y-%m-%d', errors='ignore')

cases_by_day = state_cases.groupby('date').agg({'cases':'sum','deaths':'sum'}).reset_index()
most_recent = cases_by_day.sort_values('date',ascending=False).iloc[0]

print('Total cases reported for',state,':',most_recent['cases'])
print('Total deaths reported for',state,':',most_recent['deaths'])
print('Most recent case confirmation date:',most_recent['date'])
display(cases_by_day)

Today's date: 2020-03-28
Total cases reported for Kentucky : 306
Total deaths reported for Kentucky : 8
Most recent case confirmation date: 2020-03-27 00:00:00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


Unnamed: 0,date,cases,deaths
0,2020-03-06,1,0
1,2020-03-07,1,0
2,2020-03-08,4,0
3,2020-03-09,5,0
4,2020-03-10,7,0
5,2020-03-11,7,0
6,2020-03-12,11,0
7,2020-03-13,11,0
8,2020-03-14,18,0
9,2020-03-15,21,0


## County + day dataset

In addition to the above cases per county per day, we need to record county + days that have no cases or deaths so we can get a good time series.

In [12]:
fullDateTime.dtypes

date          datetime64[ns]
fips                  object
county                object
date_str              object
join_field            object
dtype: object

In [13]:
covid_start = date(2020, 1, 21)   # start date
today = date.today()   # end date
delta = today - covid_start       # as timedelta
delta

days = []
for i in range(delta.days + 1):
    day = covid_start + timedelta(days=i)
    days.append(day)
    
fullDateTime = pd.DataFrame(columns=['date','fips','county'])
for day in days:
    for index, row in cnty_join.iterrows():
        fullDateTime = fullDateTime.append({'date': day, 
                                            'fips': row['GEOID'], 
                                            'county': row['NAMELSAD']}, ignore_index=True)

fullDateTime['date'] = pd.to_datetime(fullDateTime['date'], format='%Y-%m-%d', errors='coerce')

fullDateTime['date_str'] = fullDateTime['date'].dt.strftime('%Y-%m-%d')
fullDateTime['join_field'] = fullDateTime['date_str'] + '-' + fullDateTime['fips']

by_county = state_cases
by_county['date_str'] = by_county['date'].dt.strftime('%Y-%m-%d')
by_county['join_field'] = by_county['date_str'] + '-' + by_county['fips']
    
fullDT_joined = fullDateTime.merge(by_county,how='left',on='join_field')
fullDT_joined = fullDT_joined.fillna(0)

fullDT_joined = fullDT_joined[['date_x','fips_x','county_x','cases','deaths']]
fullDT_joined.rename(columns={'date_x':'date','fips_x':'fips','county_x':'county'}, inplace=True)

fullDT_joined.to_csv('../app/assets/data/'+str(today)+'-'+state+'-export.csv',index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [14]:
by_county.loc[by_county['fips'].isna()]

Unnamed: 0,date,county,state,fips,cases,deaths,date_str,join_field
4680,2020-03-19,Unknown,Kentucky,,1,0,2020-03-19,
5494,2020-03-20,Unknown,Kentucky,,1,0,2020-03-20,
6442,2020-03-21,Unknown,Kentucky,,5,0,2020-03-21,
7516,2020-03-22,Unknown,Kentucky,,6,0,2020-03-22,
8679,2020-03-23,Unknown,Kentucky,,9,0,2020-03-23,
9971,2020-03-24,Unknown,Kentucky,,18,0,2020-03-24,
11377,2020-03-25,Unknown,Kentucky,,36,0,2020-03-25,
12959,2020-03-26,Unknown,Kentucky,,42,0,2020-03-26,
14675,2020-03-27,Unknown,Kentucky,,45,0,2020-03-27,


In [5]:
by_county.groupby('date')['cases'].sum()

date
2020-03-06      1
2020-03-07      1
2020-03-08      4
2020-03-09      5
2020-03-10      7
2020-03-11      7
2020-03-12     11
2020-03-13     11
2020-03-14     18
2020-03-15     21
2020-03-16     22
2020-03-17     26
2020-03-18     35
2020-03-19     47
2020-03-20     63
2020-03-21     84
2020-03-22    103
2020-03-23    124
2020-03-24    162
2020-03-25    203
Name: cases, dtype: int64

In [7]:
fullDT_joined.groupby('date')['cases'].sum()

date
2020-01-21      0.0
2020-01-22      0.0
2020-01-23      0.0
2020-01-24      0.0
2020-01-25      0.0
2020-01-26      0.0
2020-01-27      0.0
2020-01-28      0.0
2020-01-29      0.0
2020-01-30      0.0
2020-01-31      0.0
2020-02-01      0.0
2020-02-02      0.0
2020-02-03      0.0
2020-02-04      0.0
2020-02-05      0.0
2020-02-06      0.0
2020-02-07      0.0
2020-02-08      0.0
2020-02-09      0.0
2020-02-10      0.0
2020-02-11      0.0
2020-02-12      0.0
2020-02-13      0.0
2020-02-14      0.0
2020-02-15      0.0
2020-02-16      0.0
2020-02-17      0.0
2020-02-18      0.0
2020-02-19      0.0
2020-02-20      0.0
2020-02-21      0.0
2020-02-22      0.0
2020-02-23      0.0
2020-02-24      0.0
2020-02-25      0.0
2020-02-26      0.0
2020-02-27      0.0
2020-02-28      0.0
2020-02-29      0.0
2020-03-01      0.0
2020-03-02      0.0
2020-03-03      0.0
2020-03-04      0.0
2020-03-05      0.0
2020-03-06      1.0
2020-03-07      1.0
2020-03-08      4.0
2020-03-09      5.0
2020-03-10     