In [164]:
import pandas as pd
import numpy as np
import os
import datetime

In [165]:
def date_format(date):
    #Formats dates to a uniform regime: 01/01/2020
    d = datetime.datetime.strptime(date, '%m/%d/%Y')
    return datetime.date.strftime(d, "%m/%d/%y")

In [166]:
def edit_column_date(frame,index):
    #Edits the date format of columns of dataframes
    #index: index of the first column of dates + 1
    i = 0
    for col in frame:
        i += 1
        if i >= index:
            new_d = date_format(col)
            frame = frame.rename(columns={col : new_d})
    return frame

In [167]:
def sort_dates(frame,index):
    #Sorts the columns by date of a frame with many nonconsecutive dates (several factors per date)
    Beg = list(frame.columns[:index]) #First four entries
    End = list(np.sort(np.array(frame.columns[index:]))) #Every Date Sorted
    cols = list(Beg + End) #Ordered Columns

    frame = frame[cols]
    return frame

In [168]:
def delete_zero_columns(frame): #Delete Columns containing only 0s
    cols = list(frame.columns)
    for col in cols:
        if (frame[col] == 0).all():
            frame = frame.drop(columns=[col])
    return frame        

In [169]:
#This is the best record for cases/deaths out of all other sources, has most FIPS codes included

USAFacts_C = pd.read_csv('../../../../data/us/covid/confirmed_cases.csv')
USAFacts_D = pd.read_csv('../../../../data/us/covid/deaths.csv')

In [170]:
#Less inclusive set of cases/deaths for counties

JHU_CD = pd.read_csv('../../../../data/us/covid/JHU_daily_US.csv')

NYTCounties_CD = pd.read_csv('../../../../data/us/covid/nyt_us_counties.csv')

NYTCounties_CD_Daily = pd.read_csv('../../../../data/us/covid/nyt_us_counties_daily.csv')

In [171]:
#Removing Statewide Unallocated Lines   #Can be changed later
USAFacts_C = USAFacts_C[USAFacts_C['County Name'] != 'Statewide Unallocated']
USAFacts_D = USAFacts_D[USAFacts_D['County Name'] != 'Statewide Unallocated']
############################################################################
USAFacts_C = edit_column_date(USAFacts_C,5)
USAFacts_D = edit_column_date(USAFacts_D,5)

#Merging Cases and Deaths
USAFacts_CD = pd.merge(USAFacts_C, USAFacts_D, left_on='countyFIPS', right_on='countyFIPS', suffixes=('_C', '_D'))
USAFacts_CD = USAFacts_CD.drop(columns=['County Name_D', 'State_D', 'stateFIPS_D'])
USAFacts_CD = USAFacts_CD.drop(columns=['County Name_C', 'State_C', 'stateFIPS_C'])
USAFacts_CD.columns = USAFacts_CD.columns.str.replace('countyFIPS','FIPS')

USAFacts_CD_NonConsecutive = USAFacts_CD
USAFacts_CD_NonConsecutive = USAFacts_CD_NonConsecutive.set_index('FIPS')

USAFacts_CD_NonConsecutive.to_csv('USAFacts_CDNonConsecutive.csv') #csv of Fips, All Dates of Cases, All Dates of Deaths

Beg = list(USAFacts_CD.columns[:4]) #First four entries
End = list(np.sort(np.array(USAFacts_CD.columns[4:]))) #Every Date Sorted
cols = list(Beg + End) #Ordered Columns
USAFacts_CDConsecutive = USAFacts_CD[cols]

USAFacts_CDConsecutive = USAFacts_CDConsecutive.set_index('FIPS')

USAFacts_CDConsecutive.to_csv('USAFacts_CDConsecutive.csv') #Csv of FIps, each date and the number of cases & deaths

In [172]:
JHU_CD = JHU_CD[JHU_CD.FIPS <= 60000]
JHU_CD = pd.pivot_table(JHU_CD, values=['Confirmed', 'Deaths'], index=['FIPS'], columns=['Date'])
JHU_CD = pd.DataFrame(JHU_CD)
JHU_CD = JHU_CD.fillna(0)
JHU_CD.to_csv('JHU_CD.csv')

In [173]:
NYTCounties_CD = NYTCounties_CD.sort_values(by=['fips', 'date'])
NYTCounties_CD = NYTCounties_CD[NYTCounties_CD.fips <= 95000]
NYTCounties_CD = pd.pivot_table(NYTCounties_CD, values=['cases', 'deaths'], index=['fips'], columns=['date'])
NYTCounties_CD = NYTCounties_CD.fillna(0)
NYTCounties_CD.to_csv('NYTCounties_CD.csv')

In [174]:
NYTCounties_CD_Daily.sort_values(by=['fips', 'date'])
NYTCounties_CD_Daily = pd.pivot_table(NYTCounties_CD_Daily, values=['cases', 'deaths'], index=['fips'], columns=['date'])
NYTCounties_CD_Daily = NYTCounties_CD_Daily.fillna(0)
NYTCounties_CD_Daily.to_csv('NYTCounties_CD_Daily.csv')

In [175]:
print('USAFacts_CD: ' + str(len(USAFacts_CD)))
print('JHU_CD: ' + str(len(JHU_CD)))
print('NYTCounties_CD: ' + str(len(NYTCounties_CD)))
print('NYTCounties_CD_Daily: ' + str(len(NYTCounties_CD_Daily)))

USAFacts_CD: 3146
JHU_CD: 3142
NYTCounties_CD: 2649
NYTCounties_CD_Daily: 2651


In [176]:
USAFacts_CDConsecutive.head()

Unnamed: 0_level_0,01/22/20_C,01/23/20_C,01/24/20_C,01/22/20_D,01/23/20_D,01/24/20_D,01/25/20_C,01/25/20_D,01/26/20_C,01/26/20_D,...,04/07/20_C,04/07/20_D,04/08/20_C,04/08/20_D,04/09/20_C,04/09/20_D,04/10/20_C,04/10/20_D,04/11/20_C,04/11/20_D
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,0,0,0,0,0,0,0,0,0,0,...,12,0.0,12,1.0,17,1.0,17,1.0,19,1.0
1003,0,0,0,0,0,0,0,0,0,0,...,42,0.0,49,0.0,59,0.0,59,0.0,66,0.0
1005,0,0,0,0,0,0,0,0,0,0,...,3,0.0,3,0.0,7,0.0,9,0.0,10,0.0
1007,0,0,0,0,0,0,0,0,0,0,...,8,0.0,9,0.0,11,0.0,11,0.0,13,0.0
1009,0,0,0,0,0,0,0,0,0,0,...,10,0.0,10,0.0,11,0.0,12,0.0,12,0.0


In [177]:
USAFacts_CD_NonConsecutive.head()

Unnamed: 0_level_0,01/22/20_C,01/23/20_C,01/24/20_C,01/25/20_C,01/26/20_C,01/27/20_C,01/28/20_C,01/29/20_C,01/30/20_C,01/31/20_C,...,04/02/20_D,04/03/20_D,04/04/20_D,04/05/20_D,04/06/20_D,04/07/20_D,04/08/20_D,04/09/20_D,04/10/20_D,04/11/20_D
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
1003,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [178]:
NYTCounties_CD.head()

Unnamed: 0_level_0,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,...,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
date,2020-01-21,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,...,2020-04-02,2020-04-03,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08,2020-04-09,2020-04-10,2020-04-11
fips,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1001.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
1003.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1005.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [179]:
NYTCounties_CD_Daily.head()

Unnamed: 0_level_0,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,...,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
date,2020-01-21,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,...,2020-04-02,2020-04-03,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08,2020-04-09,2020-04-10,2020-04-11
fips,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [180]:
JHU_CD.head()

Unnamed: 0_level_0,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,...,Deaths,Deaths,Deaths,Deaths,Deaths,Deaths,Deaths,Deaths,Deaths,Deaths
Date,03-23-2020,03-24-2020,03-25-2020,03-26-2020,03-27-2020,03-28-2020,03-29-2020,03-30-2020,03-31-2020,04-01-2020,...,04-02-2020,04-03-2020,04-04-2020,04-05-2020,04-06-2020,04-07-2020,04-08-2020,04-09-2020,04-10-2020,04-11-2020
FIPS,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1001.0,0.0,1.0,4.0,6.0,6.0,6.0,6.0,6.0,7.0,8.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0
1003.0,3.0,4.0,4.0,5.0,5.0,10.0,15.0,18.0,19.0,20.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1005.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009.0,0.0,0.0,1.0,2.0,4.0,5.0,5.0,5.0,5.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
