# COVID-19 Case Counts from Johns Hopkins University

**[Work in progress]**

This notebook creates a .csv file with cumulative confirmed cases and deaths for ingestion into the Knowledge Graph.

Data source: [COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University](https://github.com/CSSEGISandData/COVID-19)

Author: Peter Rose (pwrose@ucsd.edu)

In [2]:
import os
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
from pathlib import Path
import pandas as pd
import dateutil

In [3]:
# Lift pandas' display limits so frames are rendered in full.
pd.set_option("display.max_rows", None)  # display all rows
pd.set_option("display.max_columns", None)  # display all columns

In [4]:
# Locate the Neo4j installation from the NEO4J_HOME environment variable.
# os.getenv returns None when the variable is unset, and Path(None) would
# fail with an opaque TypeError, so stop early with an actionable message.
neo4j_home_env = os.getenv('NEO4J_HOME')
if neo4j_home_env is None:
    raise EnvironmentError(
        "The NEO4J_HOME environment variable is not set; "
        "point it at the Neo4j installation directory."
    )
NEO4J_HOME = Path(neo4j_home_env)
print(NEO4J_HOME)

/Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-4af96121-2328-4e2f-ba60-6d8b728a26d5/installation-4.0.3


In [5]:
def split_by_day(df, day, label):
    """Extract a single day's column from a wide table as a labelled frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'stateFips' and 'countyFips' columns and a column
        named by ``day``.
    day : str
        Name of the column to extract; the same value is written into
        every row of the added 'date' column.
    label : str
        New name for the extracted column.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns stateFips, countyFips, ``label`` and date.
        The input frame is left untouched.
    """
    fips_and_day = ['stateFips', 'countyFips', day]
    return (
        df[fips_and_day]
        .rename(columns={day: label})
        .assign(date=day)
    )

### Process cumulative confirmed cases

In [71]:
# Download the tidy time series; dtype='str' keeps every column as a string.
TIMESERIES_URL = "https://coronadatascraper.com/timeseries-tidy.csv.zip"
df = pd.read_csv(TIMESERIES_URL, dtype='str')

In [72]:
# Replace missing values with empty strings in every column.
# NOTE(review): presumably done so that rows with blank location fields
# survive the groupby on those columns further down — confirm.
df = df.fillna('')

In [73]:
# List the distinct values found in the 'type' column.
df['type'].unique()

array(['cases', 'growthFactor', 'hospitalized', 'discharged', 'deaths',
       'tested', 'recovered', 'active', 'icu'], dtype=object)

In [74]:
# Group rows by location and date, keeping only the 'type' and 'value'
# columns of each group.
location_date_keys = ['level', 'country', 'state', 'county', 'city', 'date']
df_by_type = df.groupby(by=location_date_keys)[['type', 'value']]

In [None]:
# Preview the grouped selection.
# NOTE(review): head() on a GroupBy returns the first rows of *each* group,
# not of the whole frame; with display.max_rows set to None the rendered
# output may be very large — confirm this is intended.
df_by_type.head()

In [None]:
def split_by_type(df, value_type, label):
    """Select the rows of one measurement type and relabel the value column.

    Parameters
    ----------
    df : pandas.DataFrame
        Tidy table with at least the columns 'type', 'level', 'country',
        'state', 'county', 'date' and 'value'.
    value_type : str
        Rows whose 'type' equals this value are kept.
    label : str
        New name for the 'value' column in the result.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns level, country, state, county, date and
        ``label``; the original row index is preserved and ``df`` is not
        modified.
    """
    kept_columns = ['level', 'country', 'state', 'county', 'date', 'value']
    matches_type = df['type'] == value_type
    return df.loc[matches_type, kept_columns].rename(columns={'value': label})

In [None]:
# Confirmed case counts: the 'cases' rows with the value column relabelled.
confirmed = split_by_type(df, value_type='cases', label='confirmedCases')
confirmed.head()

In [58]:
# Death counts: the 'deaths' rows with the value column relabelled.
deaths = split_by_type(df, value_type='deaths', label='confirmedDeaths')
deaths.head()

Unnamed: 0,level,country,state,county,date,confirmedDeaths
154360,county,United States,California,San Diego County,2020-01-22,0
154362,county,United States,California,San Diego County,2020-01-23,0
154364,county,United States,California,San Diego County,2020-01-24,0
154366,county,United States,California,San Diego County,2020-01-25,0
154368,county,United States,California,San Diego County,2020-01-26,0


### Process cumulative deaths

In [46]:
# Cumulative deaths per location and date.
# Reuses split_by_type instead of repeating its filter / column selection /
# rename steps inline, so both code paths stay in sync.
# The column label is kept exactly as before because it becomes part of the
# output schema. Note that this rebinds `deaths`, which an earlier cell
# assigned with the 'confirmedDeaths' label.
deaths = split_by_type(df, 'deaths', 'cummulativeDeaths')
deaths.head()

Unnamed: 0,level,country,state,county,date,cummulativeDeaths
154360,county,United States,California,San Diego County,2020-01-22,0
154362,county,United States,California,San Diego County,2020-01-23,0
154364,county,United States,California,San Diego County,2020-01-24,0
154366,county,United States,California,San Diego County,2020-01-25,0
154368,county,United States,California,San Diego County,2020-01-26,0


#### Reformat dataframe by day

In [16]:
# Show the first 100 rows of `cases`.
# NOTE(review): `cases` is not assigned in any cell visible above; it
# presumably came from a since-removed cell (the rendered output looks like
# the raw frame restricted to type == 'cases') — TODO define it explicitly
# so the notebook survives Restart Kernel -> Run All.
cases.head(100)

Unnamed: 0,name,level,city,county,state,country,population,lat,long,aggregate,tz,date,type,value
0,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-22,cases,4
1,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-23,cases,4
3,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-24,cases,4
5,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-25,cases,4
7,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-26,cases,4
9,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-27,cases,4
11,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-28,cases,4
13,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-29,cases,4
15,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-30,cases,4
17,"Antwerp, Flanders, Belgium",county,,Antwerp,Flanders,Belgium,1847486,51.2485,4.717499999999999,,Europe/Brussels,2020-01-31,cases,4
