# COVID-19 Data Processing: Time-Series Reports

In [1]:
# Enable the autoreload extension in mode 2 so that imported modules are
# re-imported before each cell executes (edits to local .py files are picked
# up without restarting the kernel).
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import math
import numpy as np
import pandas as pd

In [3]:
# Root folder and dataset sub-folder that hold the downloaded time-series CSVs.
out_dir = 'data'
data_dir = 'covid19'

# Locations of the two confirmed-cases time-series files (global and US).
global_csv = f'{out_dir}/{data_dir}/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
us_csv = f'{out_dir}/{data_dir}/csse_covid_19_time_series/time_series_covid19_confirmed_us.csv'

# Both files are comma-separated, so the default CSV reader settings apply.
df_global = pd.read_csv(global_csv)
df_us = pd.read_csv(us_csv)

In [4]:
# Display the US frame as the cell's output to inspect its column layout
# (the notebook shows a truncated view of the rows and columns).
df_us

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20
0,16.0,AS,ASM,16,60.0,,American Samoa,US,-14.271000,-170.132000,...,0,0,0,0,0,0,0,0,0,0
1,316.0,GU,GUM,316,66.0,,Guam,US,13.444300,144.793700,...,139,141,141,141,141,141,141,145,145,145
2,580.0,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.097900,145.673900,...,14,14,14,14,14,14,14,14,14,14
3,630.0,PR,PRI,630,72.0,,Puerto Rico,US,18.220800,-66.590100,...,1416,1276,1307,1371,1389,1400,1433,1539,1575,1757
4,850.0,VI,VIR,850,78.0,,Virgin Islands,US,18.335800,-64.896300,...,54,54,55,57,57,57,57,66,66,66
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3256,84070016.0,US,USA,840,,Central Utah,Utah,US,39.372319,-111.575868,...,21,21,21,21,22,22,22,23,23,23
3257,84070017.0,US,USA,840,,Southeast Utah,Utah,US,38.996171,-110.701396,...,7,8,11,12,13,13,14,12,13,15
3258,84070018.0,US,USA,840,,Southwest Utah,Utah,US,37.854472,-111.441876,...,76,81,83,87,89,91,93,98,101,103
3259,84070019.0,US,USA,840,,TriCounty,Utah,US,40.124915,-109.517442,...,9,9,9,10,11,11,11,13,13,13


In [5]:
import json

# Build `out`: a mapping from a location id ("Country", "State, Country",
# "County, State, US", ...) to a record holding its hierarchy level, its
# [longitude, latitude] pair and its per-date case counts.
#
# Three sources are merged, in this order:
#   1. every usable row of `df_global`,
#   2. every usable row of `df_us`,
#   3. the entries of covid-19-country-states.json, whose case series are
#      obtained by summing the rows that belong to them.

# Rows whose |lat| + |long| falls below this threshold carry no usable location.
MIN_ABS_COORD_SUM = 0.000001
# Number of smoothing passes applied to every case series (see `_fix_dips`).
DIP_FIX_PASSES = 5
# Position of the first per-date column in each frame's rows.
# NOTE(review): assumes 4 leading metadata columns in the global file and 11 in
# the US file (UID ... Combined_Key) -- confirm against the CSV headers.
GLOBAL_FIRST_CASE_COL = 4
US_FIRST_CASE_COL = 11


def _has_location(lat, long):
    """Return True when |lat| + |long| reaches MIN_ABS_COORD_SUM.

    A NaN coordinate makes the comparison False, so such rows are rejected
    together with rows located at (0, 0).
    """
    return abs(lat) + abs(long) >= MIN_ABS_COORD_SUM


def _fix_dips(cases, passes=DIP_FIX_PASSES):
    """Smooth out short dips in a case series.

    Each pass replaces every value (except the first) with the maximum of
    itself and its predecessor. After `passes` passes every value is therefore
    the maximum over itself and up to `passes` preceding values, which removes
    dips lasting at most `passes` consecutive entries.

    Returns a new array; the input is not modified.
    """
    for _ in range(passes):
        cases = np.r_[cases[0], np.maximum(cases[:-1], cases[1:])]
    return cases


def _add_cases(record, cases):
    """Add `cases` element-wise to the running total stored in record['cases'].

    The first contribution is stored as a *copy*. Storing the array itself
    would let two records share one buffer (e.g. the US total and the first
    state seen), so that every later in-place `+=` on one of them silently
    changed the other as well.
    """
    if record['cases'] is None:
        record['cases'] = cases.copy()
    else:
        record['cases'] += cases


with open('data/covid-19-country-states.json') as f:
    country_states = json.load(f)

# Wrap each raw JSON value (used as the entry's "longLat") in the record layout
# shared with `out`; "cases" stays None until a first row is accumulated.
country_states = {
    name: {
        "level": 1,
        "longLat": long_lat,
        "cases": None
    }
    for name, long_lat in country_states.items()
}

out = {}

for _, row in df_global.iterrows():
    if not _has_location(row['Lat'], row['Long']):
        continue

    country = row['Country/Region'].strip()
    uid = country
    level = 0
    cases = _fix_dips(row.values[GLOBAL_FIRST_CASE_COL:])

    # A string Province/State marks a sub-national row (missing values are
    # not strings): it gets a "State, Country" id, sits one level deeper and
    # contributes to its country's total when that country is tracked.
    if isinstance(row['Province/State'], str):
        state = row['Province/State'].strip()
        uid = state + ', ' + uid
        level += 1

        if country in country_states:
            _add_cases(country_states[country], cases)

    out[uid] = {
        "level": level,
        "longLat": [row['Long'], row['Lat']],
        "cases": cases.tolist()
    }

# These entries are aggregated from their province rows above and are emitted
# as top-level (level 0) records.
for name in ('Australia', 'Canada', 'China'):
    country_states[name]['level'] = 0

# Names of `country_states` entries that were fed from US rows; they are
# emitted under "<name>, US" below.
us_state_names = set()

for _, row in df_us.iterrows():
    if not _has_location(row['Lat'], row['Long_']):
        continue

    # Normalise the spacing of the comma-separated key; the number of commas
    # gives the depth of the row in the location hierarchy.
    key_parts = [part.strip() for part in row['Combined_Key'].split(',')]
    uid = ', '.join(key_parts)
    level = len(key_parts) - 1
    cases = _fix_dips(row.values[US_FIRST_CASE_COL:])

    out[uid] = {
        "level": level,
        "longLat": [row['Long_'], row['Lat']],
        "cases": cases.tolist(),
        "numLevels": 0
    }

    # Every US row counts towards the national total ...
    _add_cases(country_states['US'], cases)

    # ... and towards its state's total when that state is tracked.
    state = row['Province_State'].strip()
    if state in country_states:
        us_state_names.add(state)
        _add_cases(country_states[state], cases)

country_states['US']['level'] = 0

# Emit the aggregated records, converting the summed arrays to plain lists so
# that they can be serialised as JSON.
for name, record in country_states.items():
    if record['cases'] is None:
        # Fail with a readable message instead of AttributeError on None.
        raise ValueError(
            f"no case data was accumulated for {name!r}; "
            "check that its name matches the time-series files"
        )

    out_name = f'{name}, US' if name in us_state_names else name

    record['cases'] = record['cases'].tolist()
    record['numLevels'] = 2 if name == 'US' else 1
    out[out_name] = record

In [6]:
# Serialise the merged `out` mapping and write it to disk as a single JSON
# document.
with open('data/covid-19.json', 'w') as out_file:
    out_file.write(json.dumps(out))