# COVID-19 Data Processing: Time-Series Reports

In [1]:
# Enable the autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import math
import numpy as np
import pandas as pd

In [3]:
# Path components of the local data checkout: <out_dir>/<data_dir>/...
out_dir = 'data'
data_dir = 'covid19'

# Confirmed-case time series: one comma-separated file for the global table
# and one for the US table.
df_global = pd.read_csv(
    f'{out_dir}/{data_dir}/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
)
df_us = pd.read_csv(
    f'{out_dir}/{data_dir}/csse_covid_19_time_series/time_series_covid19_confirmed_us.csv'
)

In [4]:
# Display the US frame to inspect its columns (the rendering below is truncated).
df_us

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.2710,-170.1320,...,0,0,0,0,0,0,0,0,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,...,112,113,121,121,128,130,133,133,133,133
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,...,6,6,6,6,6,6,6,6,6,6
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,...,475,513,573,620,683,725,788,897,903,923
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,...,42,43,43,45,45,50,51,51,51,51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3248,84090053,US,USA,840,90053.0,Unassigned,Washington,US,0.0000,0.0000,...,533,648,677,915,838,908,848,732,619,484
3249,84090054,US,USA,840,90054.0,Unassigned,West Virginia,US,0.0000,0.0000,...,0,0,0,21,0,0,0,0,0,0
3250,84090055,US,USA,840,90055.0,Unassigned,Wisconsin,US,0.0000,0.0000,...,0,0,0,0,0,0,0,0,0,1
3251,84090056,US,USA,840,90056.0,Unassigned,Wyoming,US,0.0000,0.0000,...,0,0,0,0,0,0,0,0,0,0


In [35]:
import json

# Positional offset of the first per-date column in each frame. Everything
# from this index onward in a row is treated as the case-count series.
# NOTE(review): assumes the leading columns are metadata only -- confirm if
# the upstream CSV layout changes.
GLOBAL_FIRST_DATE_COL = 4
US_FIRST_DATE_COL = 11

# Rows whose |Lat| + |Long| is below this are treated as having no location.
MIN_COORD_MAGNITUDE = 0.000001


def _fill_short_dips(series, passes=5):
    """Replace each value by the maximum of itself and the `passes` values before it.

    Each pass keeps element 0 and sets element i (i >= 1) to
    max(series[i - 1], series[i]). After `passes` passes, a temporary drop
    lasting at most `passes` entries is lifted to the level that preceded it;
    longer drops are only partially filled.

    Args:
        series: 1-D array of case counts (must be non-empty).
        passes: number of neighbour-maximum passes to apply.

    Returns:
        A new array of the same length (the input itself when passes == 0).
    """
    smoothed = series
    for _ in range(passes):
        smoothed = np.r_[smoothed[0], np.maximum(smoothed[:-1], smoothed[1:])]
    return smoothed


def _accumulate_cases(entry, cases):
    """Add `cases` element-wise onto entry['cases'], initialising it if unset.

    The first assignment stores a copy: the same row array can seed more than
    one aggregate (e.g. 'US' and a state), and the in-place addition used for
    later rows must not leak from one aggregate into another.
    """
    if entry['cases'] is None:
        entry['cases'] = cases.copy()
    else:
        entry['cases'] += cases


with open('data/covid-19-country-states.json') as states_file:
    country_states = json.load(states_file)

# Wrap every loaded value (used as the entry's longLat) in an aggregate record
# whose case totals are filled in from the per-row data below.
country_states = {
    name: {"level": 1, "longLat": long_lat, "cases": None}
    for name, long_lat in country_states.items()
}

out = {}

# --- Global table: one entry per row, plus per-country totals for rows that
# --- carry a Province/State.
for _, row in df_global.iterrows():
    # Written as `not (... >= ...)` so rows with NaN coordinates are skipped too.
    if not (abs(row['Lat']) + abs(row['Long']) >= MIN_COORD_MAGNITUDE):
        continue

    country = row['Country/Region'].strip()
    cases = _fill_short_dips(row.values[GLOBAL_FIRST_DATE_COL:])

    province = row['Province/State']
    if isinstance(province, str):
        # Sub-national row: key it as "<province>, <country>" and roll its
        # counts up into the country aggregate when one is defined.
        uid = province.strip() + ', ' + country
        level = 1
        if country in country_states:
            _accumulate_cases(country_states[country], cases)
    else:
        uid = country
        level = 0

    out[uid] = {
        "level": level,
        "longLat": [row['Long'], row['Lat']],
        "cases": cases.tolist()
    }

# --- US table: one entry per row, plus totals for 'US' and for any state that
# --- has an aggregate record.
for _, row in df_us.iterrows():
    if not (abs(row['Lat']) + abs(row['Long_']) >= MIN_COORD_MAGNITUDE):
        continue

    key_parts = [part.strip() for part in row['Combined_Key'].split(',')]
    uid = ', '.join(key_parts)
    cases = _fill_short_dips(row.values[US_FIRST_DATE_COL:])

    out[uid] = {
        # Depth is the number of comma-separated parts in Combined_Key minus one.
        "level": len(key_parts) - 1,
        "longLat": [row['Long_'], row['Lat']],
        "cases": cases.tolist(),
        "numLevels": 0
    }

    _accumulate_cases(country_states['US'], cases)

    state = row['Province_State'].strip()
    if state in country_states:
        _accumulate_cases(country_states[state], cases)

# These aggregates are countries, so they sit at the top level.
for name in ('Australia', 'Canada', 'China', 'US'):
    country_states[name]['level'] = 0

# Fail with a readable message instead of an AttributeError on None below.
missing = [name for name, entry in country_states.items() if entry['cases'] is None]
if missing:
    raise ValueError(f'No case data was accumulated for: {missing}')

# Publish the aggregates; an aggregate replaces any row-level entry already
# stored in `out` under the same key.
for name, entry in country_states.items():
    entry['cases'] = entry['cases'].tolist()
    entry['numLevels'] = 2 if name == 'US' else 1
    out[name] = entry

In [36]:
# Persist the assembled mapping as a single JSON document.
with open('data/covid-19.json', mode='w') as sink:
    json.dump(out, fp=sink)