In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
plt.style.use('seaborn')
from datetime import datetime
import json
import requests

In [2]:
def toUnixTime(date, format):
    t2 = datetime.strptime(date, format)
    t1 = datetime(1970, 1, 1)
    ans = (t2 - t1).total_seconds()*1000
    ans = int(ans)
    return ans

In [3]:
sources = {}
data = {}

In [4]:
url = lambda metric: f'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{metric}_global.csv' 
metrics = ['confirmed', 'recovered', 'deaths'] 
for metric in metrics: sources[metric] = url(metric)
time_series = {metric: pd.read_csv(sources[metric]) for metric in metrics}

for metric in time_series:
    df = time_series[metric]
    gb = df.groupby('Country/Region')
    df = gb.sum()
    df = df.loc[:,'1/22/20':] 
    time_series[metric] = df

In [5]:
time_series['infected'] = time_series['confirmed']-time_series['recovered']-time_series['deaths']

for metric in time_series:
    time_series[metric].loc['World'] = time_series[metric].sum()

for metric in metrics:
    time_series[f'daily_{metric}'] = time_series[metric].diff(axis=1)
    time_series[f'7MA_daily_{metric}'] = time_series[f'daily_{metric}'].rolling(window=7, axis=1).mean()

In [6]:
sources['iso'] = 'https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv'
iso = pd.read_csv(sources['iso'], index_col='name')
rename = {
    "Bolivia (Plurinational State of)": "Bolivia",
    "Brunei Darussalam": "Brunei",
    "Côte d'Ivoire": "Cote d'Ivoire",
    "Iran (Islamic Republic of)": "Iran",
    "Korea, Republic of": "Korea, South",
    "Taiwan, Province of China": "Taiwan*",
    "United States of America": "US",
    "Russian Federation":"Russia",
    "Venezuela (Bolivarian Republic of)": "Venezuela",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "Moldova, Republic of": "Moldova",
    "Viet Nam": "Vietnam",
    "Tanzania, United Republic of": "Tanzania",
    "Palestine, State of": "West Bank and Gaza",
    "Syrian Arab Republic": "Syria",
    "Lao People's Democratic Republic": "Laos",
    "Myanmar":"Burma",
    "Congo, Democratic Republic of the": "Congo (Kinshasa)",
    "Congo": "Congo (Brazzaville)"
}
iso.rename(index=rename, inplace=True)
iso.loc['Kosovo'] = 'XK'
iso.loc['Namibia'] = 'NA'
iso.loc['World'] = 'WD'

In [7]:
general = pd.DataFrame(index=time_series['confirmed'].index)

for metric in metrics:
    general[metric] = time_series[metric].iloc[:,-1]
    general[f'daily_{metric}'] = time_series[f'daily_{metric}'].iloc[:,-1]

general = general.astype(int)
general.sort_values('confirmed', ascending=False, inplace=True) 
general = general.applymap(lambda x: '{:,}'.format(x))

In [8]:
general['country'] = general.index
general['iso'] = iso['alpha-2']
general['region'] = iso['region']

In [9]:
no_match = general[general['iso'].isnull()].index
for metric in time_series:
    time_series[metric].drop(index=no_match, inplace=True)
general.drop(index=no_match, inplace=True)

In [10]:
class countryData:
    def __init__(self, country):
        self.name = country 
        self.time_series = {metric: time_series[metric].loc[country] for metric in time_series}
        self.general = general.loc[country]
        self.preProcessing()
    
    def preProcessing(self):
        def getStart(atleast=1):
            s = self.time_series['confirmed']
            s = s['1/29/20':]
            s = s[s > atleast]
            if len(s): return s.index[0]
            return getStart(1)
        start = getStart(100)
        self.time_series = {metric: self.time_series[metric][start:] for metric in self.time_series}
        self.general['start'] = toUnixTime(start, format="%m/%d/%y")

    def to_dict(self):
        res = {
            'general': self.general.to_dict(),
            'time_series': {metric: self.time_series[metric].to_list() for metric in self.time_series}
        }
        return res

In [11]:
def genCountryData(country):
    data = countryData(country)
    return data.to_dict()

In [22]:
mx = genCountryData('US')
mx['general']

{'confirmed': '6,804,814',
 'country': 'US',
 'daily_confirmed': '39,235',
 'daily_deaths': '234',
 'daily_recovered': '13,225',
 'deaths': '199,509',
 'iso': 'US',
 'recovered': '2,590,671',
 'region': 'Americas',
 'start': 1583280000000}

3

In [12]:
for country in general.index:
    country_iso = general.loc[country]['iso']
    res = genCountryData(country)
    with open('time_series/'+country_iso+'.json', 'w') as doc:
        json.dump(res, doc)
general.to_json('general.json', orient='records')

TypeError: Object of type int64 is not JSON serializable