In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*' and
# newer releases no longer ship the old names. Try the new name first and fall
# back to the legacy one so the notebook runs on both old and new installs.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
from datetime import datetime
import json
import requests
from github import Github

In [2]:
def toUnixTime(date, format):
    """Convert a date string to milliseconds elapsed since 1970-01-01 00:00:00.

    Parameters
    ----------
    date : str
        The date text to parse.
    format : str
        A `datetime.strptime` format describing `date`.

    Returns
    -------
    int
        Milliseconds between the (naive) epoch and the parsed (naive) datetime,
        truncated towards zero.
    """
    epoch = datetime(1970, 1, 1)
    parsed = datetime.strptime(date, format)
    elapsed_ms = (parsed - epoch).total_seconds() * 1000
    return int(elapsed_ms)

In [3]:
# Registry of remote data URLs; later cells add the per-metric time-series
# entries as well as the 'iso' and 'vaccines' entries.
sources = {}
# NOTE(review): this module-level dict is not read anywhere in the visible
# notebook (genCountryData uses its own local `data`) — possibly a leftover;
# confirm before removing.
data = {}

In [4]:
def url(metric):
    """Return the raw CSV URL of the global time series for `metric` in the CSSEGISandData/COVID-19 repository."""
    return f'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{metric}_global.csv'


metrics = ['confirmed', 'recovered', 'deaths']
sources.update({name: url(name) for name in metrics})

# For every metric: download the CSV, sum all rows that share a 'Country/Region'
# and keep only the columns from '1/22/20' onwards.
time_series = {
    name: (
        pd.read_csv(sources[name])
        .groupby('Country/Region')
        .sum()
        .loc[:, '1/22/20':]
    )
    for name in metrics
}

# 'infected' is derived: confirmed cases minus recovered cases minus deaths.
metrics.append('infected')
time_series['infected'] = (
    time_series['confirmed'] - time_series['recovered'] - time_series['deaths']
)

In [5]:
# time_series['infected'] = time_series['confirmed']-time_series['recovered']-time_series['deaths']

# for metric in time_series:
#     time_series[metric].loc['World'] = time_series[metric].sum()

# for metric in metrics:
#     time_series[f'daily_{metric}'] = time_series[metric].diff(axis=1)
#     time_series[f'7MA_daily_{metric}'] = time_series[f'daily_{metric}'].rolling(window=7, axis=1).mean()

In [6]:
# Country metadata (ISO codes, regions) indexed by country name.
sources['iso'] = 'https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv'
iso_df = pd.read_csv(sources['iso'], index_col='name')

# Map the names used in the ISO table onto the names used as the index of the
# case time series, so the two tables can be aligned by country name.
rename = {
    "Bolivia (Plurinational State of)": "Bolivia",
    "Brunei Darussalam": "Brunei",
    "Côte d'Ivoire": "Cote d'Ivoire",
    "Iran (Islamic Republic of)": "Iran",
    "Korea, Republic of": "Korea, South",
    "Taiwan, Province of China": "Taiwan*",
    "United States of America": "US",
    "Russian Federation":"Russia",
    "Venezuela (Bolivarian Republic of)": "Venezuela",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "Moldova, Republic of": "Moldova",
    "Viet Nam": "Vietnam",
    "Tanzania, United Republic of": "Tanzania",
    "Palestine, State of": "West Bank and Gaza",
    "Syrian Arab Republic": "Syria",
    "Lao People's Democratic Republic": "Laos",
    "Myanmar":"Burma",
    "Congo, Democratic Republic of the": "Congo (Kinshasa)",
    "Congo": "Congo (Brazzaville)"
}
iso_df = iso_df.rename(index=rename)

# Placeholder rows: every column of these rows is set to the code.
iso_df.loc['Kosovo'] = 'XK'
iso_df.loc['World'] = 'WD'
# pandas' default NA handling reads the literal text "NA" as missing, so the
# alpha-2 code of Namibia has to be restored by hand. Only that one cell is
# written: assigning to the whole row would also overwrite Namibia's region
# (and every other column) with 'NA'.
iso_df.loc['Namibia', 'alpha-2'] = 'NA'

In [7]:
sources['vaccines'] = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'
df = pd.read_csv(sources['vaccines'])

# Align location names with the index of the case time series.
# Assign the result back instead of calling replace(inplace=True) on the
# selected column, which does not reliably write through to `df`.
replace = {'United States': 'US'}
df['location'] = df['location'].replace(replace)


def _to_column_label(iso_date):
    """Convert 'YYYY-MM-DD' into the non-padded 'M/D/YY' form used by the time-series column labels.

    Built from the date fields directly because the strftime flags that strip
    zero padding are platform specific ('%#m' on Windows, '%-m' elsewhere).
    """
    parsed = datetime.strptime(iso_date, '%Y-%m-%d')
    return f'{parsed.month}/{parsed.day}/{parsed:%y}'


df['date'] = df['date'].apply(_to_column_label)

# Empty frame with the same countries (rows) and dates (columns) as 'confirmed'.
vaccines_df = pd.DataFrame(index=time_series['confirmed'].index, columns=time_series['confirmed'].columns)

for country in iso_df.index:
    try:
        # set_index returns a new frame, so the filtered slice is never mutated.
        tmp_df = df[df['location'] == country].set_index('date')
        # Row assignment aligns the date-indexed series with the date columns.
        vaccines_df.loc[country] = tmp_df['total_vaccinations']
    except Exception:
        # Best effort: a country whose data cannot be aligned is skipped.
        continue

# Drop the last date column, then carry the last known total forward along each
# row and treat everything before the first report as zero.
vaccines_df = vaccines_df.iloc[:, :-1]
vaccines_df = vaccines_df.ffill(axis=1).fillna(value=0)
time_series['vaccines'] = vaccines_df

In [8]:
# Add a 'World' row holding the column-wise total of every case metric.
# Any existing 'World' row is left out of the sum, so re-running this cell does
# not count the previous total again. 'vaccines' is skipped because it is only
# appended to `metrics` after this loop and therefore never gets a computed
# total on a first run.
for metric in metrics:
    if metric == 'vaccines':
        continue
    frame = time_series[metric]
    frame.loc['World'] = frame.drop(index='World', errors='ignore').sum()

# Guarded append keeps `metrics` free of duplicates when the cell is re-run.
if 'vaccines' not in metrics:
    metrics.append("vaccines")

# Day-over-day change and its 7-day moving average, computed along the date axis.
for metric in metrics:
    time_series[f'daily_{metric}'] = time_series[metric].diff(axis=1)
    time_series[f'7MA_daily_{metric}'] = time_series[f'daily_{metric}'].rolling(window=7, axis=1).mean()

In [9]:
# Latest value (last date column) of every metric and of its daily change,
# collected column by column in the order metric, daily_metric.
latest = {}
for name in metrics:
    latest[name] = time_series[name].iloc[:, -1]
    latest[f'daily_{name}'] = time_series[f'daily_{name}'].iloc[:, -1]

# Missing values are not filled here, so the integer cast fails if any latest
# value is NaN. Rows are ordered by confirmed cases (descending) while the
# values are still numeric, then every cell is rendered with thousands
# separators.
general_df = (
    pd.DataFrame(latest, index=time_series['confirmed'].index)
    .astype(int)
    .sort_values('confirmed', ascending=False)
    .applymap(lambda value: f'{value:,}')
)

In [10]:
# Attach identifying metadata to every row of the summary table; the ISO code
# and region are aligned by country name against iso_df.
general_df['country'] = general_df.index
general_df['iso'] = iso_df['alpha-2']
general_df['region'] = iso_df['region']
# Format explicitly to second precision. Slicing the last 7 characters off
# str(datetime) assumes a '.ffffff' suffix, which is absent when the
# microsecond field happens to be 0 and would then cut into the time itself.
general_df['last_update'] = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')

In [11]:
# Countries for which no ISO code was found are removed from the summary table
# and from every time-series frame.
no_match = general_df.index[general_df['iso'].isna().to_numpy()]
for frame in time_series.values():
    frame.drop(index=no_match, inplace=True)
general_df.drop(index=no_match, inplace=True)

In [12]:
class countryData:
    """Per-country view of the summary row and of every time series.

    On construction the series are trimmed to start where the outbreak (and,
    separately, the vaccination campaign) becomes significant; `to_dict`
    returns a plain-Python structure of the result.
    """

    def __init__(self, country):
        """Collect the data of `country` (a label of general_df / the time-series frames)."""
        # Copy the row: it is extended with a 'start' entry below and must not
        # be a view on (or trigger copy warnings about) the shared general_df.
        self.general = general_df.loc[country].copy()
        self.time_series = {metric: time_series[metric].loc[country] for metric in time_series}
        self.preProcessing()

    def preProcessing(self):
        """Trim every series to its relevant window and record the start timestamps."""

        def getStart(metric, atleast=1):
            """Return the first date label (from '3/1/20' on, when present) whose value exceeds `atleast`.

            Falls back to the first label of the considered window when no
            value exceeds the threshold.
            """
            s = self.time_series[metric]
            try:
                s = s['3/1/20':]
            except KeyError:
                # The label is not part of this (already trimmed) series:
                # search the whole series instead.
                pass
            start = s.index[0]
            s = s[s > atleast]
            if len(s):
                start = s.index[0]
            return start

        # All series start at the first date with more than 100 confirmed cases.
        start = getStart(metric='confirmed', atleast=100)
        self.time_series = {metric: self.time_series[metric][start:] for metric in self.time_series}

        # The vaccine series start later: at the first date with more than 1000
        # daily vaccinations.
        start_vaccines = getStart(metric='daily_vaccines', atleast=1000)
        for name in ('vaccines', 'daily_vaccines', '7MA_daily_vaccines'):
            self.time_series[name] = self.time_series[name][start_vaccines:]

        # Start dates are stored as epoch milliseconds.
        self.time_series['starts'] = {'vaccines': toUnixTime(start_vaccines, format="%m/%d/%y")}
        self.general['start'] = toUnixTime(start, format="%m/%d/%y")

    def to_dict(self):
        """Return {'general': {...}, 'time_series': {...}} built from plain dicts and lists.

        Every series becomes a list of values; the 'starts' entry is already a
        dict and is passed through unchanged.
        """
        res = {
            'general': self.general.to_dict(),
            'time_series': {
                metric: series.to_list()
                for metric, series in self.time_series.items()
                if metric != 'starts'
            },
        }
        res['time_series']['starts'] = self.time_series['starts']
        return res

In [13]:
# Spot check: build the per-country data for Mexico and show the recorded start
# of its vaccine series (epoch milliseconds, as produced by toUnixTime).
mx_data = countryData('Mexico')
mx_data.time_series['starts']

{'vaccines': 1608768000000}

In [14]:
# Last 30 entries of Mexico's 7-day moving average of daily vaccinations.
mx_data.time_series['7MA_daily_vaccines'][-30:]

1/12/21     6377.571429
1/13/21    19911.714286
1/14/21    38797.285714
1/15/21    49986.714286
1/16/21    55493.142857
1/17/21    55834.571429
1/18/21    57924.142857
1/19/21    57891.857143
1/20/21    44066.142857
1/21/21    31764.571429
1/22/21    28204.714286
1/23/21    22217.428571
1/24/21    22497.714286
1/25/21    21796.571429
1/26/21    22028.142857
1/27/21    22144.857143
1/28/21    15072.428571
1/29/21     6772.714286
1/30/21     7362.714286
1/31/21     6243.000000
2/1/21      4299.571429
2/2/21      3602.857143
2/3/21      4365.285714
2/4/21      5320.857143
2/5/21      5442.714286
2/6/21      5698.714286
2/7/21      5741.428571
2/8/21      6088.285714
2/9/21      6686.857143
2/10/21     5443.714286
Name: Mexico, dtype: float64

In [15]:
def genCountryData(country):
    """Build the countryData for `country` and return its dictionary form."""
    return countryData(country).to_dict()

In [16]:
def manualUpdate():
    """Write the summary table and one JSON file per country below ./data.

    The summary goes to ./data/general.json (one record per row); each country
    goes to ./data/time_series/<iso>.json, where <iso> is the row's 'iso' value.
    """
    general_df.to_json('./data/general.json', orient='records')
    for country, country_iso in general_df['iso'].items():
        payload = genCountryData(country)
        target = ''.join(('./data/time_series/', country_iso, '.json'))
        with open(target, 'w') as doc:
            json.dump(payload, doc)

In [17]:
def updateData(access_token):
    """Push the per-country JSON files and the summary table to the "CoronaTrack" repository.

    Parameters
    ----------
    access_token : str
        Token passed to `Github(...)`; the repository is looked up on the
        user that the token authenticates.

    Every target file is fetched first (its path and sha are needed by
    `update_file`) and then overwritten with the commit message
    "automatic update".
    """
    g = Github(access_token)
    repo = g.get_user().get_repo("CoronaTrack")

    for country in general_df.index:
        country_iso = general_df.loc[country, 'iso']
        res = json.dumps(genCountryData(country))

        contents = repo.get_contents(f"data/time_series/{country_iso}.json")
        repo.update_file(contents.path, "automatic update", res, contents.sha)

    # The summary table is the module-level `general_df`; the name `general`
    # used here before does not exist and raised NameError after all the
    # per-country files had already been pushed.
    res = general_df.to_json(orient='records')
    contents = repo.get_contents("data/general.json")
    repo.update_file(contents.path, "automatic update", res, contents.sha)

In [18]:
manualUpdate()