In [76]:
import numpy as np
import pandas as pd
from jinja2 import Template
from IPython.display import HTML

In [77]:
#hide

# FETCH: remote locations of the country-mapping table and the overview HTML template.
base_url = 'https://raw.githubusercontent.com/pratapvardhan/notebooks/master/covid19/'
paths = dict(
    mapping=f'{base_url}mapping_countries.csv',
    overview=f'{base_url}overview.tpl',
)

In [78]:
def get_mappings(url):
    """Read the country mapping table and build the lookup dicts derived from it.

    Parameters
    ----------
    url
        Passed straight to ``pd.read_csv``. The table must provide the
        ``Country``, ``Name`` and ``Continent`` columns.

    Returns
    -------
    dict
        ``'df'`` -- the table exactly as read;
        ``'replace.country'`` -- ``Country`` -> ``Name`` for rows that have a ``Name``;
        ``'map.continent'`` -- ``Name`` -> ``Continent`` built from every row.
    """
    table = pd.read_csv(url)
    # Rows without a Name cannot take part in the renaming, so they are left out there.
    named_rows = table.dropna(subset=['Name'])
    country_to_name = dict(named_rows.set_index('Country')['Name'])
    name_to_continent = dict(table.set_index('Name')['Continent'])
    return {
        'df': table,
        'replace.country': country_to_name,
        'map.continent': name_to_continent,
    }

In [79]:
# Fetch the mapping table once. Other cells read mapping['replace.country'] (country renaming)
# and mapping['map.continent'] (continent lookup).
mapping = get_mappings(paths['mapping'])

In [80]:
def get_frame(name):
    """Download one global time-series CSV and rename its countries.

    Parameters
    ----------
    name : str
        Inserted into the file name ``time_series_covid19_{name}_global.csv``;
        this notebook calls it with 'confirmed', 'deaths' and 'recovered'.

    Returns
    -------
    pandas.DataFrame
        The table as read, with the 'Country/Region' values passed through
        ``mapping['replace.country']`` (a module-level dict).
    """
    repo_root = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/'
    csv_path = f'csse_covid_19_time_series/time_series_covid19_{name}_global.csv'
    frame = pd.read_csv(repo_root + csv_path)
    # Why the replacement below: it rewrites country names to the spelling used in the
    # mapping table's Name column, so lookups keyed on that name (such as the continent
    # map) find a match.
    frame['Country/Region'] = frame['Country/Region'].replace(mapping['replace.country'])
    return frame

In [81]:
def get_dates(df):
    """Return the date columns of a time-series frame and the index of the newest populated one.

    Parameters
    ----------
    df : pandas.DataFrame
        A frame whose columns are the metadata columns listed below plus one column per date.

    Returns
    -------
    (int, pandas.Index)
        ``LAST_DATE_I`` -- a negative index into ``dt_cols`` pointing at the last column
        that is not entirely zero/NaN (``-1`` when every date column is empty);
        ``dt_cols`` -- the date columns, in their original order.
    """
    # Everything that is not metadata is treated as a date column. The frames in this
    # notebook name the country column 'Country/Region'; 'country' is still excluded so
    # frames using that name keep working.
    meta_cols = ['Province/State', 'Country/Region', 'country', 'Lat', 'Long']
    dt_cols = df.columns[~df.columns.isin(meta_cols)]
    LAST_DATE_I = -1

    # The newest column(s) may be empty, so walk backwards to the first one with data.
    # The stop value is -len - 1 so the oldest date column is checked as well.
    for i in range(-1, -len(dt_cols) - 1, -1):
        if not df[dt_cols[i]].fillna(0).eq(0).all():
            LAST_DATE_I = i
            break
    return LAST_DATE_I, dt_cols

In [82]:
# Name of the country column in the time-series tables; used as the groupby key in later cells.
COL_REGION = 'Country/Region'
# Confirmed-cases time series (metadata columns plus one column per date).
df = get_frame('confirmed')

In [83]:
# Reuse the confirmed-cases frame already loaded into `df` (same object, not a copy).
dft_cases = df
dft_deaths = get_frame('deaths')
# NOTE(review): dft_recovered is not referenced by any other cell visible here -- confirm it is still needed.
dft_recovered = get_frame('recovered')

In [84]:
# LAST_DATE_I is a negative index into dt_cols selecting the newest date column that holds data.
LAST_DATE_I, dt_cols = get_dates(df)

In [85]:
# Reference dates: the newest populated column and the column five positions before it.
dt_today =dt_cols[LAST_DATE_I]
dt_5day = dt_cols[LAST_DATE_I - 5]

In [86]:
# Per-country totals on the two reference dates: the dfc_* series hold the values at
# dt_today and the dfp_* series hold the values at dt_5day.
dfc_cases, dfp_cases = (
    dft_cases.groupby(COL_REGION)[day].sum() for day in (dt_today, dt_5day)
)
dfc_deaths, dfp_deaths = (
    dft_deaths.groupby(COL_REGION)[day].sum() for day in (dt_today, dt_5day)
)

In [103]:
# One row per country with current and previous totals, largest case counts first
# (ties broken by deaths); reset_index turns the country index into a regular column.
df_table = (
    pd.DataFrame({
        'Cases': dfc_cases,
        'Deaths': dfc_deaths,
        'PCases': dfp_cases,
        'PDeaths': dfp_deaths,
    })
    .sort_values(['Cases', 'Deaths'], ascending=False)
    .reset_index()
)

In [114]:
# Increase of each metric between dt_5day and dt_today. clip(lower=0) replaces negative
# differences with 0, i.e. negative numbers are not allowed in the "(+)" columns.
for c in ('Cases', 'Deaths'):
    df_table[f'{c} (+)'] = df_table[c].sub(df_table[f'P{c}']).clip(lower=0)
# Deaths as a percentage of cases, rounded to two decimals.
df_table['Fatality Rate'] = df_table['Deaths'].mul(100).div(df_table['Cases']).round(2)
# Continent lookup keyed on the country name.
df_table['Continent'] = df_table['Country/Region'].map(mapping['map.continent'])
df_table.head(5)

Unnamed: 0,Country/Region,Cases,Deaths,PCases,PDeaths,Cases (+),Deaths (+),Fatality Rate,Continent
0,US,759086,40661,607670,25831,151416,14830,5.36,North America
1,Spain,198674,20453,172541,18056,26133,2397,10.29,Europe
2,Italy,178972,23660,162488,21067,16484,2593,13.22,Europe
3,France,154097,19744,131361,15748,22736,3996,12.81,Europe
4,Germany,145184,4586,131359,3294,13825,1292,3.16,Europe


In [115]:
#hide
# Remove the entries flagged as problematic from the table.
df_table = df_table.loc[
    ~df_table['Country/Region'].isin(['Cape Verde', 'Cruise Ship', 'Kosovo'])
]

In [116]:
# Columns that are summed into the summary. They are named explicitly (instead of being picked
# by position 1, 2, 5, 6) so that reordering or adding columns to df_table cannot silently
# change which metrics are reported.
metrics = ['Cases', 'Deaths', 'Cases (+)', 'Deaths (+)']

In [124]:
# Totals of `metrics` for selected regions; add_prefix labels each key with the region.
# Note: the 'EU ' entries sum every row whose Continent is 'Europe'.
s_china = df_table.loc[df_table['Country/Region'] == 'China', metrics].sum().add_prefix('China ')
s_us = df_table.loc[df_table['Country/Region'] == 'US', metrics].sum().add_prefix('US ')
s_eu = df_table.loc[df_table['Continent'] == 'Europe', metrics].sum().add_prefix('EU ')
s_ind = df_table.loc[df_table['Country/Region'] == 'India', metrics].sum().add_prefix('India ')
# Reference dates first, then the overall totals, then the per-region totals.
summary = {
    'updated': pd.to_datetime(dt_today),
    'since': pd.to_datetime(dt_5day),
    **df_table[metrics].sum(),
    **s_china,
    **s_us,
    **s_eu,
    **s_ind,
}

In [125]:
# Display the assembled summary dict for inspection.
summary

{'updated': Timestamp('2020-04-19 00:00:00'),
 'since': Timestamp('2020-04-14 00:00:00'),
 'Cases': 2400868,
 'Deaths': 165031,
 'Cases (+)': 425064,
 'Deaths (+)': 39056,
 'China Cases': 83805,
 'China Deaths': 4636,
 'China Cases (+)': 499,
 'China Deaths (+)': 1291,
 'US Cases': 759086,
 'US Deaths': 40661,
 'US Cases (+)': 151416,
 'US Deaths (+)': 14830,
 'EU Cases': 1089974,
 'EU Deaths': 101822,
 'EU Cases (+)': 166961,
 'EU Deaths (+)': 18473,
 'India Cases': 17615,
 'India Deaths': 559,
 'India Cases (+)': 6128,
 'India Deaths (+)': 166}

In [126]:
# New cases per country and date: sum the rows of each country, then take the difference
# between consecutive columns. The first column has no predecessor, so its NaN becomes 0
# before the cast to int.
dft_ct_new_cases = (
    dft_cases
    .groupby(COL_REGION)[dt_cols]
    .sum()
    .diff(axis=1)
    .fillna(0)
    .astype(int)
)

In [127]:
def get_template(path):
    """Return the text of a template stored either at a URL or in a local file.

    This is the helper that supplies the template source handed to ``Template(...)``;
    it lets the same notebook load the template from the web or from disk.

    Parameters
    ----------
    path : str
        A URL (any string whose parsed form has a network location) or a local file path.

    Returns
    -------
    str
        The template source, decoded as UTF-8 in both cases.
    """
    from urllib.parse import urlparse
    # A non-empty netloc means `path` is a URL rather than a filesystem path.
    if urlparse(path).netloc:
        from urllib.request import urlopen
        # Context manager closes the HTTP response instead of leaking the connection.
        with urlopen(path) as response:
            return response.read().decode('utf8')
    # Read local files as UTF-8 too, so both branches decode the same template identically,
    # and close the file handle deterministically.
    with open(path, encoding='utf8') as fh:
        return fh.read()

In [128]:
#hide_input
# Render the overview template and show it inline. The template receives the summary dict,
# the first 20 rows of the table, and the daily new cases for the window that ends at the
# newest populated date column and starts 50 columns before it.
template = Template(get_template(paths['overview']))
html = template.render(
    D=summary, table=df_table.head(20),  # REMOVE .head(20) to see all values
    newcases=dft_ct_new_cases.loc[:, dt_cols[LAST_DATE_I-50]:dt_cols[LAST_DATE_I]],
    # np, pd and enumerate are passed in so the template can refer to them by those names.
    np=np, pd=pd, enumerate=enumerate)
HTML(f'<div>{html}</div>')

Unnamed: 0_level_0,10  100  1000,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0
Country,New Cases,Total Cases,Total Cases,Deaths,Deaths,Fatality,Unnamed: 7_level_1,Unnamed: 8_level_1
,Feb. 29  Apr. 19,,"(+NEW) since Apr, 14","(+NEW) since Apr, 14","(+NEW) since Apr, 14","(+NEW) since Apr, 14",,
US,,759086.0,"(+151,416)",40661,"(+14,830)",5.36%,,
Spain,,198674.0,"(+26,133)",20453,"(+2,397)",10.29%,,
Italy,,178972.0,"(+16,484)",23660,"(+2,593)",13.22%,,
France,,154097.0,"(+22,736)",19744,"(+3,996)",12.81%,,
Germany,,145184.0,"(+13,825)",4586,"(+1,292)",3.16%,,
United Kingdom,,121172.0,"(+26,327)",16095,"(+3,966)",13.28%,,
Turkey,,86306.0,"(+21,195)",2017,(+614),2.34%,,
China,,83805.0,(+499),4636,"(+1,291)",5.53%,,
Iran,,82211.0,"(+7,334)",5118,(+435),6.23%,,


In [131]:
# Template() cannot be called without a source argument (it raises TypeError), so build the
# template from the overview file referenced in `paths`.
template = Template(get_template(paths['overview']))

In [132]:
# Re-render with the current `template` object; the arguments are the same as in the
# earlier render cell (summary, first 20 table rows, 50-column new-cases window).
html = template.render(
    D=summary, table=df_table.head(20),  # REMOVE .head(20) to see all values
    newcases=dft_ct_new_cases.loc[:, dt_cols[LAST_DATE_I-50]:dt_cols[LAST_DATE_I]],
    np=np, pd=pd, enumerate=enumerate)
HTML(f'<div>{html}</div>')