In [2140]:
import os
import pandas as pd
import numpy as np
import plotly.express as px

In [2141]:
CONFIRMED_PATH = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
DEATH_PATH = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
RECOVERED_PATH = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

In [2142]:
def get_data_df(data_path):
    
    data_df = pd.read_csv(data_path)
    data_df = data_df.rename(columns={'Province/State': 'province_or_state', 'Country/Region': 'country'})
    data_df['province_or_state'] = data_df['province_or_state'].fillna('')
    data_df = data_df.drop(data_df[data_df['province_or_state'].str.contains('Diamond Princess')].index)
    data_df = data_df.drop(data_df[data_df['country'].str.contains('Diamond Princess')].index)
    agg_spec = {k : 'sum' for k in data_df.columns[4:]}
    data_df = data_df.groupby(['country']).agg(agg_spec).reset_index()
    
    return data_df

In [2143]:
def get_weather_df():
    
    weather_df = pd.read_csv('yearly_temp.csv', sep='\t')
    weather_df = weather_df.rename(columns=lambda x: x.strip())
    weather_df['country'] = weather_df['country'].str.strip()
    weather_df['temp'] = pd.to_numeric(weather_df['temp'])
    
    return weather_df

In [2144]:
def get_population_df():

    population_df = pd.read_csv('population_data.csv', sep='\t')
    population_df['Med. Age'] = population_df['Med. Age'].str.replace(',', '').str.replace('N.A', '0').astype(float)
    population_df['Urban Pop %'] = population_df['Urban Pop %'].str.replace(',', '').str.replace('N.A', '0').astype(float)
    
    return population_df

In [2145]:
def join_data_df_weather(df):
    
    weather_df = get_weather_df()
    drop_countries = set(df['country'].unique()) - set(weather_df['country'].unique())
    df = df.join(weather_df.set_index('country'), on='country', how='left')
    df['hot'] = np.where(df['temp'] > 15.0, True, False)
    df = df.drop(df[df['country'].isin(drop_countries)].index)   

    return df

In [2146]:
def join_data_df_population(df):

    population_df = get_population_df()
    drop_countries = set(df['country'].unique()) - set(population_df['country'].unique())
    df = df.join(population_df.set_index('country'), on='country', how='left')
    df = df.drop(df[df['country'].isin(drop_countries)].index)   

    return df

In [2147]:
COUNTRIES_OF_INTEREST = ['Italy', 'France', 'China', 'United Kingdom', 'US', 'Germany', 'Spain', 
                         'Japan', 'Israel', 'Netherlands', 'Korea, South']

In [2148]:
data_df = get_data_df(CONFIRMED_PATH)

In [2149]:
data_df_columns = data_df.columns

In [2150]:
weather_population_columns = list(get_weather_df().columns[1:]) + ['hot'] + list(get_population_df().columns[1:])

In [2151]:
data_df_t = data_df.melt(id_vars=['country'], var_name='date', value_name='confirmed').fillna('<all>')
data_of_interest = data_df_t[data_df_t['country'].isin(COUNTRIES_OF_INTEREST)]

In [2152]:
fig = px.line(data_of_interest, x="date", y="confirmed", color='country', log_y=True)
fig.show()

In [2153]:
diff_df = data_df.copy()
for i in range(len(data_df.columns)-1, 1, -1):
    diff_df.iloc[:, i] = (data_df.iloc[:, i] - data_df.iloc[:, i-1]) / data_df.iloc[:, i-1]

diff_df = diff_df.fillna(0.0)

In [2154]:
diff_df_t = diff_df.melt(id_vars=['country'], var_name='date', value_name='new').fillna('<all>')
diff_of_interest = diff_df_t[diff_df_t['country'].isin(COUNTRIES_OF_INTEREST)]
c = diff_of_interest.groupby('country').cumcount()
week_of_interest = diff_of_interest.groupby(['country', c // 7]).agg({'date': 'first', 'new': 'sum'}).reset_index()

In [2155]:
fig = px.line(week_of_interest, x="date", y="new", color='country', log_y=True)
fig.show()

In [2156]:
data_df = join_data_df_weather(data_df)

In [2157]:
data_df = join_data_df_population(data_df)

In [2158]:
data_norm_df = data_df.copy()
for i in range(len(data_df.columns)-len(weather_population_columns), 0, -1):
    data_norm_df.iloc[:, i] = pd.to_numeric(data_df.iloc[:, i]) / pd.to_numeric(data_df['Population (2020)'])

In [2159]:
data_norm_df_t = data_norm_df.drop(columns=weather_population_columns)
data_norm_df_t = data_norm_df_t.melt(id_vars=['country'], var_name='date', value_name='confirmed').fillna('<all>')
norm_of_interest = data_norm_df_t[data_norm_df_t['country'].isin(COUNTRIES_OF_INTEREST)]

In [2160]:
fig = px.line(norm_of_interest, x="date", y="confirmed", color='country', log_y=True)
fig.show()

In [2161]:
death_df = get_data_df(DEATH_PATH)
death_df = join_data_df_weather(death_df)
death_df = join_data_df_population(death_df)

In [2162]:
death_norm_df = death_df.copy()
for i in range(len(death_df.columns)-len(weather_population_columns), 0, -1):
    death_norm_df.iloc[:, i] = pd.to_numeric(death_df.iloc[:, i]) / pd.to_numeric(death_df['Population (2020)'])

In [2163]:
death_norm_df_t = death_norm_df.drop(columns=weather_population_columns)
death_norm_df_t = death_norm_df_t.melt(id_vars=['country'], var_name='date', value_name='deaths').fillna('<all>')
death_norm_of_interest = death_norm_df_t[death_norm_df_t['country'].isin(COUNTRIES_OF_INTEREST)]

In [2164]:
fig = px.line(death_norm_of_interest, x="date", y="deaths", color='country', log_y=True)
fig.show()

In [2165]:
recovered_df = get_data_df(RECOVERED_PATH)
recovered_df = join_data_df_weather(recovered_df)
recovered_df = join_data_df_population(recovered_df)

In [2166]:
active_df = data_df.copy()
for i in range(len(data_df.columns)-len(weather_population_columns), 0, -1):
    active_df.iloc[:, i] = pd.to_numeric(active_df.iloc[:, i]) - pd.to_numeric(death_df.iloc[:, i]) - pd.to_numeric(recovered_df.iloc[:, i])

In [2167]:
active_df_norm_t = active_df.drop(columns=weather_population_columns)
active_df_norm_t = active_df_norm_t.melt(id_vars=['country'], var_name='date', value_name='active').fillna('<all>')
active_norm_of_interest = active_df_norm_t[active_df_norm_t['country'].isin(COUNTRIES_OF_INTEREST)]

In [2168]:
fig = px.line(active_norm_of_interest, x="date", y="active", color='country', log_y=True)
fig.show()

In [2169]:
active_df.loc["world", data_df_columns[1:]] = active_df[data_df_columns[1:]].sum()
active_df.at['world', 'country'] = 'world'
active_df_norm_t = active_df.drop(columns=weather_population_columns)
active_df_norm_t = active_df_norm_t.melt(id_vars=['country'], var_name='date', value_name='active').fillna('<all>')
active_norm_of_interest = active_df_norm_t[active_df_norm_t['country'].isin(['world'])]

In [2170]:
fig = px.line(active_norm_of_interest, x="date", y="active", color='country', log_y=True)
fig.show()

In [2171]:
# hot_cold_df = data_df[list(data_df_columns) + ['temp', 'hot']].groupby('hot').agg({k : 'sum' for k in data_df.columns[1:len(data_df_columns)]}).reset_index()

In [2172]:
hot_cold_df_t = data_df[list(data_df_columns)[1:] + ['hot']].melt(id_vars=['hot'], var_name='date', value_name='confirmed').fillna('<all>')

In [2173]:
fig = px.line(hot_cold_df_t, x="date", y="confirmed", color='hot')
fig.show()

In [2174]:
static_columns = ['country', data_df_columns[-1]] + weather_population_columns

In [2175]:
data_df_scatter = data_df[static_columns]
data_df_scatter = data_df_scatter.rename(columns={data_df_columns[-1]: 'confirmed'})
data_df_scatter.drop(data_df_scatter[pd.to_numeric(data_df_scatter['Population (2020)'] < 100000)].index).sort_values(by=['confirmed'], ascending=False).head(n=10)

Unnamed: 0,country,confirmed,temp,hot,Population (2020),Density (P/Km²),Land Area (Km²),Migrants (net),Med. Age,Urban Pop %
170,US,607670,8.55,False,331002651,36,9147420,954806.0,38.0,83.0
155,Spain,172541,13.3,False,46754778,94,498800,40000.0,45.0,80.0
83,Italy,162488,13.45,False,60461826,206,294140,148943.0,47.0,69.0
60,France,131361,10.7,False,65273511,119,547557,36527.0,42.0,82.0
64,Germany,131359,8.5,False,83783942,240,348560,543822.0,46.0,76.0
174,United Kingdom,94845,8.45,False,67886011,281,241930,260650.0,40.0,83.0
36,China,83306,6.95,False,1439323776,153,9388211,-348399.0,38.0,61.0
79,Iran,74877,17.25,True,83992949,52,1628550,-55000.0,32.0,76.0
169,Turkey,65111,11.1,False,84339067,110,769630,283922.0,32.0,76.0
16,Belgium,31119,9.55,False,11589623,383,30280,48000.0,42.0,98.0


In [2176]:
data_norm_df = data_norm_df[static_columns]
data_norm_df = data_norm_df.rename(columns={data_df_columns[-1]: 'confirmed'})
data_norm_df.drop(data_norm_df[pd.to_numeric(data_norm_df['Population (2020)'] < 100000)].index).sort_values(by=['confirmed'], ascending=False).head(n=10)

Unnamed: 0,country,confirmed,temp,hot,Population (2020),Density (P/Km²),Land Area (Km²),Migrants (net),Med. Age,Urban Pop %
100,Luxembourg,0.005283,1.381838e-05,False,625978,242,2590,9741.0,40.0,88.0
76,Iceland,0.00504,5.12831e-06,False,341243,3,100250,380.0,37.0,94.0
155,Spain,0.00369,2.844629e-07,False,46754778,94,498800,40000.0,45.0,80.0
160,Switzerland,0.002997,6.354986e-07,False,8654622,219,39516,52000.0,43.0,74.0
83,Italy,0.002687,2.224544e-07,False,60461826,206,294140,148943.0,47.0,69.0
16,Belgium,0.002685,8.24013e-07,False,11589623,383,30280,48000.0,42.0,98.0
81,Ireland,0.002325,1.883435e-06,False,4937786,72,68890,23604.0,38.0,63.0
60,France,0.002012,1.639256e-07,False,65273511,119,547557,36527.0,42.0,82.0
170,US,0.001836,2.583061e-08,False,331002651,36,9147420,954806.0,38.0,83.0
134,Portugal,0.001711,1.485773e-06,True,10196709,111,91590,-6000.0,46.0,66.0


In [2177]:
fig = px.scatter(data_norm_df, x="Urban Pop %", y="confirmed", hover_data=['country'], log_x=True, log_y=True)
fig.show()

In [2178]:
fig = px.scatter(data_norm_df, x="Density (P/Km²)", y="confirmed", hover_data=['country'], log_x=True, log_y=True)
fig.show()

In [2179]:
fig = px.scatter(data_norm_df, x="Med. Age", y="confirmed", hover_data=['country'], log_x=True, log_y=True)
fig.show()

In [2180]:
death_df_static = death_norm_df[static_columns].copy()
death_df_static = death_df_static.rename(columns={data_df_columns[-1]: 'deaths'})

In [2181]:
fig = px.scatter(death_df_static, x="Med. Age", y="deaths", hover_data=['country'], log_x=True, log_y=True)
fig.show()

In [2182]:
fig = px.scatter(death_df_static, x="Urban Pop %", y="deaths", hover_data=['country'], log_x=True, log_y=True,
                size_max=60, color='country', size='Population (2020)')
fig.show()

In [2183]:
fig = px.scatter(death_df_static, x="Density (P/Km²)", y="deaths", hover_data=['country'], log_x=True, log_y=True)
fig.show()