# Covid19 Analysis for Nordic Countries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [2]:
# Load the per-country time series and keep only the five Nordic columns.
# Each cell of the resulting frame is a dict with 'date', 'confirmed',
# 'deaths' and 'recovered' keys.
timeseries_url = 'https://pomber.github.io/covid19/timeseries.json'
nordic_countries = ['Sweden', 'Denmark', 'Norway', 'Finland', 'Iceland']
df = pd.read_json(timeseries_url)[nordic_countries]

In [3]:
# Number of records per country; used below to walk the rows.
n_rows = len(df)
# Peek at the first raw record to see which fields each cell carries.
df.loc[0, 'Sweden']

{'date': '2020-1-22', 'confirmed': 0, 'deaths': 0, 'recovered': 0}

In [4]:
# Extract the date string of every record; Sweden's column is used as the
# date source for the whole frame.
dates = [df['Sweden'][row]['date'] for row in range(n_rows)]

In [5]:
# Parse the date strings and promote them to the frame's index in one step.
df = df.assign(Date=pd.to_datetime(dates)).set_index('Date')

In [6]:
# Sanity check: show the last five rows, now indexed by date.
df.tail(n=5)

Unnamed: 0_level_0,Sweden,Denmark,Norway,Finland,Iceland
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-25,"{'date': '2020-3-25', 'confirmed': 2526, 'deat...","{'date': '2020-3-25', 'confirmed': 1862, 'deat...","{'date': '2020-3-25', 'confirmed': 3084, 'deat...","{'date': '2020-3-25', 'confirmed': 880, 'death...","{'date': '2020-3-25', 'confirmed': 737, 'death..."
2020-03-26,"{'date': '2020-3-26', 'confirmed': 2840, 'deat...","{'date': '2020-3-26', 'confirmed': 2023, 'deat...","{'date': '2020-3-26', 'confirmed': 3369, 'deat...","{'date': '2020-3-26', 'confirmed': 958, 'death...","{'date': '2020-3-26', 'confirmed': 802, 'death..."
2020-03-27,"{'date': '2020-3-27', 'confirmed': 3069, 'deat...","{'date': '2020-3-27', 'confirmed': 2200, 'deat...","{'date': '2020-3-27', 'confirmed': 3755, 'deat...","{'date': '2020-3-27', 'confirmed': 1041, 'deat...","{'date': '2020-3-27', 'confirmed': 890, 'death..."
2020-03-28,"{'date': '2020-3-28', 'confirmed': 3447, 'deat...","{'date': '2020-3-28', 'confirmed': 2366, 'deat...","{'date': '2020-3-28', 'confirmed': 4015, 'deat...","{'date': '2020-3-28', 'confirmed': 1167, 'deat...","{'date': '2020-3-28', 'confirmed': 963, 'death..."
2020-03-29,"{'date': '2020-3-29', 'confirmed': 3700, 'deat...","{'date': '2020-3-29', 'confirmed': 2564, 'deat...","{'date': '2020-3-29', 'confirmed': 4284, 'deat...","{'date': '2020-3-29', 'confirmed': 1240, 'deat...","{'date': '2020-3-29', 'confirmed': 1020, 'deat..."


In [7]:
# Pull the 'deaths' field out of every record, keeping the same date index
# and one column per country.
df_deaths = pd.DataFrame(
    {country: [record.get('deaths') for record in df[country]] for country in df.columns},
    index=df.index,
)

In [8]:
# Start from March 10, before the first deaths.
# Take an explicit copy of the slice: writing into a bare slice of the full
# frame raises SettingWithCopyWarning and leaves it ambiguous which object
# the corrections below actually modify.
df_deaths = df_deaths.loc['2020-03-10':].copy()

# Fix faulty Iceland data
df_deaths.loc['2020-03-15', 'Iceland'] = 0
df_deaths.loc['2020-03-20', 'Iceland'] = 1

In [9]:
# population data from Wikipedia

# NOTE(review): the table position [3] depends on the page layout — confirm
# it still selects the population table when re-running.
df_pop = pd.read_html('https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)')[3]
# .copy() so the column assignment below edits an independent frame rather
# than a slice of the scraped table (avoids SettingWithCopyWarning).
df_pop = df_pop[['Country or area', 'Population(1 July 2019)']].copy()

# Strip bracketed annotations from the country names. The pattern is a
# regular expression, so it is written as a raw string and regex matching is
# requested explicitly: pandas >= 2.0 treats the pattern as a literal string
# by default, which would leave the names untouched.
df_pop['Country or area'] = df_pop['Country or area'].str.replace(r'\[.*\]', '', regex=True)
# One row of populations with one column per country, restricted to the
# countries present in df.
df_pop = df_pop.pivot_table(columns='Country or area', values='Population(1 July 2019)')[df.columns]
# Express the population in millions of inhabitants.
df_pop = df_pop / 1000000

In [10]:
# Spot-check one country: the value is in millions of inhabitants.
df_pop.loc[:, 'Sweden']

Population(1 July 2019)    10.036379
Name: Sweden, dtype: float64

In [11]:
# Deaths per million inhabitants: divide each country's series by its
# population (df_pop is already expressed in millions).
df_deaths_per_mn = pd.DataFrame(
    {country: df_deaths[country] / df_pop[country].values for country in df_deaths.columns},
    index=df_deaths.index,
)

In [12]:
# Display the deaths-per-million table (one column per country, indexed by date).
df_deaths_per_mn

Unnamed: 0_level_0,Sweden,Denmark,Norway,Finland,Iceland
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-10,0.0,0.0,0.0,0.0,0.0
2020-03-11,0.099638,0.0,0.0,0.0,0.0
2020-03-12,0.099638,0.0,0.0,0.0,0.0
2020-03-13,0.099638,0.0,0.0,0.0,0.0
2020-03-14,0.199275,0.173254,0.557739,0.0,0.0
2020-03-15,0.298913,0.346508,0.557739,0.0,0.0
2020-03-16,0.597825,0.519762,0.557739,0.0,0.0
2020-03-17,0.697463,0.693016,0.557739,0.0,2.949583
2020-03-18,0.996375,0.693016,1.115479,0.0,2.949583
2020-03-19,1.096013,1.039523,1.301392,0.0,2.949583


# Trend from first death

In [13]:
# Blank out every zero count so only the non-zero observations remain.
nonzero_deaths = df_deaths.where(df_deaths != 0)


def _drop_blanks(column):
    """Return the non-missing values of ``column`` re-numbered from 0."""
    return pd.Series(column.dropna().values)


# Per country, drop the blanked dates; row k now holds the k-th non-zero
# observation of each country instead of a calendar date.
aligned_deaths = nonzero_deaths.apply(_drop_blanks)

# Prepend a single all-zero row so every country starts from zero.
zero_row = pd.DataFrame(
    np.zeros((1, aligned_deaths.shape[1])),
    columns=aligned_deaths.columns,
)
df_deaths_1 = pd.concat([zero_row, aligned_deaths], axis=0, ignore_index=True)

In [14]:
# Deaths per mn inhabitants since first death: same scaling as the
# calendar-date table, applied to the day-aligned frame. Column names are
# read from df_deaths, which df_deaths_1 was derived from.
df_deaths_per_mn_1 = pd.DataFrame(
    {country: df_deaths_1[country] / df_pop[country].values for country in df_deaths.columns},
    index=df_deaths_1.index,
)

# Plots

## Deaths over time

In [19]:
from datetime import datetime, timedelta

import plotly.graph_objects as go
import plotly

# Right edge of the date axis: one day past the latest observation.
end_date = df_deaths.index[-1] + timedelta(days=1)
# Text for the "Updated ..." note: that date plus a fixed time-of-day label.
date = end_date.strftime('%Y-%m-%d') + ' 03:00 CET'

def plot_graph(data, title, x_title, y_title, file_name, date, template='seaborn',end_date=end_date):
    """Plot one line per column of ``data``, save the figure as HTML and show it.

    Parameters
    ----------
    data : DataFrame
        Its index supplies the x values; each column becomes one named trace.
    title, x_title, y_title : str
        Figure title and axis titles.
    file_name : str
        Path of the HTML file written by ``plotly.offline.plot``.
    date : str
        Text placed after "Updated " in the small red note anchored at paper
        coordinates (1, 0).
    template : str
        Plotly template name.
    end_date :
        Upper bound of the initial x-axis range; the lower bound is the
        first index value of ``data``.
    """
    fig = go.Figure()

    # One trace per column, labelled with the column name.
    for name, series in data.items():
        fig.add_trace(go.Scatter(x=data.index, y=series, name=name))

    # Small "Updated ..." note, positioned in paper coordinates.
    updated_note = dict(
        x=1,
        y=0,
        text="Updated {}".format(date),
        showarrow=False,
        xref='paper',
        yref='paper',
        xanchor='right',
        yanchor='bottom',
        xshift=0,
        yshift=0,
        font=dict(color="red", size=8.5),
    )

    fig.update_layout(
        template=template,
        title_text=title,
        xaxis_title=x_title,
        xaxis=dict(tickmode='linear', fixedrange=True),
        xaxis_range=[data.index[0], end_date],
        yaxis_title=y_title,
        hovermode='x',
        xaxis_rangeslider_visible=True,
        annotations=[updated_note],
    )

    # Write the standalone HTML file without opening a browser, then render inline.
    plotly.offline.plot(fig, filename=file_name, auto_open=False)
    fig.show()
    
# Charts over calendar dates: default template and default x-axis end.
calendar_charts = [
    (df_deaths, 'COVID19 Total Nordic Deaths, starting March 10 2020',
     "Deaths", 'deaths.html'),
    (df_deaths_per_mn, 'COVID19 Total Nordic Deaths per Mn inhabitants, starting March 10 2020',
     "Deaths per Mn inhabitants", 'deaths_mn.html'),
]
for frame, chart_title, y_label, out_file in calendar_charts:
    plot_graph(frame, chart_title, "Date", y_label, out_file, date)

# Charts over days since first death: the x-axis ends one past the last row
# number of the day-aligned frame.
last_day = df_deaths_1.index[-1] + 1
since_first_death_charts = [
    (df_deaths_1, 'COVID19 Total Nordic Deaths, daily data since first death',
     "Deaths", 'deaths_1.html'),
    (df_deaths_per_mn_1, 'COVID19 Total Nordic Deaths per Mn inhabitants, daily data since first death',
     "Deaths per Mn inhabitants", 'deaths_mn_1.html'),
]
for frame, chart_title, y_label, out_file in since_first_death_charts:
    plot_graph(frame, chart_title, "Days since first Death", y_label, out_file, date,
               template='plotly_white', end_date=last_day)