# COVID19-EU

Statistics and Forecasting for the Coronavirus disease (COVID-19) in the European Union.

## Source of Data

 Johns Hopkins CSSE Repository: [https://github.com/CSSEGISandData/COVID-19](https://github.com/CSSEGISandData/COVID-19).

## Source of EU country names

Official website of the European Union: [https://europa.eu/european-union/about-eu/countries_en](https://europa.eu/european-union/about-eu/countries_en)

### Load libraries

In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
%matplotlib inline 

### Load Data from Johns Hopkins CSSE Repository

In [None]:
# Johns Hopkins CSSE superseded the original `time_series_19-covid-*.csv` files
# with `time_series_covid19_*_global.csv`; the old paths are no longer served
# from the master branch. The replacement files are expected to keep the same
# four leading metadata columns followed by one column per date.
_CSSE_TIME_SERIES_URL = ('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
                         'csse_covid_19_data/csse_covid_19_time_series/')
confirmed_data = pd.read_csv(_CSSE_TIME_SERIES_URL + 'time_series_covid19_confirmed_global.csv')
deaths_data = pd.read_csv(_CSSE_TIME_SERIES_URL + 'time_series_covid19_deaths_global.csv')
# Column labels of the confirmed-cases table; reused below to locate the date columns.
fields = confirmed_data.keys()

### EU Country Names

In [None]:
# The 27 EU member states, in alphabetical order, spelled as they are matched
# against the 'Country/Region' column of the loaded tables.
EU_Countries = [
    'Austria',
    'Belgium',
    'Bulgaria',
    'Croatia',
    'Cyprus',
    'Czechia',
    'Denmark',
    'Estonia',
    'Finland',
    'France',
    'Germany',
    'Greece',
    'Hungary',
    'Ireland',
    'Italy',
    'Latvia',
    'Lithuania',
    'Luxembourg',
    'Malta',
    'Netherlands',
    'Poland',
    'Portugal',
    'Romania',
    'Slovakia',
    'Slovenia',
    'Spain',
    'Sweden',
]

In [None]:
# Per-date totals for a set of rows, e.g. all rows of one country (function)
def get_per_day_data(data, fields):
    """Sum every date column of ``data`` over its rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to aggregate. The columns from ``fields[4]`` through
        ``fields[-1]`` must exist in it and carry string labels.
    fields : sequence of column labels
        ``fields[4]`` and ``fields[-1]`` delimit the (label-based, inclusive)
        range of columns that is aggregated; the first four entries are
        skipped as metadata.

    Returns
    -------
    days : list of str
        One label per aggregated column. The last three characters of each
        column label are dropped (assumes labels look like ``m/d/yy``, so this
        removes the ``/yy`` year suffix). If that shortening would make two
        labels identical, e.g. when the data spans more than one year, the
        full column labels are returned instead so every date stays distinct.
    data_per_day : numpy.ndarray, shape (n_dates, 1)
        Column-wise sums, in the same order as ``days``.
    """
    date_columns = data.loc[:, fields[4]:fields[-1]]
    labels = list(date_columns.columns)

    # Short labels keep the x axis compact, but are only usable while unique.
    short_labels = [label[0:-3] for label in labels]
    days = short_labels if len(set(short_labels)) == len(labels) else labels

    totals = [data[label].sum() for label in labels]
    # Column vector, one row per date.
    data_per_day = np.array(totals).reshape(-1, 1)
    return days, data_per_day

# Per-country daily totals plus a SARIMAX forecast for each EU country (function)
def get_per_day_forecast(data_loaded, fields, EU_Countries, data_per_day_EU, data_EU_sum, data_week_forecast):
    """Aggregate each country's series and forecast past its last observation.

    For every name in ``EU_Countries`` the rows of ``data_loaded`` whose
    'Country/Region' equals that name are reduced with ``get_per_day_data``,
    and a SARIMAX(1, 1, 1) model with a constant trend is fitted to the
    resulting series.

    Parameters
    ----------
    data_loaded : pandas.DataFrame
        Table with a 'Country/Region' column and one column per date.
    fields : sequence of column labels
        Forwarded to ``get_per_day_data`` to locate the date columns.
    EU_Countries : iterable of str
        Country names to process, in output order.
    data_per_day_EU : list
        Output: receives one per-date array per country.
    data_EU_sum : list
        Output: receives, per country, the integer sum of its per-date values
        over all dates.
    data_week_forecast : list
        Output: receives, per country, the model predictions for indices
        ``len(series)`` through ``len(series) + 6``.

    Returns
    -------
    list of str
        Date labels of the last processed country, or an empty list when
        ``EU_Countries`` is empty.
    """
    # Defined up front so an empty country list returns cleanly instead of
    # raising UnboundLocalError at the return statement.
    days = []
    for c in EU_Countries:
        instances = data_loaded[data_loaded['Country/Region'] == c]
        days, data_per_day = get_per_day_data(instances, fields)

        # sum up the facts for plotting later sorted results
        data_EU_sum.append(int(data_per_day.sum()))
        data_per_day_EU.append(data_per_day)

        # The model expects a 1-D series, not the (n, 1) column vector.
        data = data_per_day.flatten()
        model = SARIMAX(data, trend='c', order=(1, 1, 1), enforce_stationarity=False, enforce_invertibility=False)
        model_fit = model.fit(disp=False, transparams=False)
        data_week_forecast.append(model_fit.predict(len(data), len(data)+6, typ='levels'))
    return days

In [None]:
# Confirmed cases: per-country daily series, ranking totals and forecasts
confirmed_per_day_EU, confirmed_EU_sum, confirmed_week_forecast = [], [], []
days = get_per_day_forecast(
    confirmed_data, fields, EU_Countries,
    confirmed_per_day_EU, confirmed_EU_sum, confirmed_week_forecast,
)

# Deaths: same pipeline on the deaths table (rebinds `days`)
deaths_per_day_EU, deaths_EU_sum, deaths_week_forecast = [], [], []
days = get_per_day_forecast(
    deaths_data, fields, EU_Countries,
    deaths_per_day_EU, deaths_EU_sum, deaths_week_forecast,
)

# Country positions ordered by ascending total; the plots walk these in reverse
# so the largest totals are drawn first.
confirmed_per_day_EU_idx = sorted(range(len(confirmed_EU_sum)), key=confirmed_EU_sum.__getitem__)
deaths_per_day_EU_idx = sorted(range(len(deaths_EU_sum)), key=deaths_EU_sum.__getitem__)

In [None]:
def plot_statistics(x_label_text, y_label_text, plt_title, fig_dim1, fig_dim2, colors, legend_font_size,
                    EU_Countries, sorted_index, data_per_day, days, days_with_forecast, forecast_data, top_n):
    """Plot observed series and forecast points for the top-ranked countries.

    Parameters
    ----------
    x_label_text, y_label_text, plt_title : str
        Axis labels and figure title.
    fig_dim1, fig_dim2 : number
        Figure width and height, written to ``plt.rcParams['figure.figsize']``.
    colors : sequence
        One colour per rank; indexed by the rank counter, not by country.
    legend_font_size : number
        Font size applied to the x tick labels.
    EU_Countries : sequence of str
        Country names used as legend labels.
    sorted_index : sequence of int
        Country positions in ascending order of their total; traversed in
        reverse so the largest comes first.
    data_per_day : sequence
        Observed series per country, plotted against ``days``.
    days : sequence of str
        X values for the observed series.
    days_with_forecast : sequence of str
        X values for the forecast points.
    forecast_data : sequence
        Forecast values per country.
    top_n : int
        Maximum number of countries to draw.
    """
    plt.rcParams['figure.figsize'] = [fig_dim1, fig_dim2]
    for cnt, i in enumerate(reversed(sorted_index)):
        # Stop before drawing: checking after the plot calls drew top_n + 1 countries.
        if cnt >= top_n:
            break
        plt.plot(days, data_per_day[i], label=EU_Countries[i], color=colors[cnt])
        plt.scatter(days_with_forecast, forecast_data[i], s=10, marker='o', color=colors[cnt])
    plt.ylabel(y_label_text)
    plt.xlabel(x_label_text)
    plt.title(plt_title)
    plt.legend(loc='upper left')
    plt.xticks(fontsize=legend_font_size, rotation=90)
    plt.tight_layout()
    plt.show()

# X-axis labels for the forecast points: '+1', '+2', ... one per forecast step
days_with_forecast = ['+{}'.format(step) for step in range(1, confirmed_week_forecast[0].shape[0] + 1)]

# Seaborn default theme, then a fixed seed so the 27 random colours are reproducible
sns.set()
np.random.seed(55)
colors = plt.cm.hsv(np.random.rand(27))

### 10 Most infected EU countries

In [None]:
# Confirmed cases, limited to the ten highest-ranked countries
plot_statistics(
    x_label_text='Date',
    y_label_text='COVID-19 Infected (Cumulative)',
    plt_title='The 10 most infected EU countries and 7 days forecasting (Johns Hopkins CSSE data source)',
    fig_dim1=10,
    fig_dim2=5,
    colors=colors,
    legend_font_size=8,
    EU_Countries=EU_Countries,
    sorted_index=confirmed_per_day_EU_idx,
    data_per_day=confirmed_per_day_EU,
    days=days,
    days_with_forecast=days_with_forecast,
    forecast_data=confirmed_week_forecast,
    top_n=10,
)

### All EU countries

In [None]:
# Confirmed cases for every EU country
plot_statistics(
    x_label_text='Date',
    y_label_text='COVID-19 Infected (Cumulative)',
    plt_title='The infected EU countries and 7 days forecasting (Johns Hopkins CSSE data source)',
    fig_dim1=15,
    fig_dim2=8,
    colors=colors,
    legend_font_size=8,
    EU_Countries=EU_Countries,
    sorted_index=confirmed_per_day_EU_idx,
    data_per_day=confirmed_per_day_EU,
    days=days,
    days_with_forecast=days_with_forecast,
    forecast_data=confirmed_week_forecast,
    top_n=len(confirmed_per_day_EU_idx),
)

### 10 EU countries with most deaths

In [None]:
# Deaths, limited to the ten highest-ranked countries
plot_statistics(
    x_label_text='Date',
    y_label_text='COVID-19 Deaths (Cumulative)',
    plt_title='The first 10 EU countries with most deaths and 7 days forecasting (Johns Hopkins CSSE data source)',
    fig_dim1=10,
    fig_dim2=5,
    colors=colors,
    legend_font_size=8,
    EU_Countries=EU_Countries,
    sorted_index=deaths_per_day_EU_idx,
    data_per_day=deaths_per_day_EU,
    days=days,
    days_with_forecast=days_with_forecast,
    forecast_data=deaths_week_forecast,
    top_n=10,
)

### Deaths in all EU countries

In [None]:
# Deaths for every EU country
plot_statistics(
    x_label_text='Date',
    y_label_text='COVID-19 Deaths (Cumulative)',
    plt_title='The deaths of the EU countries and 7 days forecasting (Johns Hopkins CSSE data source)',
    fig_dim1=15,
    fig_dim2=8,
    colors=colors,
    legend_font_size=8,
    EU_Countries=EU_Countries,
    sorted_index=deaths_per_day_EU_idx,
    data_per_day=deaths_per_day_EU,
    days=days,
    days_with_forecast=days_with_forecast,
    forecast_data=deaths_week_forecast,
    top_n=len(deaths_per_day_EU_idx),
)