In [1]:
import warnings
from string import ascii_lowercase
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from datetime import datetime

import geopandas as gpd
from shapely.geometry import Polygon
from shapely.ops import unary_union
import cartopy.crs as ccrs
from cartopy.feature import OCEAN
import mapclassify

import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText
from matplotlib.colors import to_rgb

warnings.filterwarnings('ignore')

In [2]:
# Import vaccine and epidemiological data - source: https://github.com/apalladi/covid_vaccini_monitoraggio/blob/main/scripts/confronti_europei.py
def import_vaccines_data():
    """Download the Our World in Data COVID-19 vaccination table.

    Missing values are forward-filled so that every row carries the most
    recently reported figure.

    Returns:
        pandas.DataFrame: the content of ``vaccinations.csv`` after the
        forward fill.
    """
    url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv"
    df_vacc = pd.read_csv(url)
    # DataFrame.ffill() is the supported spelling of the deprecated
    # fillna(method="ffill") and produces the same result.
    # NOTE(review): the fill runs over the whole table, not per location, so
    # leading gaps of one location presumably inherit values from the rows
    # above it - confirm this is intended.
    df_vacc = df_vacc.ffill()
    return df_vacc

def get_vaccine_data(country, df_vacc):
    """Extract the vaccination time series of a single country.

    Args:
        country: value to match against the ``location`` column.
        df_vacc: vaccination table with ``location``, ``date``,
            ``people_vaccinated_per_hundred`` and
            ``people_fully_vaccinated_per_hundred`` columns.

    Returns:
        pandas.DataFrame: indexed by date, with the columns
        "% vaccinated with 1 dose" and "% fully vaccinated". The first two
        rows of the country are skipped.
    """
    country_rows = df_vacc.loc[df_vacc["location"] == country].iloc[2:, :]

    # Stack the two percentage series side by side (one row per date)
    percentages = np.transpose([
        np.array(country_rows["people_vaccinated_per_hundred"]),
        np.array(country_rows["people_fully_vaccinated_per_hundred"]),
    ])

    result = pd.DataFrame(percentages)
    result.columns = ["% vaccinated with 1 dose", "% fully vaccinated"]
    result.index = pd.to_datetime(country_rows["date"])
    return result

def import_epidem_data():
    """Download the global COVID-19 time series published by JHU CSSE
    (Johns Hopkins University).

    Returns:
        tuple: three DataFrames read from the confirmed, deaths and
        recovered CSV files, in that order.
    """
    base = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"  # noqa: E501
    file_names = (
        "time_series_covid19_confirmed_global.csv",
        "time_series_covid19_deaths_global.csv",
        "time_series_covid19_recovered_global.csv",
    )
    confirmed, deaths, recovered = (pd.read_csv(base + name) for name in file_names)
    return confirmed, deaths, recovered

def get_vaccine_data_last(country, time_window=30, t0=-1, fully=True, last_day=False):
    """Mean vaccinated percentage of a country over a trailing window.

    Reads the module-level ``df_vacc`` table.

    Args:
        country: value to match against the ``location`` column.
        time_window: window length; the slice starts at
            ``t0 - (time_window + 1)``.
        t0: (negative) position used as reference for the window start.
        fully: use ``people_fully_vaccinated_per_hundred`` when true,
            ``people_vaccinated_per_hundred`` otherwise.
        last_day: when true the slice ends at ``t0``, otherwise at ``-1``.

    Returns:
        The mean of the selected values.
    """
    column = ("people_fully_vaccinated_per_hundred"
              if fully else
              "people_vaccinated_per_hundred")

    # The first two rows of each country are skipped
    country_rows = df_vacc[df_vacc["location"] == country].iloc[2:, :]
    percentages = np.array(country_rows[column])

    window_start = t0 - (time_window + 1)
    # NOTE(review): with the default t0=-1 both branches yield the same end
    # index, so last_day has no effect - confirm the intended semantics.
    window_stop = t0 if last_day else -1
    return np.mean(percentages[window_start:window_stop])


def get_deaths(country, time_window=30, t0=-1):
    """Deaths recorded for a country within the selected time window.

    Reads the module-level ``df_deaths`` table. All rows whose
    ``Country/Region`` equals ``country`` are summed column by column,
    starting from the fifth column (presumably the first date column -
    verify against the JHU file layout).

    Returns:
        Difference between the summed value at position ``t0`` and the one
        at position ``t0 - (time_window + 1)``.
    """
    is_country = df_deaths["Country/Region"] == country
    totals = np.array(df_deaths[is_country].iloc[:, 4:].sum())
    window_start = t0 - (time_window + 1)
    return totals[t0] - totals[window_start]


def compute_vaccini_decessi_eu(tw, fully=True, last_day=False):
    """Compute vaccination share and deaths for the countries listed in
    ``paesi_abitanti_eu`` (the 27 EU members).

    Args:
        tw: time window forwarded to both data helpers.
        fully: forwarded to ``get_vaccine_data_last``.
        last_day: forwarded to ``get_vaccine_data_last``.

    Returns:
        tuple: ``(vacc_res_2021, dec_res_2021)`` where the first item is a
        list with one vaccination value per country and the second a NumPy
        array with the deaths of each country divided by its
        ``paesi_abitanti_eu`` value; both follow the dictionary order.
    """
    # Single reference position shared by both series, so the vaccination
    # and death windows cannot drift apart.
    t0 = -1

    vacc_res_2021 = []
    dec_res_2021 = []
    for p, abitanti in paesi_abitanti_eu.items():
        vacc_res_2021.append(get_vaccine_data_last(p,
                                                   time_window=tw,
                                                   t0=t0,
                                                   fully=fully,
                                                   last_day=last_day))
        dec_res_2021.append(get_deaths(p, time_window=tw, t0=t0)/abitanti)
    dec_res_2021 = np.array(dec_res_2021)
    return vacc_res_2021, dec_res_2021

def linear_model(x, coeff_fit):
    """Evaluate a straight line at ``x``.

    ``coeff_fit[0]`` is the slope and ``coeff_fit[1]`` the intercept.
    """
    slope = coeff_fit[0]
    intercept = coeff_fit[1]
    return intercept + slope*x

def linear_fit(vacc_res_2021, dec_res_2021):
    """Fit a first-degree polynomial to deaths vs. vaccination values.

    Prints the R2 score of the fit.

    Returns:
        tuple: ``(x_grid, y_grid, score)`` - the integer grid 0..99, the
        fitted line evaluated on that grid (a list), and the R2 score
        rounded to two decimals.
    """
    coeff_fit = np.polyfit(vacc_res_2021, dec_res_2021, 1)

    def predict(values):
        # Evaluate the fitted line point by point
        return [linear_model(value, coeff_fit) for value in values]

    x_grid = np.arange(0, 100, 1)
    y_grid = predict(x_grid)

    # R2 score of the fit on the input points
    score = round(r2_score(dec_res_2021, predict(vacc_res_2021)), 2)
    print('R2 score è pari a', score)

    return x_grid, y_grid, score

In [3]:
def get_map_labels(countries_df, extent_polygon):
    """Build label positions for the countries listed in ``adm0_a3_it``.

    The countries are clipped to ``extent_polygon``; for each one whose
    ``ADM0_A3_IT`` code appears among the ``adm0_a3_it`` values, a
    representative point is transformed from ``ccrs_plate`` to
    ``ccrs_albers`` coordinates.

    Returns:
        list: ``(code, (x, y))`` tuples, one per labelled country.
    """
    # Manual nudges (dx, dy) in projected units (presumably metres - confirm)
    # applied to the labels of a few small countries.
    label_offsets = {
        "LUX": (100000, 0),
        "CYP": (0, -100000),
        "MLT": (0, -50000),
    }

    map_labels = []
    clipped = gpd.clip(countries_df, extent_polygon)
    for _, country in clipped.iterrows():
        code = country["ADM0_A3_IT"]
        if code not in adm0_a3_it.values():
            continue

        anchor = country["geometry"].representative_point()
        projected = ccrs_albers.transform_point(anchor.x,
                                                anchor.y,
                                                src_crs=ccrs_plate)
        labelx, labely = projected[0], projected[1]

        if code in label_offsets:
            shift_x, shift_y = label_offsets[code]
            labelx, labely = labelx + shift_x, labely + shift_y

        map_labels.append((code, (labelx, labely)))
    return map_labels

def get_borders(eu_countries, countries_df):
    """Split country outlines into EU and non-EU border lines.

    Args:
        eu_countries: frame whose ``geometry`` column holds the EU countries.
        countries_df: frame whose ``geometry`` column holds all countries.

    Returns:
        tuple: ``(eu_borders, no_eu_borders)`` - the parts of the merged EU
        boundaries, and the parts of the merged boundaries of all countries
        that remain after removing the EU ones.
    """
    def boundary_parts(frame):
        # Flatten every geometry boundary into a list of single line parts
        parts = []
        for _, record in frame.iterrows():
            outline = record["geometry"].boundary
            if outline.geom_type == "MultiLineString":
                parts.extend(outline.geoms)
            else:
                parts.append(outline)
        return parts

    eu_merged = unary_union(boundary_parts(eu_countries))
    all_merged = unary_union(boundary_parts(countries_df))

    no_eu_borders = list(all_merged.difference(eu_merged).geoms)
    eu_borders = list(eu_merged.geoms)
    return eu_borders, no_eu_borders

In [4]:
# Sequential bivariate color schemes - https://www.joshuastevens.net/cartography/make-a-bivariate-choropleth-map/
# Keys combine a letter (A-C) and a digit (0-2); each palette is listed
# from the "2" row down to the "0" row, and that insertion order is what
# the legend array below relies on.
map_colors_1 = dict(A2="#6c83b5", B2="#567994", C2="#2a5a5b",
                    A1="#b5c0da", B1="#90b2b3", C1="#5a9178",
                    A0="#e8e8e8", B0="#b8d6be", C0="#73ae80")

map_colors_2 = dict(A2="#c85a5a", B2="#985356", C2="#574249",
                    A1="#e4acac", B1="#ad9ea5", C1="#627f8c",
                    A0="#e8e8e8", B0="#b0d5df", C0="#64acbe")

map_colors_3 = dict(A2="#be64ac", B2="#8c62aa", C2="#3b4994",
                    A1="#dfb0d6", B1="#a5add3", C1="#5698b9",
                    A0="#e8e8e8", B0="#ace4e4", C0="#5ac8c8")

map_colors_4 = dict(A2="#c8b35a", B2="#af8e53", C2="#804d36",
                    A1="#e4d9ac", B1="#c8ada0", C1="#976b82",
                    A0="#e8e8e8", B0="#cbb8d7", C0="#9972af")

# Palette actually used for the output map
output_map_colors = map_colors_1

# Class index -> upper-case letter (0 -> "A", 1 -> "B", ...)
bivariate_letters = dict(enumerate(ascii_lowercase.upper()))

# 3x3 grid of RGB triplets, in the insertion order of the chosen palette
bivariate_legend = np.array(
    [to_rgb(hex_color) for hex_color in output_map_colors.values()]
).reshape(3, 3, 3)

# Fonts, sizes and text color shared by the map annotations
admin_labels_font = "Arial"
texts_font = "Palatino Linotype"
normal_fontsize = 12
title_fontsize = 26
texts_color = tuple(channel/255 for channel in (14, 14, 16))

In [5]:
# Source coordinate system of the data (PlateCarree)
ccrs_plate = ccrs.PlateCarree()

# Projection used to draw the map (Albers equal-area)
ccrs_albers = ccrs.AlbersEqualArea()

# Europe extent as longitude/latitude bounds
lon_min, lon_max = -15.5, 40
lat_min, lat_max = 33, 70

# Closed rectangular ring around the extent (first corner repeated at the end)
extent_polygon = Polygon([
    (lon_min, lat_min),
    (lon_max, lat_min),
    (lon_max, lat_max),
    (lon_min, lat_max),
    (lon_min, lat_min),
])

In [6]:
# Map classifier: JenksCaspall scheme configured for k=3 classes.
# The object returned by make() is later passed to DataFrame.apply to
# classify the "Dth" and "Vax" columns.
map_classifier = mapclassify.JenksCaspall.make(k=3)

In [7]:
# One row per EU-27 member:
# (English name, Italian name, ADM0_A3_IT code, inhabitants).
# Inhabitants are presumably expressed in millions (the map labels the
# derived ratio "Decessi per mln") - confirm against the data source.
_eu_members = [
    ("Austria", "Austria", "AUT", 8.917),
    ("Belgium", "Belgio", "BEL", 11.56),
    ("Bulgaria", "Bulgaria", "BGR", 6.927),
    ("Cyprus", "Cipro", "CYP", 1.207),
    ("Croatia", "Croazia", "HRV", 4.047),
    ("Denmark", "Danimarca", "DNK", 5.831),
    ("Estonia", "Estonia", "EST", 1.331),
    ("Finland", "Finlandia", "FIN", 5.531),
    ("France", "Francia", "FRA", 67.39),
    ("Germany", "Germania", "DEU", 83.24),
    ("Greece", "Grecia", "GRC", 10.27),
    ("Ireland", "Irlanda", "IRL", 4.995),
    ("Italy", "Italia", "ITA", 59.55),
    ("Latvia", "Lettonia", "LVA", 1.902),
    ("Lithuania", "Lituania", "LTU", 2.795),
    ("Luxembourg", "Lussemburgo", "LUX", 0.632275),
    ("Malta", "Malta", "MLT", 0.525285),
    ("Netherlands", "Olanda", "NLD", 17.44),
    ("Poland", "Polonia", "POL", 37.95),
    ("Portugal", "Portogallo", "PRT", 10.31),
    ("Czechia", "Repubblica Ceca", "CZE", 10.7),
    ("Romania", "Romania", "ROU", 19.29),
    ("Slovakia", "Slovacchia", "SVK", 5.549),
    ("Slovenia", "Slovenia", "SVN", 2.1),
    ("Spain", "Spagna", "ESP", 47.35),
    ("Sweden", "Svezia", "SWE", 10.35),
    ("Hungary", "Ungheria", "HUN", 9.75),
]

# English name -> inhabitants
paesi_abitanti_eu = {name_en: inhabitants for name_en, _, _, inhabitants in _eu_members}

# Italian names, in the same order as the dictionaries
paesi_eu_ita = [name_it for _, name_it, _, _ in _eu_members]

# English name -> three-letter code matched against the ADM0_A3_IT column
adm0_a3_it = {name_en: code for name_en, _, code, _ in _eu_members}

In [8]:
# Length of the time window, in days
window = 30

# Download the epidemiological and vaccination tables
df_confirmed, df_deaths, df_recovered = import_epidem_data()
df_vacc = import_vaccines_data()

# Vaccination share and deaths of each country over that window
vacc_res_2021, dec_res_2021 = compute_vaccini_decessi_eu(tw=window,
                                                         fully=False,
                                                         last_day=False)

# Linear fit of deaths against vaccination share
x_grid, y_grid, score = linear_fit(vacc_res_2021=vacc_res_2021,
                                   dec_res_2021=dec_res_2021)

# Pearson correlation coefficient: off-diagonal entry of the correlation matrix
corr_matrix = np.corrcoef(vacc_res_2021, dec_res_2021)
corr_coeff = round(corr_matrix[0, 1], 2)

R2 score è pari a 0.67


In [9]:
# One row per country: name, vaccination value ("Vax") and deaths value ("Dth")
vacc_dec_2021 = pd.DataFrame(zip(paesi_abitanti_eu.keys(), vacc_res_2021, dec_res_2021),
                             columns=["Country", "Vax", "Dth"])

# Add the country code column used to join with the Natural Earth layer.
# Series.map performs the dictionary lookup on the whole column at once,
# instead of calling a Python lambda for every row.
vacc_dec_2021["ADM0_A3_IT"] = vacc_dec_2021["Country"].map(adm0_a3_it)

In [10]:
# Open the 50m world countries layer from Natural Earth.
# Forward slashes keep the relative path valid on every operating system
# (the former backslash-separated path only resolved on Windows).
countries_df = gpd.read_file("../Data/ne_50m_admin_0_countries.shp")

# Get borders recognized by Italy: merge all features that share the same
# ADM0_A3_IT code, keeping the last attribute values of each group
countries_df = countries_df.dissolve(by='ADM0_A3_IT', aggfunc='last').reset_index()

# Get European countries: inner join with the vaccination/deaths table,
# so only codes present in both tables are kept
eu_countries = pd.merge(countries_df, vacc_dec_2021,
                        on='ADM0_A3_IT')[["ADM0_A3_IT", "Vax", "Dth", "geometry"]]

In [11]:
# Get map labels: (country code, projected label position) pairs for the
# listed countries that fall inside the map extent
map_labels = get_map_labels(countries_df, extent_polygon)

In [12]:
# Get EU/Non-EU borders as two separate lists of line geometries, so they
# can be drawn with different line widths
eu_borders, no_eu_borders = get_borders(eu_countries, countries_df)

In [13]:
# Create the bivariate classes.
# Each variable is classified on its own with the shared map classifier;
# deaths classes are stored as strings so they can be concatenated below.
eu_countries["Dth_class"] = eu_countries[['Dth']].apply(map_classifier).astype(str)
eu_countries["Vax_class"] = eu_countries[['Vax']].apply(map_classifier)

# Vaccination class index -> letter (0 -> "A", 1 -> "B", ...). Series.map does
# the dictionary lookup column-wise instead of a row-wise apply.
eu_countries["Vax_class_num"] = eu_countries["Vax_class"].map(bivariate_letters)

# Bivariate key = vaccination letter + deaths class (e.g. "B2"), then its color
eu_countries["Biv_class"] = eu_countries["Vax_class_num"].str.cat(eu_countries["Dth_class"])
eu_countries["Biv_color"] = eu_countries["Biv_class"].map(output_map_colors)

In [14]:
# Plot the map: one square figure with a single axes in the Albers projection
fig, ax = plt.subplots(figsize=(14, 14), subplot_kw={'projection': ccrs_albers})
fig.set_tight_layout(True)


def _add_corner_text(axes, text, loc, **extra_props):
    """Anchor a frameless text box at a corner of ``axes`` and return it.

    The text uses the shared color, font and normal font size unless
    overridden through ``extra_props``.
    """
    props = dict(color=texts_color, fontsize=normal_fontsize, fontname=texts_font)
    props.update(extra_props)
    box = AnchoredText(text, prop=props, borderpad=0., frameon=False, loc=loc)
    axes.add_artist(box)
    return box


# Add map title
map_title = _add_corner_text(ax, "Frazione di vaccinati & decessi (EU-27)",
                             "upper left",
                             fontsize=title_fontsize, weight="bold", ha="center")

# Add map credits
map_sources = _add_corner_text(
    ax,
    "Fonti: Johns Hopkins University, Our World in Data, Natural Earth, joshuastevens.net",
    "lower left")

# Add map author
map_author = _add_corner_text(ax,
                              "Autore: Ivan D'Ortenzio\nProiezione: Albers Equal-Area Conic",
                              "lower right",
                              ha="right")

# Add last update date (local time at which the cell is executed)
last_update = datetime.today().strftime("%d-%m-%Y %H:%M")
map_date = _add_corner_text(ax, f"Ultimo aggiornamento: {last_update}", "upper right")

# Add watermark: light grey vertical text along the right edge of the figure
fig.text(0.96, 0.40, 'github.com/apalladi/covid_vaccini_monitoraggio',
         fontname=texts_font, fontsize=24, weight="bold", color="#CCCCCC",
         va="center", rotation='vertical')

# Add bivariate map legend: the 3x3 color grid with an arrow along each axis
legend_ticks = [0.16, 0.5, 0.83]
legend_levels = ["Basso", "Medio", "Alto"]
arrow_props = dict(fontname=texts_font, color=texts_color, fontsize=18)

map_legend = fig.add_axes([0.1, 0.75, 0.15, 0.15])
map_legend.imshow(bivariate_legend, extent=(0, 1, 0, 1))
map_legend.set_ylabel(r'$\longrightarrow$', **arrow_props)
map_legend.set_xlabel(r'$\longrightarrow$', **arrow_props)
map_legend.set_xticks(legend_ticks)
map_legend.set_yticks(legend_ticks)
map_legend.tick_params(axis='both', which='both', length=0)
map_legend.set_xticklabels(legend_levels)
map_legend.set_yticklabels(legend_levels)
map_legend.set_facecolor((0, 0, 0, 0))


def _add_color_strip(rect, swatches, caption):
    """Add a tick-less 1x3 color strip with a caption below it."""
    strip = fig.add_axes(rect)
    strip.imshow(swatches, extent=(0, 1, 0, 0.2))
    strip.set_xlabel(caption,
                     color=texts_color,
                     fontsize=normal_fontsize,
                     fontname=texts_font)
    strip.set_xticks([])
    strip.set_yticks([])
    strip.set_xticklabels([])
    strip.set_yticklabels([])
    return strip


# Add Vax legend: last row of the grid (keys A0, B0, C0 of the palette)
vax_array = bivariate_legend[2:, :, :]
vax_legend = _add_color_strip([0.27, 0.84, 0.10, 0.10], vax_array, "% Vaccinati")

# Add Dth legend: first column of the grid laid out as one row and reversed,
# so it runs from key A0 to key A2
dth_array = np.fliplr(bivariate_legend[:, :1, :].reshape(1, 3, 3))
dth_legend = _add_color_strip([0.27, 0.80, 0.10, 0.10], dth_array, "Decessi per mln")

# Add the scatter plot: one marker per country, colored with its bivariate
# color, plus the fitted line
ax_scatter = fig.add_axes([0.73, 0.73, 0.23, 0.18])
ax_scatter.scatter(eu_countries["Vax"], eu_countries["Dth"], color=eu_countries["Biv_color"], marker="h")
ax_scatter.plot(x_grid, y_grid, linestyle="--")
ax_scatter.set_ylabel(r'Decessi per mln',
                      fontname=texts_font,
                      color=texts_color,
                      fontsize=normal_fontsize)
ax_scatter.set_xlabel(r'% Vaccinati',
                      fontname=texts_font,
                      color=texts_color,
                      fontsize=normal_fontsize)

# Fix the axis limits, then pin the tick positions before labelling them.
# Setting tick labels without fixed positions assigns them to whatever ticks
# the automatic locator produces, so they could end up on the wrong values.
ax_scatter.set_xlim(0, 100)
ax_scatter.set_ylim(bottom=-70)
scatter_xticks = list(range(0, 101, 20))
ax_scatter.set_xticks(scatter_xticks)
ax_scatter.set_xticklabels([f"{tick}%" for tick in scatter_xticks])

# Raw f-strings keep the mathtext "\;" spacing command without relying on an
# invalid escape sequence; the two lines are joined by a real newline.
scatter_stats = "\n".join([rf"$r\;=\;{corr_coeff}$", rf"$R^2\;=\;{score}$"])
ax_scatter_legend = AnchoredText(scatter_stats,
                                 prop=dict(fontname=texts_font,
                                           color=texts_color,
                                           fontsize=normal_fontsize,
                                           ha="left"),
                                 borderpad=0,
                                 frameon=False,
                                 loc=1)
ax_scatter.add_artist(ax_scatter_legend)

# Semi-transparent white background so the map stays visible underneath
ax_scatter.set_facecolor((1, 1, 1, 0.5))


# Add the ocean background
ax.add_feature(OCEAN, color="aliceblue", zorder=1)

# Add the border lines: thicker for EU countries, thinner for the others
for border_lines, line_width in ((eu_borders, 0.6), (no_eu_borders, 0.2)):
    ax.add_geometries(border_lines, crs=ccrs_plate, edgecolor=texts_color,
                      lw=line_width, facecolor=(0, 0, 0, 0))

# Plot every country in a neutral color first
countries_df.plot(color="whitesmoke", ax=ax, transform=ccrs_plate)

# Plot the bivariate map on top of it
eu_countries.plot(color=eu_countries["Biv_color"], ax=ax, transform=ccrs_plate)

# Add EU-countries labels at their precomputed positions
for label_text, label_position in map_labels:
    ax.annotate(label_text, (label_position[0], label_position[1]),
                fontname=admin_labels_font,
                color=texts_color,
                fontsize=normal_fontsize,
                ha="center",
                va="center")

# Set map extent, write the image to disk and release the figure
ax.set_extent((lon_min, lon_max, lat_min, lat_max))
fig.savefig("01_Vax_vs_Deaths_Biv_Map.jpg", dpi=300, bbox_inches='tight')
plt.close(fig)