In [None]:
import pandas as pd
import numpy
import matplotlib.pyplot as plt
import datetime
import xml.etree.ElementTree

In [None]:
def date_is_valid(date):
    """Report whether `date` can be parsed by `datetime.date.fromisoformat`.

    Returns True when parsing succeeds and False when it raises ValueError
    (for example '2019-02-29', which is not a real calendar day).
    """
    try:
        datetime.date.fromisoformat(date)
    except ValueError:
        return False
    else:
        return True

def days_from_jan_1st(date):
    """Return how many days separate an ISO date string from Jan 1st of its year.

    January 1st itself yields 0. If `date` cannot be parsed, the offending
    value is printed and the ValueError is re-raised to the caller.
    """
    try:
        parsed = datetime.date.fromisoformat(date)
    except ValueError as error:
        print(date)
        raise error
    year_start = datetime.date(parsed.year, 1, 1)
    # Difference of proleptic ordinals == number of whole days elapsed.
    return parsed.toordinal() - year_start.toordinal()

In [None]:
# Load the table of daily deaths and reshape it to long format:
# one output row per input row, sex and year (2015-2020).

deaths_table = pd.read_csv(
    'decessi_giornalieri.csv',
    encoding='iso-8859-1',
    dtype={'GE': str})

# A value of 9999 in the 2020 columns denotes missing data: drop those rows.
has_2020_data = (
    deaths_table['MASCHI_20'].ne(9999) & deaths_table['FEMMINE_20'].ne(9999))
deaths_table = deaths_table.loc[has_2020_data]

records = []
for row in deaths_table.itertuples():
    location = (
        row.NOME_REGIONE,
        row.NOME_PROVINCIA,
        row.COD_PROVCOM,
        row.NOME_COMUNE,
        row.CL_ETA)
    # GE is read as 'MMDD': first two characters month, next two day.
    month_day = row.GE[0:2] + '-' + row.GE[2:4]
    # Males for every year first, then females, matching the column layout.
    for sex, column_prefix in (('M', 'MASCHI_'), ('F', 'FEMMINE_')):
        for year in range(2015, 2021):
            count = getattr(row, column_prefix + str(year)[2:])
            records.append(
                location + (year, sex, str(year) + '-' + month_day, count))

deaths_table = pd.DataFrame(records, columns=[
    'region', 'province', 'city_id', 'city_name',
    'age', 'year',  'sex', 'date', 'deaths'])

# Combining a month/day with every year can produce impossible dates
# (e.g. Feb 29th in a non-leap year); keep only real calendar days.
is_real_date = deaths_table['date'].map(date_is_valid)
deaths_table = deaths_table.loc[is_real_date]
print("Number of cities:", len(deaths_table['city_id'].unique()))

In [None]:
# Load and clean the table of city metadata.
# It gives the region and province of each city.

city_metadata_table = pd.read_csv(
    'city_metadata.csv', sep=';', encoding='iso-8859-1')
# Keep four columns of the raw file, picked by position, under English names.
city_metadata_table = pd.DataFrame({
    column_name: city_metadata_table.iloc[:, position]
    for column_name, position in (
        ('city_id', 4),
        ('city_name', 6),
        ('region', 10),
        ('province', 13),
    )
})

In [None]:
# Load and clean the population table
population_table = pd.read_csv('population.csv')

# Flag the (city, year) rows that are also present in the deaths table.
# The death keys are de-duplicated first, so the left merge can never
# multiply population rows, and `indicator` tells us which rows matched.
covered_pairs = deaths_table[['city_id', 'year']].drop_duplicates()
population_table = population_table.merge(
    covered_pairs, on=['city_id', 'year'], how='left',
    indicator='_in_deaths')
# Build a real boolean column instead of calling fillna(False) on the whole
# frame, which would also turn missing values of other columns (e.g.
# `population`) into False.
population_table['is_avail'] = population_table['_in_deaths'] == 'both'
population_table = population_table.drop(columns='_in_deaths')

In [None]:
# Population covered in the dataset by year:
# population of the flagged (is_avail) cities divided by the total
# population found in population_table for the same year.
covered_by_year = (
    population_table.loc[population_table['is_avail']]
    .groupby(['year'])['population'].sum()
    .reset_index()
    .rename(columns={'population': 'pop_avail'}))
total_by_year = (
    population_table.groupby(['year'])['population'].sum().reset_index())

# Inner merge: years without any covered city do not appear in the result.
coverage_by_year = total_by_year.merge(covered_by_year, on=['year'])
coverage_by_year['coverage'] = (
    coverage_by_year['pop_avail'] / coverage_by_year['population'])
coverage_by_year

In [None]:
# Population covered in the dataset by province in the year 2019
population_2019 = population_table.loc[population_table['year'] == 2019]
# Attach the province of each city (inner merge on city_id).
population_2019 = population_2019.merge(
    city_metadata_table[['city_id', 'province']], on='city_id')

covered_by_province = (
    population_2019.loc[population_2019['is_avail']]
    .groupby(['province'])['population'].sum()
    .reset_index()
    .rename(columns={'population': 'pop_avail'}))
total_by_province = (
    population_2019.groupby(['province'])['population'].sum().reset_index())

# Inner merge: provinces without any covered city are absent from the result.
coverage_by_province = total_by_province.merge(
    covered_by_province, on=['province'])
coverage_by_province['coverage'] = (
    coverage_by_province['pop_avail'] / coverage_by_province['population'])

In [None]:
# Plot daily deaths
# Total deaths per calendar date, summed over every row of deaths_table.
subset = deaths_table.groupby(['date'])['deaths'].sum().reset_index().sort_values('date')
subset['days'] = subset['date'].map(days_from_jan_1st)
subset['year'] = subset['date'].map(lambda date:int(date[:4]))
# No population data for 2020: use data from 2019
subset['merge_year'] = numpy.minimum(subset['year'], 2019)
# The right-hand 'year' column collides with ours and is renamed 'yearl'.
subset = subset.merge(
    coverage_by_year, 
    left_on='merge_year', right_on='year', 
    suffixes=('', 'l'))
groups = subset.groupby('year')
figure, axes = plt.subplots(1, 1)
axes.set_xlim(0, 90)
axes.set_ylim(0, 90)
axes.grid(axis='y',zorder=0)
axes.set_xlabel('Days since Jan 1st')
axes.set_ylabel('Deaths per 1 million people')
axes.set_title('Deaths from all causes*')
# Second x axis on top, labelled with month starts instead of day counts.
axes2 = axes.twiny()
axes2.set_xlim(0, 90)
axes2.set_ylim(0, 90)
# Tick positions use a 28-day February, so in a leap year (2020) the
# 'Mar 1st' and 'Apr 1st' ticks sit one day early.
axes2.set_xticks(tuple(numpy.cumsum([0, 31, 28, 31])))
axes2.set_xticklabels(['Jan 1st', 'Feb 1st', 'Mar 1st', 'Apr 1st'])
axes2.grid(zorder=0)
# One line per year, normalised by the population of the covered cities.
for key, group in groups:
    axes.plot(group['days'], group['deaths'] / group['pop_avail'] * 1000000, 
              label=key, linewidth=1)
axes.legend()
# NOTE(review): the figures in this footnote are hard-coded; confirm they
# still match the "Number of cities" printout and coverage_by_year.
plt.annotate(
    "*based on a sample of 1450 cities"
    " covering 28% of the population.", 
    (0.1, 0.01), xycoords='figure fraction')
plt.tight_layout()
plt.savefig('daily_deaths.svg')
# pyplot.show() does not take a figure: a positional argument is read as
# `block` (or rejected outright), so call it without arguments.
plt.show()
plt.close(figure)

In [None]:
# Plot dataset coverage
def get_color(coverage):
    """Map a coverage fraction to one of five hex colours, light to dark red.

    The range [0, 1] is split into ``len(COLORS)`` equal-width bins and the
    colour of the bin containing `coverage` is returned. Values at or above
    1 get the last (darkest) colour and values below 0 get the first
    (lightest) one.

    Raises ValueError if `coverage` is NaN, since NaN cannot be converted
    to an integer bin index.
    """
    COLORS = [
        "#fee5d9",
        "#fcbba1",
        "#fc9272",
        "#fb6a4a",
        "#de2d26"
    ]
    index = int(numpy.floor(coverage * len(COLORS)))
    # coverage == 1.0 yields index == len(COLORS), one past the end, and a
    # slightly negative coverage would wrap around through negative
    # indexing; clamp to the valid range instead.
    index = min(max(index, 0), len(COLORS) - 1)
    return COLORS[index]

# Colour every province shape of the SVG template by its dataset coverage
# and write the result to italy.svg.
# Registering the SVG namespace as the default keeps the output free of
# 'ns0:' prefixes when the tree is serialised.
xml.etree.ElementTree.register_namespace('', 'http://www.w3.org/2000/svg')
tree = xml.etree.ElementTree.parse('italy_template.svg')
root = tree.getroot()

# Find the group holding the province shapes. Use .get() because not every
# top-level SVG element carries an 'id' attribute.
italy_group = next(
    (child for child in root if child.get('id') == 'italia'), None)
if italy_group is None:
    raise ValueError(
        "No element with id 'italia' found in italy_template.svg")

for child in italy_group:
    province = child.get('id')
    if province is None:
        # Element without an id cannot be matched to a province: leave it.
        continue
    records = coverage_by_province.loc[
        coverage_by_province['province'] == province]
    if len(records) == 0:
        # Province absent from the coverage table: no covered city.
        coverage = 0.
    else:
        if len(records) > 1:
            print("Huh, two matches...")
        # Always take the first match so `coverage` is never left stale
        # from a previous iteration (or undefined on the first one).
        coverage = records.iloc[0]['coverage']
    child.set('fill', get_color(coverage))

tree.write('italy.svg')