In [1]:
import numpy as np
import pandas as pd
import os
from datetime import date
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import csv
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import datetime

In [2]:
## Geographical Areas
import geojson
import geopandas as gpd

In [3]:
# Mapbox access token used by the choropleth maps below.
# Prefer supplying it through the MAPBOX_TOKEN environment variable so the
# credential does not have to live in the notebook; the literal is kept only as
# a backward-compatible fallback.
# NOTE(review): consider revoking/rotating this token and removing the literal.
map_token = os.environ.get(
    'MAPBOX_TOKEN',
    'pk.eyJ1IjoiZnJhbmNpZ2plY2kiLCJhIjoiY2tpazZveWhmMDZ5MzMxcWp4bzIxbm0wYyJ9.J_qWOJqADI6tZfle2bbZFg',
)

<h1> Data </h1>

In [4]:
# Source: GitHub (pcm-dpc/COVID-19) - data certified by the Italian authorities
# path = 'https://github.com/pcm-dpc/COVID-19/blob/master/dati-province/dpc-covid19-ita-province.csv'
# A local copy is used instead: the data were inspected and corrected so that no line is lost.
path = 'Data/dpc-covid19-ita-province.csv'

In [5]:
# Column names imposed on the province file; the file's own first row is
# skipped (skiprows=1) and replaced by these names.
columns = [
    'data', 'stato', 'codice_regione', 'denominazione_regione',
    'codice_provincia', 'denominazione_provincia', 'sigla_provincia',
    'lat', 'long', 'totale_casi', 'note',
]
# The single quote is declared as the quote character, so double quotes are
# not interpreted as quoting by the parser (they are stripped from `data` later).
df_covid = pd.read_csv(
    path,
    names=columns,
    skiprows=1,
    quotechar="'",
)

In [6]:
# Regional-level COVID-19 file; `path` is reused, replacing the province file path set earlier.
path = 'Data/COVID19_Italy_regional_20200224_20200831.csv'

In [7]:
# Load the regional file with pandas defaults; depends on `path` as set in the preceding cell.
df_covid_regioni_all = pd.read_csv(path)

In [8]:
# Population

In [9]:
# Population file; only its `Regione` and `Residenti` columns are kept further down.
path = 'Data/persons_x_regione.csv'
df_population = pd.read_csv(path)

In [10]:
# df_population.head(1)

In [11]:
# response = urlopen(path)
# json_data = response.read()
path = 'Data/dpc-covid-19-aree-nuove-g.json'
# GeoJSON is UTF-8 by specification; pass the encoding explicitly so that
# accented names are read correctly even where the platform default encoding
# is not UTF-8 (e.g. Windows).
with open(path, encoding='utf-8') as f:
    json_data = geojson.load(f)
    
# for data in json_data.features:
#     if data.properties["nomeTesto"] == 'Provincia autonoma Trento':
#         data.properties["nomeTesto"] = 'Trento'
#     elif data.properties["nomeTesto"] == 'Provincia autonoma Bolzano':
#         data.properties["nomeTesto"] = 'Bolzano'
# regions_json.add(data.properties["nomeTesto"])

# Data Correction

In [12]:
# Remove any double-quote characters from the timestamp strings, then parse
# them as ISO-like 'YYYY-MM-DDTHH:MM:SS' datetimes.
df_covid['data'] = pd.to_datetime(
    df_covid['data'].str.replace('"', ''),
    format='%Y-%m-%dT%H:%M:%S',
)
# Case totals as integers.
df_covid['totale_casi'] = df_covid['totale_casi'].astype(int)

In [13]:
# Parse the regional timestamps; unlike the province file, the expected format has a space (not 'T') between date and time.
df_covid_regioni_all['data'] = pd.to_datetime(df_covid_regioni_all['data'], format='%Y-%m-%d %H:%M:%S')

In [15]:
# Keep only the columns used downstream. The explicit copy makes the result an
# independent frame, so the in-place assignments in the following cells do not
# operate on a slice of the original (avoids SettingWithCopyWarning).
df_population = df_population[['Regione', 'Residenti']].copy()

In [16]:
# Align the spelling of two region names with the spelling used in the COVID data
# (needed for the later merge on `Regione`).
df_population['Regione'] = df_population['Regione'].replace({
    'Emilia Romagna': "Emilia-Romagna",
    'Friuli-Venezia Giulia': "Friuli Venezia Giulia",
})

In [17]:
# Strip the '.' characters from the resident counts (presumably thousands
# separators - TODO confirm against the file).
# regex=False is explicit on purpose: '.' is a regex metacharacter and the
# default value of `regex` differs between pandas versions, so relying on the
# default is fragile (and emits a FutureWarning on pandas 1.x).
df_population['Residenti'] = df_population['Residenti'].str.replace('.', '', regex=False)

# Data Process

In [18]:
# Shorter column names for region and province, used by all later cells.
df_covid = df_covid.rename(
    columns={
        'denominazione_regione': 'Regione',
        'denominazione_provincia': 'Provincia',
    }
)

In [19]:
# Same region column name as in the province frame.
df_covid_regioni_all = df_covid_regioni_all.rename(
    columns={'denominazione_regione': 'Regione'}
)

In [20]:
# Expand the abbreviated names of the two autonomous provinces to their long form.
df_covid_regioni_all['Regione'] = df_covid_regioni_all['Regione'].replace({
    'P.A. Bolzano': "Provincia autonoma Bolzano",
    'P.A. Trento': "Provincia autonoma Trento",
})

In [21]:
# One row per (region, province, province code, timestamp) with the summed
# case total; to_records() flattens the group keys back into ordinary columns.
df_covid_daily = pd.DataFrame(
    df_covid
    .groupby(['Regione', 'Provincia', 'sigla_provincia', 'data'])
    .agg({'totale_casi': "sum"})
    .to_records()
)

In [22]:
# Row-to-row change of the case total within each province
# (NaN on the first row of every province).
df_covid_daily['casi_giornalieri'] = (
    df_covid_daily
    .groupby(['Regione', 'Provincia', 'sigla_provincia'])['totale_casi']
    .diff()
)

In [23]:
# Where the diff is NaN (e.g. the first row of a province), use the total itself.
df_covid_daily['casi_giornalieri'] = df_covid_daily['casi_giornalieri'].fillna(
    df_covid_daily['totale_casi']
)

In [24]:
# One row per (region, timestamp); every listed measure is summed.
# to_records() flattens the group keys back into ordinary columns.
df_covid_regioni_all_daily = pd.DataFrame(
    df_covid_regioni_all
    .groupby(['Regione', 'data'])
    .agg(dict.fromkeys(
        ['terapia_intensiva', 'deceduti', 'tamponi',
         'nuovi_positivi', 'casi_testati', 'totale_casi'],
        'sum',
    ))
    .to_records()
)

In [25]:
# For each measure, add a '<measure>_giornalieri' column holding the row-to-row
# change within every region.
for source_col in ('tamponi', 'deceduti', 'terapia_intensiva', 'totale_casi'):
    daily_col = source_col + '_giornalieri'
    per_region_delta = df_covid_regioni_all_daily.groupby(['Regione'])[source_col].diff()
    # Where the diff is NaN (e.g. the first row of a region) fall back to the raw value.
    df_covid_regioni_all_daily[daily_col] = per_region_delta.fillna(
        df_covid_regioni_all_daily[source_col]
    )

In [26]:
# Calendar month number of each observation, used for the monthly map.
df_covid_regioni_all_daily['month'] = df_covid_regioni_all_daily['data'].dt.month

In [27]:
# Negative daily values are blanked out (set to NaN) rather than kept.
df_covid_regioni_all_daily['totale_casi_giornalieri'] = (
    df_covid_regioni_all_daily['totale_casi_giornalieri']
    .mask(df_covid_regioni_all_daily['totale_casi_giornalieri'] < 0)
)
df_covid_regioni_all_daily['tamponi_giornalieri'] = (
    df_covid_regioni_all_daily['tamponi_giornalieri']
    .mask(df_covid_regioni_all_daily['tamponi_giornalieri'] < 0)
)

In [28]:
# Share of positives: daily change in total cases divided by daily swabs.
df_covid_regioni_all_daily['perc_positivi_tamponi'] = (
    df_covid_regioni_all_daily['totale_casi_giornalieri']
    .div(df_covid_regioni_all_daily['tamponi_giornalieri'])
)

In [29]:
# NOTE(review): this statement is identical to the one in the previous cell;
# recomputing is harmless (same inputs, same result) and this cell can likely be removed.
df_covid_regioni_all_daily['perc_positivi_tamponi'] = df_covid_regioni_all_daily['totale_casi_giornalieri'] /\
df_covid_regioni_all_daily['tamponi_giornalieri']

In [30]:
# A share above 1 (this includes +inf from a division by zero) is blanked out.
df_covid_regioni_all_daily['perc_positivi_tamponi'] = (
    df_covid_regioni_all_daily['perc_positivi_tamponi']
    .mask(df_covid_regioni_all_daily['perc_positivi_tamponi'] > 1)
)

In [31]:
# Attach the resident count to every regional row; a left join keeps regions
# that have no match in the population table (their `Residenti` becomes NaN).
df_covid_regioni_all_daily = pd.merge(
    df_covid_regioni_all_daily,
    df_population,
    on='Regione',
    how='left',
)

In [32]:
#df_covid_regioni_all_daily.drop(columns=['Residenti'], inplace=True)

In [33]:
# `Residenti` is still a string column after the '.' removal above; cast it to float
# so it can be used as the divisor of the KPI values in the map cells.
df_covid_regioni_all_daily['Residenti'] = df_covid_regioni_all_daily['Residenti'].astype(float)

# Data Validation

# Data Visualization

In [34]:
# Column names read (as notebook globals) by the callbacks of this app.
column_regione = 'Regione'
column_provincia = 'Provincia'
agg_column = 'casi_giornalieri'
data_column = 'data'

app_period = JupyterDash("CovidPeriod")

# Three dropdowns (region, province, time granularity) above a single chart.
# The region and province dropdowns allow multiple selections and start on the
# first value found in the data.
app_period.layout = html.Div(children=[
    html.Label(children=[
        "Regione",
        dcc.Dropdown(
            id="Regione",
            options=[dict(label=x, value=x) for x in df_covid_daily['Regione'].unique()],
            value=df_covid_daily['Regione'].unique()[0],
            multi=True,
        ),
    ]),
    html.Label(children=[
        "Provincia",
        dcc.Dropdown(
            id="Provincia",
            options=[dict(label=x, value=x) for x in df_covid_daily['Provincia'].unique()],
            value=df_covid_daily['Provincia'].unique()[0],
            multi=True,
        ),
    ]),
    html.Label(children=[
        "Data range",
        dcc.Dropdown(
            id="dateRange",
            options=[dict(label=x, value=x) for x in ["Day", "Week", "Month"]],
            value="Day",
            clearable=False,
        ),
    ]),
    html.Div(children=dcc.Graph(id="CovidPeriod-chart")),
])
    
def _period_key(dates, dateRange):
    """Return the group-by key for ``dates`` at the granularity ``dateRange``.

    ``dates`` is a datetime Series; ``dateRange`` is "Day", "Week" or "Month".
    The returned Series keeps the name of ``dates`` so that the resulting
    index level / column is named consistently for every granularity.
    """
    if dateRange == "Day":
        return dates.dt.date
    if dateRange == "Week":
        # `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0;
        # `isocalendar().week` is its replacement. Keep a fallback for old pandas.
        if hasattr(dates.dt, "isocalendar"):
            week = dates.dt.isocalendar().week
            # Cast away the nullable UInt32 dtype (float only if values are missing).
            week = week.astype("float64" if week.isna().any() else "int64")
            return week.rename(dates.name)
        return dates.dt.week
    if dateRange == "Month":
        return dates.dt.month
    raise ValueError("Unsupported dateRange: {!r}".format(dateRange))


def _selection_mask(frame, column, selection):
    """Boolean row mask of ``frame`` for a dropdown ``selection`` on ``column``.

    ``selection`` may be None, a string, or a list of strings. An empty
    selection (None, '' or []) means "no filter" and selects every row.
    """
    if selection is None:
        chosen = []
    elif isinstance(selection, str):
        chosen = [selection] if selection else []
    else:
        chosen = list(selection)
    if not chosen:
        # Always a Series (never a bare `True`) so that masks can be combined
        # with `&` and used for boolean indexing.
        return pd.Series(True, index=frame.index)
    return frame[column].isin(chosen)


@app_period.callback(
    [Output("CovidPeriod-chart", "figure")],
    [Input("Regione", "value"), 
     Input("Provincia", "value"), 
     Input("dateRange", "value")]
)
def display_covid_period(regione, provincia, dateRange): # _options, regione_value
    """Build the line chart of `agg_column` per province for the current selection.

    Parameters
    ----------
    regione, provincia : None, str or list of str
        Values of the two dropdowns; an empty value disables that filter.
    dateRange : str
        "Day", "Week" or "Month": granularity used to sum `agg_column`.

    Returns
    -------
    list
        One-element list with the figure, as required by the list-style
        ``Output`` declaration of this callback.

    Notes
    -----
    Reads the notebook globals `df_covid_daily`, `column_regione`,
    `column_provincia`, `data_column` and `agg_column` at call time.
    """
    f_data = (_selection_mask(df_covid_daily, column_regione, regione)
              & _selection_mask(df_covid_daily, column_provincia, provincia))
    df_selected = df_covid_daily[f_data]

    period = _period_key(df_selected[data_column], dateRange)
    df_plot = df_selected.groupby(
        by=[column_regione, column_provincia, period])[agg_column].sum()

    fig = go.Figure()

    # Index levels are (region, province, period); one trace per province,
    # in order of first appearance so that the legend order is deterministic.
    for city in df_plot.index.get_level_values(1).unique():
        city_series = df_plot.xs(city, level=1)  # remaining levels: (region, period)
        fig.add_trace(
            go.Scatter(
                x=city_series.index.get_level_values(1),
                y=city_series,
                name=city,
                mode='lines'
            )
        )

    fig.update_layout(
        title='Numero di casi giornalieri per vari citta',
        showlegend=True
    )
    # The output is declared as a list, so the return value must be a list too.
    return [fig]


@app_period.callback(
    [Output("Regione", "value")],
    [Input("Provincia", "value")]
)
def change_regione(provincia):
    """Set the region dropdown to the regions of the selected provinces.

    Parameters
    ----------
    provincia : None, str or list of str
        Current value of the province dropdown.

    Returns
    -------
    list
        One-element list (list-style ``Output``): either the list of regions
        containing the selected provinces, or '' when nothing is selected or
        nothing matches.
    """
    # The None check must come before any len() call: a cleared dropdown may
    # deliver None, and len(None) raises TypeError.
    if provincia is None:
        provincia = []
    elif isinstance(provincia, str):
        provincia = [provincia]

    if len(provincia) == 0:
        return ['']

    f_provincia = df_covid_daily[column_provincia].isin(provincia)
    # Plain Python list rather than a NumPy array for the value sent to the browser.
    new_regione = df_covid_daily.loc[f_provincia, column_regione].unique().tolist()

    if len(new_regione) == 0:
        return ['']
    return [new_regione]

# @app_period.callback(
#     [Output("Provincia", "value")],
#     [Input("Regione", "value")]
# )
# def change_provincia(regione):
#     if len(regione) == 0:
# #         f_provincia = [True]*df_covid_daily.shape[0]
#         new_provincia = []
#     elif regione is None:
#         new_provincia = []
#     else:
#         all_prov = df_covid_daily.loc[df_covid_daily[column_regione].isin(regione),column_provincia].unique()
#         new_provincia = set(all_prov).intersection
#         new_regione = df_covid_daily.loc[f_provincia, column_regione].unique()
        
    
#     if isinstance(new_provincia, str):
#         new_provincia = [new_provincia]
#     if len(new_provincia) == 0:
#         return ['']
#     return [new_provincia]
    
    
    # this command takes the selected cities from the user and finds the regions these cities belong to
#     new_regione = df_plot.loc[(slice(None), l_selected_cities)].index.get_level_values(0).unique()
    
#     # Finally we add to new regione only missing regione
#     new_regione = regione + new_regione[~new_regione.isin(regione)].tolist()
    

# app_period.run_server(mode='inline', port=8050) # debug=True, use_reloader=False

In [35]:
# fig = px.line(df_covid_regioni_all_daily, x = 'data', y='perc_positivi_tamponi', color='Regione' )

# fig.show()

In [None]:
# fig = px.line(df_covid_regioni_all_daily, x = 'data', y='totale_casi', color='Regione' )

# fig.show()

In [None]:
# Plot per region

# Column names read (as notebook globals) by the callback of this app.
column_regione = 'Regione'
column_provincia = 'Provincia'
agg_column = 'tamponi_giornalieri'
data_column = 'data'

app_period = JupyterDash("CovidPeriod")

# Dropdowns for region (multi), time granularity and KPI (multi) above a single chart.
app_period.layout = html.Div(children=[
    html.Label(children=[
        "Regione",
        dcc.Dropdown(
            id="Regione",
            options=[dict(label=x, value=x) for x in df_covid_regioni_all_daily['Regione'].unique()],
            value=df_covid_regioni_all_daily['Regione'].unique()[0],
            multi=True,
        ),
    ]),
    html.Label(children=[
        "Data range",
        dcc.Dropdown(
            id="dateRange",
            options=[dict(label=x, value=x) for x in ["Day", "Week", "Month"]],
            value="Day",
            clearable=False,
        ),
    ]),
    html.Label(children=[
        "KPI",
        dcc.Dropdown(
            id="KPI",
            options=[dict(label=x, value=x) for x in ["perc_positivi_tamponi",
                                                      "deceduti_giornalieri",
                                                      "terapia_intensiva_giornalieri"]],
            value="perc_positivi_tamponi",
            multi=True,
            clearable=False,
        ),
    ]),
    html.Div(children=dcc.Graph(id="CovidPeriod-chart")),
])
    
def _period_key(dates, dateRange):
    """Return the group-by key for ``dates`` at the granularity ``dateRange``.

    ``dates`` is a datetime Series; ``dateRange`` is "Day", "Week" or "Month".
    The returned Series keeps the name of ``dates``.
    """
    if dateRange == "Day":
        return dates.dt.date
    if dateRange == "Week":
        # `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0;
        # `isocalendar().week` is its replacement. Keep a fallback for old pandas.
        if hasattr(dates.dt, "isocalendar"):
            week = dates.dt.isocalendar().week
            # Cast away the nullable UInt32 dtype (float only if values are missing).
            week = week.astype("float64" if week.isna().any() else "int64")
            return week.rename(dates.name)
        return dates.dt.week
    if dateRange == "Month":
        return dates.dt.month
    raise ValueError("Unsupported dateRange: {!r}".format(dateRange))


@app_period.callback(
    [Output("CovidPeriod-chart", "figure")],
    [Input("KPI", "value"),
    Input("Regione", "value"), 
     Input("dateRange", "value")]
)
def display_covid_period(kpis, regione, dateRange): # _options, regione_value
    """Build the line chart of the selected KPIs per region.

    Parameters
    ----------
    kpis : None, str or list of str
        KPI column(s) of `df_covid_regioni_all_daily` to plot.
    regione : None, str or list of str
        Selected regions; an empty value plots every region.
    dateRange : str
        "Day", "Week" or "Month": granularity over which each KPI is summed.

    Returns
    -------
    list
        One-element list with the figure, as required by the list-style
        ``Output`` declaration of this callback.

    Notes
    -----
    Reads the notebook globals `df_covid_regioni_all_daily`, `column_regione`
    and `data_column` at call time.
    NOTE(review): every KPI is aggregated with 'sum', including the ratio
    `perc_positivi_tamponi`; summing a ratio over a week/month is probably not
    the intended KPI - confirm.
    """
    # Normalise the dropdown values: None -> [], single string -> one-item list.
    if regione is None:
        regione = []
    elif isinstance(regione, str):
        regione = [regione] if regione else []
    if kpis is None:
        kpis = []
    elif isinstance(kpis, str):
        kpis = [kpis]
    kpis = list(dict.fromkeys(kpis))  # drop duplicates, keep order

    fig = go.Figure()
    fig.update_layout(
        title='Numero di casi giornalieri per vari citta',
        showlegend=True
    )
    if not kpis:
        # Nothing to aggregate: return the empty figure instead of failing in agg().
        return [fig]

    if len(regione) == 0:
        df_selected = df_covid_regioni_all_daily
    else:
        df_selected = df_covid_regioni_all_daily[
            df_covid_regioni_all_daily[column_regione].isin(regione)]

    period = _period_key(df_selected[data_column], dateRange)
    df_plot = df_selected.groupby(by=[column_regione, period]).agg(
        {kpi: 'sum' for kpi in kpis})

    several_kpis = len(kpis) > 1
    for region in df_plot.index.get_level_values(0).unique():
        for kpi in kpis:
            region_series = df_plot[kpi].xs(region)  # indexed by period only
            fig.add_trace(
                go.Scatter(
                    x=region_series.index.get_level_values(0),
                    y=region_series,
                    # With several KPIs the region alone would give identical legend entries.
                    name=f'{region} - {kpi}' if several_kpis else region,
                    mode='lines'
                )
            )

    # The output is declared as a list, so the return value must be a list too.
    return [fig]
    
    
    # this command takes the selected cities from the user and finds the regions these cities belong to
#     new_regione = df_plot.loc[(slice(None), l_selected_cities)].index.get_level_values(0).unique()
    
#     # Finally we add to new regione only missing regione
#     new_regione = regione + new_regione[~new_regione.isin(regione)].tolist()
    

# app_period.run_server(mode='inline', port=8050) # debug=True, use_reloader=False

- Deri ne dhjetor te covid
- Dati secondata covid + traffic
- Correlation of traffic and covid (build function)
- Correlation heatmap per region, after finding a suitable lag
- 

In [131]:
# Column names and GeoJSON read (as notebook globals) by the map callback below.
column_regione = 'Regione'
column_provincia = 'Provincia'
agg_column = 'tamponi_giornalieri'
data_column = 'data'
jdata = json_data

app_period = JupyterDash("CovidMap")

# Two single-choice dropdowns (time granularity, KPI) above the map.
app_period.layout = html.Div(children=[
    html.Label(children=[
        "Data range",
        dcc.Dropdown(
            id="dateRange",
            options=[dict(label=x, value=x) for x in ["Day", "Week", "Month"]],
            value="Day",
            clearable=False,
        ),
    ]),
    html.Label(children=[
        "KPI",
        dcc.Dropdown(
            id="KPI",
            options=[dict(label=x, value=x) for x in ["perc_positivi_tamponi",
                                                      "deceduti_giornalieri",
                                                      "terapia_intensiva_giornalieri"]],
            value="perc_positivi_tamponi",
            clearable=False,
        ),
    ]),
    html.Div(children=dcc.Graph(id="CovidMap-chart")),
])
    
def _period_key(dates, dateRange):
    """Return the group-by key for ``dates`` at the granularity ``dateRange``.

    ``dates`` is a datetime Series; ``dateRange`` is "Day", "Week" or "Month".
    The returned Series keeps the name of ``dates`` so that the column produced
    by the group-by has the same name for every granularity.
    """
    if dateRange == "Day":
        return dates.dt.date
    if dateRange == "Week":
        # `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0;
        # `isocalendar().week` is its replacement. Keep a fallback for old pandas.
        if hasattr(dates.dt, "isocalendar"):
            week = dates.dt.isocalendar().week
            # Cast away the nullable UInt32 dtype (float only if values are missing).
            week = week.astype("float64" if week.isna().any() else "int64")
            return week.rename(dates.name)
        return dates.dt.week
    if dateRange == "Month":
        return dates.dt.month
    raise ValueError("Unsupported dateRange: {!r}".format(dateRange))


@app_period.callback(
    [Output("CovidMap-chart", "figure")],
    [Input("KPI", "value"), 
     Input("dateRange", "value")]
)
def display_covid_period(kpi, dateRange):
    """Build the animated choropleth of ``kpi`` divided by residents, per region.

    Parameters
    ----------
    kpi : str
        Column of `df_covid_regioni_all_daily` to display; it is summed per
        region and period and then divided by `Residenti`.
    dateRange : str
        "Day", "Week" or "Month": one animation frame is built per period.

    Returns
    -------
    list
        One-element list with the figure. The ``Output`` of this callback is
        declared as a list, and Dash rejects a bare figure in that case.

    Notes
    -----
    Reads the notebook globals `df_covid_regioni_all_daily`, `column_regione`,
    `data_column`, `jdata` and `map_token` at call time.
    """
    kpi_dic = {kpi: 'sum', 'Residenti': 'min'}

    period = _period_key(df_covid_regioni_all_daily[data_column], dateRange)
    # to_records() turns the (region, period) group keys back into columns;
    # the period column is named after `data_column`.
    df_plot = pd.DataFrame(
        df_covid_regioni_all_daily
        .groupby(by=[column_regione, period])
        .agg(kpi_dic)
        .to_records()
    )

    def _per_capita(frame_df):
        """KPI value divided by the resident count, row by row."""
        return frame_df[kpi] / frame_df['Residenti']

    layout = go.Layout(# width = 770, height=650,
                       margin={"r": 0, "t": 0, "l": 0, "b": 0},
                       mapbox=dict(center={"lat": 41.892770, "lon": 12.483667},
                                   accesstoken=map_token,
                                   zoom=4))

    # Initial view: the earliest period.
    first_df = df_plot[df_plot[data_column] == df_plot[data_column].min()]

    data = [go.Choroplethmapbox(
        locations=first_df[column_regione],
        z=_per_capita(first_df),
        colorscale='deep',
        # Regions are matched to GeoJSON features through their `nomeTesto` property.
        featureidkey="properties.nomeTesto",
        colorbar=dict(thickness=20, ticklen=3),
        geojson=jdata,
        marker_line_width=0, marker_opacity=0.7)]

    # One frame per distinct period, named frame1..frameN.
    date_range = df_plot[data_column].unique()
    frames = []
    for N, q in enumerate(date_range):
        current_df = df_plot[df_plot[data_column] == q]
        frames.append(go.Frame(
            data=[go.Choroplethmapbox(
                locations=current_df[column_regione],
                z=_per_capita(current_df))],
            name=f'frame{N+1}'))

    # One slider step per frame; the label shows the period value.
    sliders = [dict(
        steps=[dict(method='animate',
                    args=[[f'frame{k+1}'],
                          dict(mode='immediate',
                               frame=dict(duration=600, redraw=True),
                               transition=dict(duration=200))],
                    label='dateRange : {}'.format(date_range[k]))
               for k in range(0, len(frames))],
        transition=dict(duration=100),
        x=0,
        y=0,
        currentvalue=dict(font=dict(size=12), visible=True, xanchor='center'),
        len=1.0)]

    # "Play" button that animates through all frames from the current one.
    layout.update(
        updatemenus=[dict(type='buttons', showactive=False,
                          y=0,
                          x=0,
                          xanchor='left',
                          pad=dict(t=5, r=10),
                          buttons=[dict(label='Play',
                                        method='animate',
                                        args=[None,
                                              dict(frame=dict(duration=600,
                                                              redraw=True),
                                                   transition=dict(duration=200),
                                                   fromcurrent=True,
                                                   mode='immediate')])])],
        sliders=sliders)

    # The output is declared as a list, so the figure must be wrapped in a list.
    return [go.Figure(data=data, layout=layout, frames=frames)]

# Start the most recently defined `app_period` (the "CovidMap" app when cells are run in order) inline on port 8051.
app_period.run_server(mode='inline', port=8051) # debug=True, use_reloader=False

In [None]:
# Static (non-Dash) version of the animated map: per region and month, the sum of
# `totale_casi_giornalieri` divided by `Residenti`.
# to_records() turns the (Regione, month) group keys back into ordinary columns.
df_covid_plot =pd.DataFrame(df_covid_regioni_all_daily.groupby(by=['Regione', 'month'
                                            ]).agg({'totale_casi_giornalieri': 'sum',
                                                   'Residenti': 'min'}).to_records())

jdata = json_data
# NOTE(review): `regions` is referenced only by the commented-out `text` argument
# below and `L` is not used in this cell.
regions = df_covid_plot['Regione'].unique()
L = len(regions)

# Base layout: no margins, map centred on the given coordinates at zoom level 4.
layout = go.Layout(# width = 770, height=650,
                   margin={"r":0,"t":0,"l":0,"b":0},
                  mapbox = dict(center= {"lat": 41.892770, "lon": 12.483667},
                                accesstoken=map_token,
                                zoom=4))

# Initial view: rows of the earliest month.
mask = df_covid_plot['month']==df_covid_plot['month'].min()
current_df = df_covid_plot[mask]

# Regions are matched to GeoJSON features through their `nomeTesto` property.
data = [go.Choroplethmapbox( 
                             locations = current_df['Regione'],
                             z = current_df['totale_casi_giornalieri']/current_df['Residenti'],
                             colorscale = 'deep',
#                              text =regions,
                             featureidkey="properties.nomeTesto",
                             colorbar = dict(thickness=20, ticklen=3),
                             geojson = jdata,
                             marker_line_width=0, marker_opacity=0.7)]


# One animation frame per distinct month, named frame1..frameN.
date_range = df_covid_plot.month.unique()
frames = []

for N, q in enumerate(date_range):
    mask = df_covid_plot['month']==q
    current_df = df_covid_plot[mask]
    frames.append(go.Frame(data=[go.Choroplethmapbox(
                                locations = current_df['Regione'],
                                 z = current_df['totale_casi_giornalieri']/current_df['Residenti'])],
                          name=f'frame{N+1}')
                  )

# One slider step per frame; the label shows the month number.
sliders = [dict(steps= [dict(method= 'animate',
                       args= [[f'frame{k+1}'],
                              dict(mode= 'immediate',
                              frame= dict( duration=600, redraw= True ),
                                       transition=dict( duration= 200)
                                      )
                                ],
                        label='Date : {}'.format(date_range[k])
                         ) for k in range(0,len(frames))], 
            transition= dict(duration= 100 ),
            x=0,
            y=0,
            currentvalue=dict(font=dict(size=12), visible=True, xanchor= 'center'),
            len=1.0)
       ]

# Update Layout: add a "Play" button (animates from the current frame) and the slider.
layout.update(updatemenus=[dict(type='buttons', showactive=False,
                               y=0,
                               x=0,
                               xanchor='left',
                               pad=dict(t=5, r=10),
                               buttons=[dict(label='Play',
                                             method='animate',
                                             args=[None, 
                                                   dict(frame=dict(duration=600, 
                                                        redraw=True),
                                                        transition=dict(duration=200),
                                                        fromcurrent=True,
                                                        mode='immediate'
                                                       )
                                                  ]
                                            )
                                       ]
                              )
                         ],
             sliders=sliders);
# Plot the figure 
fig=go.Figure(data=data, layout=layout, frames=frames)
fig.show()

In [None]:
# TODO add documentation on github (link to datasets, also for traffic)
# TODO fix plot (hover)
# TODO fix representation of feature into the plot (compute right kpi)
# TODO more kpi
# TODO add week (eventually day) resampling
# TODO then: correlation