In [2]:
import numpy as np
import pandas as pd
from datetime import date
from matplotlib import pyplot as plt

from numpy import cos, sin, arcsin, sqrt
from math import radians
import seaborn as sns
from jupyter_dash import JupyterDash

import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.figure_factory as ff
import plotly.express as px
import numpy as np

# need for token (mapbox)
# NOTE(review): the access token is hard-coded here, so it travels with every copy of the
# notebook. Consider reading it from an environment variable and rotating this one.
px.set_mapbox_access_token("pk.eyJ1IjoiZmlsaXBrcmFzbmlxaSIsImEiOiJja2luOW9jdmgwa3J3MnpvNXhkNGJ6MWFtIn0.eevoM5byqvtc1nC0oXpuOw")

def haversine(row, lonlat):
    """Great-circle distance, in kilometres, between a data row and a reference point.

    Parameters
    ----------
    row : mapping with 'LAT' and 'LNG' entries, in decimal degrees.
    lonlat : pair of decimal degrees, unpacked as ``(lat, lon)``.
        NOTE(review): despite the parameter name, the first element is treated as
        the latitude and the second as the longitude - confirm against callers.

    Returns
    -------
    The haversine distance computed with an Earth radius of 6367 km.
    NaN if any coordinate is NaN.
    """
    lat1, lon1 = lonlat
    lon2, lat2 = row['LNG'], row['LAT']
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal points,
    # which would make arcsin return NaN; clamp it (NaN inputs stay NaN).
    a = np.clip(a, 0.0, 1.0)
    c = 2 * arcsin(sqrt(a))
    km = 6367 * c
    return km

def cell_from_coords(data, lonlat):
    """Return the row of `data` closest to the reference point `lonlat`.

    Coordinates are parsed from the 'LAT_Y' / 'LONG_X' columns (unparseable values
    become NaN) and distances are computed with `haversine`, which receives `lonlat`
    unchanged.

    The work is done on a copy: `data` itself is never modified. In particular,
    'LAT' / 'LNG' columns that already exist on `data` are no longer dropped as a
    side effect of the lookup.

    Returns
    -------
    A one-row DataFrame (empty if `data` is empty) holding the nearest row, with the
    parsed 'LAT', 'LNG' and the computed 'distance' (km) columns included.
    """
    located = data.assign(
        LAT=pd.to_numeric(data['LAT_Y'], errors='coerce'),
        LNG=pd.to_numeric(data['LONG_X'], errors='coerce'),
    )
    if located.empty:
        # nothing to measure; keep the same column layout as the non-empty case
        return located.assign(distance=float('nan')).iloc[0:1]
    located['distance'] = located.apply(lambda row: haversine(row, lonlat), axis=1)
    # NaN distances sort last, so rows with unparseable coordinates are never preferred
    return located.sort_values(by='distance').iloc[0:1]

def normalize(data, column):
    """Standardise `column` of `data` in place (subtract its mean, divide by its std).

    The same `data` object is returned for convenience.
    """
    centre = data[column].mean()
    spread = data[column].std()
    data.loc[:, column] = (data[column] - centre) / spread
    return data

def fix_coords(data):
    """Add numeric 'LAT' / 'LNG' columns parsed from 'LAT_Y' / 'LONG_X'.

    Values that cannot be parsed become NaN. `data` is modified in place and returned.
    """
    latitude = pd.to_numeric(data['LAT_Y'], errors='coerce')
    longitude = pd.to_numeric(data['LONG_X'], errors='coerce')
    data['LAT'] = latitude
    data['LNG'] = longitude
    return data

def _resample_per_group(dataframe, group_col, how, weekly, monthly):
    """Resample the numeric columns of `dataframe` on 'Date', separately per `group_col` value.

    `how` is the name of the resampler aggregation to apply ('mean' or 'sum').
    Frequency: weekly bins labelled on Mondays when `weekly` is true, otherwise
    month-start bins when `monthly` is true, otherwise daily bins.
    """
    if weekly:
        freq = 'W-MON'
    elif monthly:
        freq = 'MS'
    else:
        freq = 'D'

    # Only numeric columns are aggregated, and the grouping key is kept out of the
    # aggregation. pandas versions differ on whether groupby().resample() feeds the key
    # and non-numeric columns to the aggregation (dropping them silently, raising, or
    # colliding with the key in reset_index), so each group is resampled explicitly.
    value_cols = [col for col in dataframe.select_dtypes(include=['number', 'bool']).columns
                  if col not in (group_col, 'Date')]
    pieces = {
        key: getattr(group[value_cols + ['Date']].resample(freq, on='Date'), how)()
        for key, group in dataframe.groupby(group_col)
    }
    if pieces:
        data_groupped = pd.concat(pieces, names=[group_col, 'Date']).reset_index()
    else:
        data_groupped = pd.DataFrame(columns=[group_col, 'Date'] + value_cols)
        data_groupped['Date'] = pd.to_datetime(data_groupped['Date'])

    # one stable sort: rows are ordered by date, ties keep the group order
    data_groupped = data_groupped.sort_values(by='Date', kind='mergesort')
    # DateString is (day-of-year of the bin label + 1) // 7, whatever the frequency
    data_groupped['DateString'] = (data_groupped['Date'].dt.dayofyear.astype(int) + 1) // 7
    return data_groupped

def prepare_for_hexbin(dataframe, weekly = True, monthly = True):
    """Average the numeric columns per 'ECELL_ID' over weekly, monthly or daily bins.

    `weekly` takes precedence over `monthly`; daily bins are used only when both are
    False. Returns a frame sorted by 'Date' with the 'ECELL_ID', 'Date', the averaged
    columns and an integer 'DateString' column, (day-of-year + 1) // 7 of each bin label.
    """
    return _resample_per_group(dataframe, 'ECELL_ID', 'mean', weekly, monthly)

def prepare_for_timeseries(dataframe, weekly = True, monthly = True):
    """Sum the numeric columns per 'COMUNE' over weekly, monthly or daily bins.

    Same frequency rules and output layout as `prepare_for_hexbin`, except that rows
    are grouped by 'COMUNE' and aggregated with a sum.
    """
    return _resample_per_group(dataframe, 'COMUNE', 'sum', weekly, monthly)

def normalize_per_city(data, column, cities = ["MILANO", "ROMA"], city_column = "COMUNE"):
    """Add a 'NORM_<column>' column holding `column` standardised separately for each city.

    Parameters
    ----------
    data : DataFrame containing `column` and `city_column`.
    column : name of the KPI column to standardise.
    cities : cities for which a mean and a standard deviation are computed.
    city_column : column holding the city of each row.

    Returns
    -------
    A new DataFrame (the input is not modified) restricted to rows where
    `column` > 0 and no value is NaN, with the extra 'NORM_<column>' column.
    Rows whose city is not listed in `cities` get NaN there instead of raising.

    Notes
    -----
    The per-city statistics are computed on the complete (NaN-free) rows *before*
    the `column` > 0 filter is applied, so zero values still contribute to them.
    """
    complete_rows = data.dropna()
    means, stds = {}, {}
    for city in cities:
        city_values = complete_rows.loc[complete_rows[city_column] == city, column]
        means[city] = city_values.mean()
        stds[city] = city_values.std()

    # filtering before doing computations: some fields are wrongly 0
    data = data.where(lambda x: x[column] > 0).dropna()

    # vectorised lookup of each row's city statistics (unknown cities map to NaN)
    row_city = data[city_column]
    data["NORM_{}".format(column)] = (data[column] - row_city.map(means)) / row_city.map(stds)
    return data

def assign_constant_col(df, col, val):
    """Set column `col` of `df` to the constant `val` (in place) and return `df`."""
    df[col] = val
    return df

def remove_outliers(df, col, percentile=95):
    """Drop rows whose `col` value is missing or lies in the upper tail.

    Parameters
    ----------
    df : DataFrame to filter (not modified).
    col : column on which the cut is computed.
    percentile : rows with a value at or above this percentile of `col` are removed
        (default 95, i.e. only the upper tail is trimmed).

    Returns
    -------
    A new DataFrame with the remaining rows.
    """
    df = df.dropna(subset=[col])
    if df.empty:
        # no values left to compute a percentile from
        return df
    threshold = np.percentile(df[col], percentile)
    return df[df[col] < threshold]

def normalize_cols_per_city(df, cols = ["USERNUM_AVG", "Hin_Succ", "DL_VOL"]):
    """Apply `normalize_per_city` to `df` once per column in `cols`, in order.

    Each pass works on the output of the previous one; the final frame is returned.
    """
    normalized = df
    for kpi_name in cols:
        normalized = normalize_per_city(normalized, kpi_name)
    return normalized

def assign_period(dfs):
    """Tag each frame of `dfs` with a 'period' column taken from the module-level `periods`.

    The i-th frame receives the i-th period label, so `dfs` must follow the order of
    `periods`. Pairing stops at the shorter of the two sequences. The list is updated
    in place and returned.
    """
    for position, label in zip(range(len(dfs)), periods):
        dfs[position] = assign_constant_col(dfs[position], "period", label)
    return dfs

def normalize_per_city_dfs(dfs):
    """Replace every frame of `dfs` with its per-city normalised version.

    The KPIs are normalised in the order USERNUM_AVG, Hin_Succ, DL_VOL. The list is
    updated in place and returned.
    """
    for position in range(len(dfs)):
        dfs[position] = normalize_cols_per_city(dfs[position], ["USERNUM_AVG", "Hin_Succ", "DL_VOL"])
    return dfs

def complete_dfs_ts(dfs):
    """Finish the per-period time-series frames: add the 'period' label, then normalise per city."""
    return normalize_per_city_dfs(assign_period(dfs))


# NOTE(review): absolute, user-specific path - the notebook only runs where this directory exists.
data_path = "/Users/filipkrasniqi/Documents/Datasets.tmp/traffic-covid/"
# Used both to build the pickle file names and to turn the numeric City code back into a
# label (positions 0 and 1 match the codes assigned when the datasets are merged).
cities = ["Milano", "ROMA", "TORINO"]

# KPI columns and the additional columns carried into the per-period frames
columns_all_data, required_columns_all_data = ["DL_VOL", "Hin_Succ", "USERNUM_AVG"], ["LAT", "LNG", "City", 'hour', 'Date', 'ECELL_ID', 'COMUNE']
# Period-of-day labels; assign_period pairs them positionally with the per-period frames
periods = ["Dawn", "Morning", "Lunch", "Afternoon", "Evening"]
# columns to use for hexbin visualization
columns, required_columns = ["DL_VOL", "Hin_Succ", "USERNUM_AVG"], ["LAT", "LNG", "DateString", "City"]

# Reading data regarding Rome and Milan

## Reading data for Rome and Milan and adding coordinates as float

In [3]:
# Each pickle is loaded and immediately given numeric LAT/LNG columns by fix_coords.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
data_milano = fix_coords(pd.read_pickle("{}LTE_1800_{}.pkl".format(data_path, cities[0])))
data_milano_2 = fix_coords(pd.read_pickle("{}LTE_1800_{}_P2.pkl".format(data_path, cities[0])))
data_rome = fix_coords(pd.read_pickle("{}LTE_1800_{}.pkl".format(data_path, cities[1])))

## Removing outliers for each column (*)
Outliers = values at or above the 95th percentile of a column (only the upper tail is removed). This is done separately for each dataset, before merging data among the cities.

In [4]:
# remove outliers from the three datasets regarding DL_VOL and Hin_Succ
# (DL_VOL is trimmed first, then Hin_Succ on what is left; each dataset has its own threshold)
for _kpi in ("DL_VOL", "Hin_Succ"):
    data_milano = remove_outliers(data_milano, _kpi)
    data_milano_2 = remove_outliers(data_milano_2, _kpi)
    data_rome = remove_outliers(data_rome, _kpi)



## Merging data for cities

In [5]:
# merging them: City is a numeric code, 0 for Milan (both parts) and 1 for Rome
all_data_milano = pd.concat([data_milano, data_milano_2]).assign(City=0)
data_rome = data_rome.assign(City=1)
all_data = pd.concat([all_data_milano, data_rome])
# the user count must be a float for the later aggregations
all_data["USERNUM_AVG"] = all_data["USERNUM_AVG"].astype(float)

## Initializing data to group them for period (*)

In [6]:
# preparing data for period visualization
all_data["hour"] = [stamp.hour for stamp in pd.DatetimeIndex(all_data.Date)]

# only the KPI columns plus the bookkeeping columns are carried into the per-period frames
_period_source = all_data[columns_all_data+required_columns_all_data]

def _rows_in_hours(first_hour, last_hour):
    """Rows whose hour lies in [first_hour, last_hour]; rows containing any NaN are dropped."""
    return _period_source.where(lambda x: (x.hour >= first_hour) & (x.hour <= last_hour)).dropna()

# getting data with relative period (hours 0-3 do not belong to any period)
evening_data = _rows_in_hours(20, 23)
afternoon_data = _rows_in_hours(16, 19)
lunch_data = _rows_in_hours(12, 15)
morning_data = _rows_in_hours(8, 11)
dawn_data = _rows_in_hours(4, 7)

## Grouping data for timeseries: data range (*)

### Regardless the period

In [7]:
def _grouped_kpis(weekly, monthly):
    """Resample all_data per COMUNE, keep the plotted columns and add the NORM_* columns.

    Returns the full resampled frame and the selected / normalised frame.
    """
    resampled = prepare_for_timeseries(all_data, weekly = weekly, monthly = monthly)
    selected = resampled[columns+required_columns + ["Date", "COMUNE"]].dropna()
    for kpi_name in ("USERNUM_AVG", "Hin_Succ", "DL_VOL"):
        selected = normalize_per_city(selected, kpi_name)
    return resampled, selected

data_groupped_day, daily_selected_data_groupped = _grouped_kpis(weekly = False, monthly = False)

data_groupped_weekly, weekly_selected_data_groupped = _grouped_kpis(weekly = True, monthly = False)

data_groupped_month, monthly_selected_data_groupped = _grouped_kpis(weekly = False, monthly = True)

### Selecting per period

In [8]:
# per-period frames, in the same order as `periods` (assign_period relies on it)
_period_frames = [dawn_data, morning_data, lunch_data, afternoon_data, evening_data]

# resampling data for timeseries: per day
dfs_day = [prepare_for_timeseries(frame, weekly = False, monthly = False) for frame in _period_frames]

# resampling data for timeseries: per week
dfs_week = [prepare_for_timeseries(frame, weekly = True, monthly = False) for frame in _period_frames]

# resampling data for timeseries: per month
dfs_month = [prepare_for_timeseries(frame, weekly = False, monthly = True) for frame in _period_frames]

# assigning period col and normalizing
dfs_day = complete_dfs_ts(dfs_day)
dfs_week = complete_dfs_ts(dfs_week)
dfs_month = complete_dfs_ts(dfs_month)

# creating final dfs for day, week or month
ts_period_data_groupped_daily = pd.concat(dfs_day)
ts_period_data_groupped_weekly = pd.concat(dfs_week)
ts_period_data_groupped_monthly = pd.concat(dfs_month)

## Grouping data for hexbin: data range and cell

### Regardless the period

In [9]:
# weekly bins per cell (prepare_for_hexbin defaults to weekly=True)
hex_data_groupped_week = prepare_for_hexbin(all_data)

In [10]:
# monthly bins per cell: with weekly=False the default monthly=True applies
hex_data_groupped_month = prepare_for_hexbin(all_data, weekly = False)

In [11]:
# columns to use for hexbin visualization
columns, required_columns = ["DL_VOL", "Hin_Succ", "USERNUM_AVG"], ["LAT", "LNG", "DateString", "City"]

def _select_hex_columns(grouped):
    """Keep the hexbin columns, drop incomplete rows and turn the City code into its label."""
    selected = grouped[columns+required_columns].dropna()
    # the City code is truncated to an int, then used as an index into `cities`
    selected["City"] = selected["City"].astype(int).apply(lambda x: cities[x])
    return selected

# week: take groupped data and assign the column I need. Plus, fix the city
hex_week_selected_data_groupped = _select_hex_columns(hex_data_groupped_week)

# month: do the same as week
hex_month_selected_data_groupped = _select_hex_columns(hex_data_groupped_month)

### Selecting per period

In [12]:
# (frame, period label) pairs, evening first and dawn last: this is the concatenation order
_hex_period_sources = [
    (evening_data, periods[4]),
    (afternoon_data, periods[3]),
    (lunch_data, periods[2]),
    (morning_data, periods[1]),
    (dawn_data, periods[0]),
]

# resampling data for hexbin (weekly) and tagging every frame with its period
hex_evening_data_week, hex_afternoon_data_week, hex_lunch_data_week, hex_morning_data_week, hex_dawn_data_week = [
    assign_constant_col(prepare_for_hexbin(frame), "period", label)
    for frame, label in _hex_period_sources
]

# same operations for month
hex_evening_data_month, hex_afternoon_data_month, hex_lunch_data_month, hex_morning_data_month, hex_dawn_data_month = [
    assign_constant_col(prepare_for_hexbin(frame, weekly = False), "period", label)
    for frame, label in _hex_period_sources
]

hex_period_data_groupped_week = pd.concat(
    [hex_evening_data_week, hex_afternoon_data_week, hex_lunch_data_week,
     hex_morning_data_week, hex_dawn_data_week])
hex_period_data_groupped_month = pd.concat(
    [hex_evening_data_month, hex_afternoon_data_month, hex_lunch_data_month,
     hex_morning_data_month, hex_dawn_data_month])

period_columns = columns+required_columns+['period', 'ECELL_ID']

def _select_period_columns(grouped):
    """Keep the per-period hexbin columns, drop incomplete rows and label the City code."""
    selected = grouped[period_columns].dropna()
    selected["City"] = selected["City"].astype(int).apply(lambda x: cities[x])
    return selected

hex_period_week_selected_data_groupped = _select_period_columns(hex_period_data_groupped_week)

hex_period_month_selected_data_groupped = _select_period_columns(hex_period_data_groupped_month)

## Compare aggregated data for Milano and Rome

### Building visualization for both normalized and non-normalized case

In [13]:
# the first entry is the initial selection of the "Data range" dropdowns
dataRanges = ["Week", "Month", "Day"]
# names of the per-city normalised columns created by normalize_per_city
norm_columns = ["NORM_{}".format(col) for col in columns]
# municipalities offered in the "Comune" dropdowns
towns = daily_selected_data_groupped.COMUNE.unique()

def build_app_timeseries(columns, dataRanges, towns):
    """Build a JupyterDash app plotting one KPI over time, one line per COMUNE.

    Parameters
    ----------
    columns : KPI column names offered in the "KPI" dropdown; the first is the default.
        They also determine the app name and the id of the graph component.
    dataRanges : entries of the "Data range" dropdown; the first is the default.
        "Day" and "Week" select the daily / weekly frames, anything else the monthly one.
    towns : entries of the optional "Comune" filter.

    The callback reads the module-level frames daily_selected_data_groupped,
    weekly_selected_data_groupped and monthly_selected_data_groupped.

    Returns
    -------
    The configured app; call run_server on it to display it.
    """
    name = "TIMESERIES_TOTAL_{}".format("_".join(columns))
    app_timeseries = JupyterDash(name)

    app_timeseries.layout = html.Div([
    html.Label(
        [
            "Data range",
            dcc.Dropdown(id="dateRange",
                         options=[{"label": x, "value": x} for x in dataRanges],
                        value=dataRanges[0],
                        clearable=False)
        ]),
    html.Label(
        [
            "KPI",
            dcc.Dropdown(id="kpi",
                         options=[{"label": x, "value": x} for x in columns],
                        value=columns[0],
                        clearable=False)
        ]),
    html.Label(
        [
            "Comune",
            dcc.Dropdown(id="town",
                         options=[{"label": x, "value": x} for x in towns],
                        value=None,
                        clearable=True)
        ],
    ),
    html.Div(dcc.Graph(id=name))])

    @app_timeseries.callback(
    Output(name, "figure"), 
    [Input("dateRange", "value"), Input("kpi", "value"), Input("town", "value")])
    def display_map_period(dateRange, kpi, town):
        """Redraw the line chart for the selected range, KPI and (optional) town."""
        if dateRange == "Day":
            selected_data_groupped = daily_selected_data_groupped
        elif dateRange=="Week":
            selected_data_groupped = weekly_selected_data_groupped
        else:
            selected_data_groupped = monthly_selected_data_groupped

        # no town selected means every COMUNE is plotted
        if town is not None:
            selected_data_groupped = selected_data_groupped.where(lambda x:x.COMUNE==town).dropna()

        fig = px.line(selected_data_groupped, x='Date', y=kpi, color='COMUNE')

        return fig

    return app_timeseries

# two independent apps over the same frames: one plots the NORM_* columns, the other the raw KPIs
app_timeseries_normalized, app_timeseries = build_app_timeseries(norm_columns, dataRanges, towns), build_app_timeseries(columns, dataRanges, towns)

### Comparing the raw data

In [14]:
# raw-KPI time series, rendered in the cell output
app_timeseries.run_server(mode='inline', port=16000) # debug=True, use_reloader=False

## Conclusions
Comparing raw data between Rome and Milan gives interesting insights on how the mobile traffic is used in the two cities. We remind here that population in Rome is around 4M, while Milan has approximately 3M people (ratio Milan-Rome: 0.75).
### Download
When analysing data regarding downloads in the two cities, our data are not consistent at all. On average, the metropolitan city of Rome downloads 10000 more data than Milan; the difference is huge. This could be explained by different factors: for example, users may typically use **heavier applications**, for a **longer time** (e.g.: people in Rome may use the 4G connection more than WiFi even at home), and also the number of subscribers to our mobile operator may not be represented by the population ratio.
### Avg. Number of users
Analysing this KPI allows us to throw some more conclusions. As we can see from the plots, Milan has an average usage of 4M per week, while Rome is around 40-50M. This means that we have, on average, around 10 times the users of Milan. This partially explains the higher number of downloads in Rome; still, we can now conclude that people in Rome use much more the mobile network wrt Milan. Please have in mind that this information may be biased: indeed, if mobility is higher on one city wrt another, we would experience duplicates when counting users. For this reason, we compare also the last KPI.
### Handover-IN (success)
For our purpose, we'll consider the values prior to March, in particular those of February; by doing this, we are focusing on data that do not comprehend COVID-19 as a factor of variation of mobility. Indeed, something that is much more visible when analysing this feature is the drop around March, that can be seen in both situations. For a better comparison, we'll see the normalized data per city afterwards, so for now we'll just compare absolute values of Handover during February.

Rome has a daily average of around 200M, while Milan lies around 14M. This means that we have much more mobility in Rome, and that the number of users as described above are much more biased in the Rome case than in Milan. Despite that, we can still draw the same conclusions about the usage of mobile traffic.

### Comparing normalized data

In [15]:
# per-city normalised time series, rendered in the cell output
app_timeseries_normalized.run_server(mode='inline', port=16002) # debug=True, use_reloader=False

## Conclusions
To understand this, here we provide a brief description of what we did. Basically, for each considered KPI we normalized the data for each city separately. By doing this, we can no more compare the absolute values; the analysis will be much more qualitative and focused on the behaviour during the month to **compare the two cases**, to draw some conclusions again on the use of mobile networks and on how people reacted to COVID-19 restrictions, both in terms of mobility and of usage of the mobile networks.

### Download
Our expectation would be to see a similar normalized behaviour, maybe in a different way, but still, we expect that entering lockdown would decrease the usage of mobile networks. The daily time series gives pretty interesting insights. In particular, we can see how Milan followed the just described trend for most of the time, even though at the end (when reaching summer) the usage keeps dropping in the same way as during lockdown. Instead, Rome is actually behaving in a very different way. Indeed, we can see how in March we have a huge increase in the usage of the networks, and when the lockdown ends it drops in a logarithmic fashion.

We should try to motivate these two aspects:
1) why Rome has experienced such an increase of usage in March -> people are less used to using the Internet than in Milan, but use the mobile connection instead of WiFi???
2) why Milan follows our ideal trend until May, but then follows the lockdown trend even though the city should experience a more normal behaviour -> people in Milan use the Internet for other purposes, and they were aware of the fact that they should behave properly regardless of the lockdown???

### Avg. Number of users
If compared with the download scenario, the trend makes complete sense. Indeed, we see again a slow but linear decrease of users for Milan, while Rome experiences first a huge increase when entering March, and then a decrease once lockdown finishes. So, nothing new if compared with download.

### Handover-IN (success)
We can see that when normalizing the data we see a pattern. Mobility seems to work similar in the two cases, when normalizing the KPI, even though Milan seems to be a bit more reluctant to mobility after the lockdown if compared to Rome.

In [16]:
def build_app_timeseries_period(columns, dataRanges, towns, periods):
    """Build a JupyterDash app plotting one KPI over time, split by period of day.

    Parameters
    ----------
    columns : KPI column names offered in the "KPI" dropdown; the first is the default.
        They also determine the app name and the id of the graph component.
    dataRanges : entries of the "Data range" dropdown; the first is the default.
        "Day" and "Week" select the daily / weekly frames, anything else the monthly one.
    towns : entries of the optional "Comune" filter.
    periods : period labels offered in the multi-select; the first is pre-selected.

    The callback reads the module-level frames ts_period_data_groupped_daily,
    ts_period_data_groupped_weekly and ts_period_data_groupped_monthly.

    Returns
    -------
    The configured app; call run_server on it to display it.
    """
    name = "TIMESERIES_PERIOD_{}".format("_".join(columns))
    app_period_timeseries = JupyterDash(name)

    app_period_timeseries.layout = html.Div([
    html.Label(
        [
            "Data range",
            dcc.Dropdown(id="dateRange",
                         options=[{"label": x, "value": x} for x in dataRanges],
                         value=dataRanges[0],
                         clearable=False,),
        ]
    ),
    html.Label(
        [
            "Periods to show",
            dcc.Dropdown(id="periods",
                         options=[{"label": x, "value": x} for x in periods],
                        # a multi-select holds a list of values
                        value=[periods[0]],
                        multi=True,
                        clearable=False,),
        ]
    ),
    html.Label(
        [
            "KPI",
            dcc.Dropdown(id="kpi",
                         options=[{"label": x, "value": x} for x in columns],
                        value=columns[0],
                        clearable=False)
        ]
    ),
    html.Label(
        [
            "Comune",
            dcc.Dropdown(id="town",
                         options=[{"label": x, "value": x} for x in towns],
                        value=None,
                        clearable=True)
        ],
    ),
    html.Div(dcc.Graph(id=name))])

    @app_period_timeseries.callback(
    Output(name, "figure"), 
    [Input("dateRange", "value"), Input("periods", "value"), Input("kpi", "value"), Input("town", "value")])
    def display_map_period(dateRange, periods, kpi, town):
        """Redraw the chart for the selected range, periods, KPI and (optional) town."""
        # nothing selected (None or an emptied multi-select): fall back to Dawn
        if not periods:
            periods = ["Dawn"]

        # a single selection may arrive as a bare string
        if isinstance(periods, str):
            periods = [periods]

        if dateRange == "Day":
            selected_data_groupped = ts_period_data_groupped_daily
        elif dateRange=="Week":
            selected_data_groupped = ts_period_data_groupped_weekly
        else:
            selected_data_groupped = ts_period_data_groupped_monthly

        if town is not None:
            selected_data_groupped = selected_data_groupped.where(lambda x:x.COMUNE==town).dropna()

        # keep the rows belonging to any of the selected periods
        selected_data_groupped = selected_data_groupped[selected_data_groupped["period"].isin(periods)]

        fig = px.line(selected_data_groupped, x='Date', y=kpi, line_dash='COMUNE', color='period')

        return fig

    return app_period_timeseries

# the period options are the labels actually present in the daily per-period data
app_timeseries_period_normalized, app_timeseries_period = \
    build_app_timeseries_period(norm_columns, dataRanges, towns, ts_period_data_groupped_daily["period"].unique()), \
    build_app_timeseries_period(columns, dataRanges, towns, ts_period_data_groupped_daily["period"].unique())

In [17]:
# raw KPIs per period of day, rendered in the cell output
app_timeseries_period.run_server(mode='inline', port = 16003)

In [18]:
# per-city normalised KPIs per period of day, rendered in the cell output
app_timeseries_period_normalized.run_server(mode='inline', port = 16004)

# Commenti
- DL: evening in Roma meno dipendente da lockdown in confronto agli altri periodi
- Hin ha comportamento simile in Lunch, Afternoon e Morning
- 9 marzo: lockdown coincide con caduta di mobilità (Hin)
- 9 Marzo: a Roma si nota un aumento di DL VOL. L'aumento è notevole nell'orario lavorativo, segno che l'utilizzo del traffico internet mobile è diverso nelle due città
- 6 aprile: minimo Hin sia a Milano che a Roma https://www.reggionline.com/coronavirus-circa-2-500-aziende-riapriranno-deroga-dal-6-aprile-video/
- 4 maggio: notevole aumento (inizio fase 2)
- 1 giugno: diminuzione notevole, festa 2/06 e ponte
- 02/07: Milano, evening: calo inspiegato di Hin
- Estate: sia Milano che Roma registrano un maggiore aumento normalizzato di mobilità la sera

## Compare aggregated data for different period of day

In [19]:
# TODO add Torino
# TODO add a rolling average to the visualization

# TODO provare nuove features:
# - ratio at every time of dl_link -> how much link is used? this could normalize in case cells behave differently. Does this actually happen?
# - dl_link/#user -> low values should represent high mobility areas, because users connected but used few data compared to other

# TODO timeseries covid

# TODO talk with Andrea about analysing Rome (tips on areas and more)
# TODO different KPIs -> Filip -> X
# TODO per month instead of week -> Filip -> X

# TODO represent in different period during days, and compare with dinner time (as a base for home) -> Filip -> X
# TODO represent in different period of weeks, and compare with weekend -> Filip

# TODO find info about density of population on specific places / areas -> Franci, (Filip)
# TODO find area specification per area (e.g.: business, home, turistic, ...) -> Franci, (Filip)

# -> brainstorming: attempt on 23/12

# es: Garibaldi -> DoW, h8-h12, h14-h18: work
# DoW + WE, h18-6: home

## Aggregated data in map

In [20]:
# Map app: KPI, city and data-range selectors above an animated hexbin map
app = JupyterDash("Map")

app.layout = html.Div([
    html.Label(["KPIs", dcc.Dropdown(
        id="kpi",
        options=[{"label": x, "value": x} for x in columns],
        value=columns[0],
        clearable=False,
                )]),
    html.Label(
        [
            "Città",
            dcc.Dropdown(id="city",
                         options=[{"label": x, "value": x} for x in hex_week_selected_data_groupped.City.unique()],
                        # NOTE(review): takes the second distinct city, so this fails when the data holds only one
                        value=hex_week_selected_data_groupped["City"].unique()[1],
                        clearable=False,),
        ]
    ),
    html.Label(
        [
            "Data range",
            dcc.Dropdown(id="dateRange",
                         options=[{"label": x, "value": x} for x in ["Week", "Month"]],
                        value="Week",
                        clearable=False,),
        ]
    ),
    html.Div(dcc.Graph(id="map-chart"))
])

@app.callback(
    Output("map-chart", "figure"), 
    [Input("kpi", "value"), Input("city", "value"), Input("dateRange", "value")])
def display_map(kpi, city, dateRange):
    """Draw the animated hexbin map of `kpi` for one city, by week or by month.

    "Week" selects the weekly frame; any other value selects the monthly one.
    """
    source = hex_week_selected_data_groupped if dateRange == "Week" else hex_month_selected_data_groupped

    # rows of other cities become all-NaN and are then dropped
    city_rows = source.where(lambda x:x.City==city).dropna()

    fig = ff.create_hexbin_mapbox(
        data_frame=city_rows,
        lat="LAT",
        lon="LNG",
        nx_hexagon=30,
        animation_frame="DateString",
        color=kpi,
        color_continuous_scale="Inferno",
        labels={"color": kpi, "frame": "DateString"},
    )

    # remove the outer margins, then push the slider and the play button below the map
    fig.update_layout(margin=dict(b=0, t=0, l=0, r=0))
    fig.layout.sliders[0].pad.t=20
    fig.layout.updatemenus[0].pad.t=60
    return fig

# aggregated hexbin map, rendered in the cell output
app.run_server(mode='inline', port = 15000)

## Aggregated data in map per period

In [21]:
# Per-period map app: same selectors as the aggregated map, plus the period of day and a
# switch to show the difference between the evening and the selected period
app_period = JupyterDash("PERIOD")

app_period.layout = html.Div([
    html.Label(["KPIs", dcc.Dropdown(
        id="kpi",
        options=[{"label": x, "value": x} for x in columns],
        value=columns[0],
        clearable=False,
                )]),
    html.Label(
        [
            "Città",
            dcc.Dropdown(id="city",
                         options=[{"label": x, "value": x} for x in hex_period_week_selected_data_groupped.City.unique()],
                        # NOTE(review): takes the second distinct city, so this fails when the data holds only one
                        value=hex_period_week_selected_data_groupped["City"].unique()[1],
                        clearable=False,),
        ]
    ),
    html.Label(
        [
            "Data range",
            dcc.Dropdown(id="dateRange",
                         options=[{"label": x, "value": x} for x in ["Week", "Month"]],
                        value="Week",
                        clearable=False,),
        ]
    ),
    html.Label(
        [
            "Period",
            dcc.Dropdown(id="period",
                         options=[{"label": x, "value": x} for x in hex_period_week_selected_data_groupped.period.unique()],
                        value="Dawn",
                        clearable=False,),
        ]
    ),
    html.Label(
        [
            "Period vs evening",
            dcc.Dropdown(id="period_vs_evening",
                         options=[{"label": x, "value": x} for x in ["Yes", "No"]],
                        value="No",
                        clearable=False,),
        ]
    ),
    html.Div(dcc.Graph(id="period-chart"))])
    
@app_period.callback(
    Output("period-chart", "figure"), 
    [Input("kpi", "value"), Input("city", "value"), Input("dateRange", "value"), Input("period", "value"), Input("period_vs_evening", "value")])
def display_map_period(kpi, city, dateRange, period, periodVsEvening):
    """Draw the animated hexbin map of `kpi` for one city and one period of day.

    With periodVsEvening == 'No' the map shows the KPI of the selected period.
    Otherwise it shows, per cell and DateString, the absolute difference between the
    evening value and the value of the selected period. "Week" selects the weekly
    frame; any other dateRange selects the monthly one.
    """
    # comparing the evening with itself is pointless: show the plain evening values
    if period == 'Evening':
        periodVsEvening = 'No'

    if dateRange == "Week":
        selected_data_groupped = hex_period_week_selected_data_groupped
    else:
        selected_data_groupped = hex_period_month_selected_data_groupped

    filtered_data_groupped = selected_data_groupped.where(lambda x:x.City==city).dropna()
    if periodVsEvening == 'No':
        filtered_data_groupped = filtered_data_groupped.where(lambda x:x.period==period).dropna()
        # round trip through the index: only moves ECELL_ID and DateString to the front
        filtered_data_groupped = filtered_data_groupped.set_index(['ECELL_ID', 'DateString']).reset_index()
    else:
        # index by cell and time slot so the subtraction below aligns matching rows
        indexed = filtered_data_groupped.set_index(['ECELL_ID', 'DateString'])
        evening_rows = indexed.where(lambda x:x.period=='Evening').dropna()
        period_rows = indexed.where(lambda x:x.period==period).dropna()

        # NOTE(review): cells without a row for `period` get a NaN difference - confirm
        # whether they should be dropped before plotting.
        evening_rows[kpi] = (evening_rows[kpi] - period_rows[kpi]).abs()

        # a single reset: the index is restored as columns once, with no extra 'index' column
        filtered_data_groupped = evening_rows.reset_index()

    fig = ff.create_hexbin_mapbox(
        data_frame=filtered_data_groupped,
        lat="LAT", lon="LNG", nx_hexagon=30, animation_frame="DateString", color=kpi,
        color_continuous_scale="Inferno", labels={"color": kpi, "frame": "DateString"}
    )
    fig.update_layout(margin=dict(b=20, t=20, l=0, r=0))

    # push the slider and the play button below the map
    fig.layout.sliders[0].pad.t=30
    fig.layout.updatemenus[0].pad.t=50

    return fig

# per-period hexbin map, rendered in the cell output
app_period.run_server(mode='inline', port=15001) # debug=True, use_reloader=False