In [6]:
import numpy as np
import pandas as pd
import insee
import mobile_traffic as mt
import plotly.graph_objects as go
from datetime import date, datetime, timedelta, time

In [7]:
from Aggregation import aggregate_netmob_tile_level_variables_to_insee_tile_level

In [8]:
# Have IPython re-import edited modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Base directory for the CSV files read and written by the cells below.
# NOTE(review): absolute, machine-specific location — adjust before running elsewhere.
path = '/Users/andrea/Desktop/PhD/Projects/Current/NetMob/Data/BaseData'

In [10]:
# Load the NetMob-tile app consumption table and index it by its 'tile' column.
mobile_usage = pd.read_csv(path + '/app_consumption_netmob_tile.csv').set_index('tile')

In [11]:
# Re-aggregate every usage column from NetMob tiles onto INSEE tiles, keep only
# the INSEE tiles whose 'covered_area' exceeds 0.8, then drop that helper column.
mobile_usage_insee_tile = (
    aggregate_netmob_tile_level_variables_to_insee_tile_level(
        mobile_usage,
        city_netmob_tile=mt.City.PARIS,
        variables=list(mobile_usage.columns),
        aggregation_fct='weighted_sum',
    )
    .set_index('insee_tile')
    .loc[lambda frame: frame['covered_area'] > 0.8]
    .drop(columns=['covered_area'])
)

In [12]:
# Persist the INSEE-tile-level usage table in the base data directory.
mobile_usage_insee_tile.to_csv(path + '/app_consumption_insee_tile.csv')

In [13]:
# Fetch 'Ind_snv' for the retained INSEE tiles and keep its base-2 logarithm
# under the column name 'log2_mean_income'.
income = (
    insee.tile.get_data(
        tile=mobile_usage_insee_tile.index.values,
        var_name=['Ind_snv'],
        shares=True,
    )
    .rename(columns={'Ind_snv': 'log2_mean_income'})
    .assign(log2_mean_income=lambda frame: np.log2(frame['log2_mean_income']))
)

In [21]:
# Split tiles into three income classes using the 30% and 70% quantiles of
# log2 mean income: (-inf, q30] -> 'low', (q30, q70] -> 'medium', (q70, inf) -> 'high'.
# (For a decile split instead, use q = np.round(np.linspace(0.1, 0.9, 9), 2)
# together with one label per resulting bin.)
q = [0.3, 0.7]
quantiles = list(np.quantile(income['log2_mean_income'].values, q=q))
bins = [-np.inf] + quantiles + [np.inf]
labels = ['low', 'medium', 'high']
categories = pd.cut(income['log2_mean_income'], bins=bins, labels=labels).to_frame('income_category')
# Drop any 'income_category' left over from a previous run of this cell first;
# otherwise the merge would create 'income_category_x' / 'income_category_y'.
income = (
    income
    .drop(columns=['income_category'], errors='ignore')
    .merge(categories, left_index=True, right_index=True)
)

In [22]:
# Attach each INSEE tile's income category, then total the usage of every app
# within each category (categories without tiles are kept: observed=False).
mobile_usage_income_category = (
    mobile_usage_insee_tile
    .merge(categories, left_index=True, right_index=True)
    .groupby('income_category', observed=False)
    .sum()
)

In [23]:
def compute_rca(df):
    """Compute the revealed comparative advantage (RCA) of every cell of ``df``.

    For row ``r`` and column ``c`` the result is the share of ``c`` within the
    total of row ``r``, divided by the share of ``c`` within the grand total of
    the table. A value of 1 therefore means the row uses the column exactly in
    proportion to the table as a whole.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table of amounts.

    Returns
    -------
    pandas.DataFrame
        Table with the same index and columns as ``df`` holding the RCA values.
    """
    row_totals = df.sum(axis=1)
    column_totals = df.sum(axis=0)
    grand_total = column_totals.sum()

    share_within_row = df.div(row_totals, axis=0)
    share_overall = column_totals / grand_total
    return share_within_row.div(share_overall, axis=1)

In [24]:
# RCA at INSEE-tile level (one row per INSEE tile).
# NOTE(review): not referenced again in the cells visible here.
mobile_usage_insee_tile_rca = compute_rca(df=mobile_usage_insee_tile)

In [25]:
# RCA at income-category level (one row per income category).
mobile_usage_income_category_rca = compute_rca(df=mobile_usage_income_category)

In [26]:
# Order the columns by decreasing RCA in the 'low' income row.
mobile_usage_income_category_rca = mobile_usage_income_category_rca.sort_values(
    by='low', axis=1, ascending=False)

In [29]:
# Export the 'low' income row of the RCA table as a ranking file.
mobile_usage_income_category_rca.loc['low'].to_csv(path + '/app_rca_ranking_by_low_income.csv')

In [128]:
# Heatmap of the income-category RCA table: one row per income category,
# one column per entry of the table's columns.
fig = go.Figure()
heatmap = go.Heatmap(
    z=mobile_usage_income_category_rca.values,
    x=mobile_usage_income_category_rca.columns,
    y=mobile_usage_income_category_rca.index,
    zmid=1,  # centre the diverging colour scale on RCA == 1
    colorscale='RdBu',
    # `colorbar.titleside` is deprecated and rejected by recent Plotly releases;
    # the nested `title.side` attribute is the supported spelling.
    colorbar=dict(
        title=dict(
            text='RCA',
            side='right'
        )
    ))
fig.add_trace(heatmap)
fig.update_layout(
    title='RCA of mobile app usage by income category',
    xaxis_title='Mobile app',
    yaxis_title='Income category',
    template='plotly_white',
    width=1800,
    height=800,
    font=dict(size=18, color='black'))
# Opens the figure in the system browser rather than inline in the notebook.
fig.show(renderer='browser')

In [145]:
# Load app consumption by time of day; the 'time' column ('HH:MM:SS' strings)
# is parsed into datetime.time objects and becomes the index.
mobile_usage_by_time = (
    pd.read_csv(path + '/app_consumption_by_time.csv')
    .assign(time=lambda frame: pd.to_datetime(frame['time'], format='%H:%M:%S').dt.time)
    .set_index('time')
)

In [149]:
def time_from_reference_time(times, reference_time):
    """Map each clock time in ``times`` to its offset from ``reference_time``.

    Every element is passed to ``_time_from_reference_time``; the results are
    returned as a list in the same order as ``times``.
    """
    offsets = []
    for clock_time in times:
        offsets.append(_time_from_reference_time(t=clock_time, reference_time=reference_time))
    return offsets
    
    
def _time_from_reference_time(t, reference_time):
    auxiliary_date = date(2020, 1, 1)
    if t < reference_time:
        datetime.combine(auxiliary_date, t)
        return datetime.combine(auxiliary_date, t) + timedelta(days=1) - datetime.combine(auxiliary_date, reference_time)
    else:
        return datetime.combine(auxiliary_date, t) - datetime.combine(auxiliary_date, reference_time)

In [174]:
# Reorder the rows so the day starts at 21:00: the sort key is each row's
# elapsed time since 21:00, with times before 21:00 counted as the next day.
mobile_usage_by_time.sort_index(key=lambda x: time_from_reference_time(times=x, reference_time=time(21)), inplace=True)

In [196]:
# Average consecutive rows in groups of `time_slot_size`; each aggregated row is
# labelled with the first time of its group (the last group may be shorter).
time_slot_size = 8
slot_starts = range(0, len(mobile_usage_by_time.index), time_slot_size)
times = [mobile_usage_by_time.index[start] for start in slot_starts]
slot_means = [
    mobile_usage_by_time.iloc[start:start + time_slot_size].mean()
    for start in slot_starts
]

mobile_usage_by_time_aggregated = pd.concat(slot_means, axis=1).T
mobile_usage_by_time_aggregated.index = times

In [197]:
# RCA per time slot (one row per aggregated time slot).
mobile_usage_by_time_rca = compute_rca(df=mobile_usage_by_time_aggregated)

In [198]:
# Rank the columns by their mean RCA over the rows from the 01:00 label onwards.
late_night_time = time(1)
# NOTE(review): the slice is open-ended, so it runs from the 01:00 row to the
# LAST row of the frame (rows were ordered starting at 21:00 earlier), not only
# over a few night-time slots — confirm this is the intended "deep night" window.
# Presumably 01:00 must also be an exact row label, because the index is not in
# chronological order — verify against the slot boundaries.
avg_deep_night = mobile_usage_by_time_rca.loc[late_night_time:].mean()
# Integer positions of the columns in increasing order of that mean.
argsort_deep_night = avg_deep_night.argsort()

In [199]:
# Reorder the columns by increasing `avg_deep_night`.
# The order is resolved to column *labels* before it is applied: applying the
# positional permutation directly (as `.iloc[argsort]` does) is not idempotent,
# so re-running this cell would shuffle the already-ordered columns again.
ordered_columns = avg_deep_night.index[argsort_deep_night.values]
mobile_usage_by_time_rca = mobile_usage_by_time_rca.loc[:, ordered_columns].copy()

In [200]:
# Heatmap of the time-slot RCA table: one row per time slot (labelled with the
# ISO-formatted start time), one column per entry of the table's columns.
fig = go.Figure()
heatmap = go.Heatmap(
    z=mobile_usage_by_time_rca.values,
    x=mobile_usage_by_time_rca.columns,
    y=[t.isoformat() for t in mobile_usage_by_time_rca.index],
    # Colour range clipped to [0.5, 1.5], symmetric around RCA == 1.
    zmax=1.5,
    zmid=1,
    zmin=0.5,
    colorscale='RdBu',
    # `colorbar.titleside` is deprecated and rejected by recent Plotly releases;
    # the nested `title.side` attribute is the supported spelling.
    colorbar=dict(
        title=dict(
            text='RCA',
            side='right'
        )
    ))
fig.add_trace(heatmap)
fig.update_layout(
    title='RCA of mobile app usage by time',
    xaxis_title='Mobile app',
    yaxis_title='Time',
    template='plotly_white',
    width=1800,
    height=600,
    font=dict(size=18, color='black'))
# Opens the figure in the system browser rather than inline in the notebook.
fig.show(renderer='browser')