In [12]:
import mobile_traffic as mt
from datetime import time
import noise
import insee
import numpy as np
import pandas as pd

In [31]:
from Sleep.SessionDistribution import calculate_session_distribution_city

In [2]:
%load_ext autoreload
%autoreload 2

In [10]:
path = '/Users/anmusso/Desktop/PhD/Projects/Current/NetMob/NetMobData/data/FigureData/Sleep'

# Expected Time of Session Data

In [66]:
mobile_traffic_lyon = mt.get_city_traffic_data(traffic_type=mt.TrafficType.USERS, city=mt.City.LYON, geo_data_type=mt.GeoDataType.IRIS)

100%|██████████| 77/77 [00:30<00:00,  2.50it/s]


In [67]:
session_distribution_lyon = calculate_session_distribution_city(mobile_traffic_lyon, start=time(21), end=time(3,30), window_smoothing=3)

In [68]:
expected_session_time = session_distribution_lyon.expected_time_of_session_by_location()
expected_session_time.reset_index(names='tile', inplace=True)

In [69]:
expected_session_time.head()

Unnamed: 0,tile,expected_time
0,11050000,23:00:00
1,12380000,23:00:00
2,12480000,23:00:00
3,12490101,23:00:00
4,12490103,23:00:00


In [70]:
expected_session_time.to_csv(f'{path}/expected_session_time_paris.csv', index=False)

# Noise Data

In [61]:
paris_geo = mt.geo_tile.get_geo_data(city=mt.City.PARIS)
paris_geo = paris_geo.iloc[:100]

In [62]:
noise_estimates = noise.get_noise_estimate(polygons=paris_geo, city=noise.City.PARIS, measurement=noise.Measurement.NIGHT)

In [63]:
noise_estimates = noise_estimates['noise_estimate'].to_frame()
noise_estimates.reset_index(names='tile', inplace=True)

In [64]:
noise_estimates.head()

Unnamed: 0,tile,noise_estimate
0,162,61.0
1,507,49.0
2,508,61.0
3,853,52.0
4,854,60.0


In [65]:
noise_estimates.to_csv(f'{path}/noise_estimates_paris_n.csv', index=False)

# Admin Data

In [122]:
paris_geo = mt.geo_tile.get_geo_data(city=mt.City.PARIS)

In [123]:
matching_tile_mobile_traffic_tile_insee = insee.tile.get_matching_tiles(polygons=paris_geo, return_intersection_area=True)

In [124]:
matching_tile_mobile_traffic_tile_insee.head()

Unnamed: 0_level_0,geometry,tile,intersection_area
tile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
162,"POLYGON ((653218.968 6838547.550, 653219.807 6...",CRS3035RES200mN2866000E3759200,6813.063953
162,"POLYGON ((653218.968 6838547.550, 653219.807 6...",CRS3035RES200mN2866000E3759400,3173.721549
507,"POLYGON ((653119.877 6838648.320, 653120.716 6...",CRS3035RES200mN2866000E3759200,8319.845685
507,"POLYGON ((653119.877 6838648.320, 653120.716 6...",CRS3035RES200mN2866200E3759200,1666.941405
508,"POLYGON ((653219.807 6838647.481, 653220.646 6...",CRS3035RES200mN2866000E3759200,5331.103238


In [125]:
matching_tile_mobile_traffic_tile_insee.rename(columns={'tile': 'tile_insee'}, inplace=True)
matching_tile_mobile_traffic_tile_insee.reset_index(names='tile_mobile_traffic', inplace=True)
matching_tile_mobile_traffic_tile_insee = matching_tile_mobile_traffic_tile_insee[['tile_mobile_traffic', 'tile_insee', 'intersection_area']]

In [11]:
matching_tile_mobile_traffic_tile_insee.to_csv(f'{path}/matching_tile_mobile_traffic_tile_insee_paris.csv', index=False)

# Merge of three datasets at the insee tile level

In [126]:
from datetime import datetime

In [141]:
expected_session_time = pd.read_csv(f'{path}/expected_session_time_paris_start_21.csv')

In [142]:
noise_estimates = pd.read_csv(f'{path}/noise_estimates_paris_n.csv')

In [143]:
matching_tile_mobile_traffic_tile_insee = pd.read_csv(f'{path}/matching_tile_mobile_traffic_tile_insee_paris.csv')

In [144]:
insee_data = insee.tile.get_data(tile=list(matching_tile_mobile_traffic_tile_insee['tile_insee'].unique()), var_name=['Ind', 'Ind_snv'])
insee_data.reset_index(names='tile_insee', inplace=True)

In [145]:
full_data = pd.merge(expected_session_time, noise_estimates, on='tile')
full_data = pd.merge(full_data, matching_tile_mobile_traffic_tile_insee, left_on='tile', right_on='tile_mobile_traffic')
full_data.drop(columns=['tile'], inplace=True)
full_data = pd.merge(full_data, insee_data, left_on='tile_insee', right_on='tile_insee')

In [146]:
def aggregation_function(x):
    area = full_data.loc[x.index, 'intersection_area'].values
    decibels = x.values
    decibels_to_linear_scale = np.power(10, decibels / 10)
    weighted_average_decibels_linear_scale = np.average(decibels_to_linear_scale, weights=area)
    average_decibels = 10 * np.log10(weighted_average_decibels_linear_scale)
    rounded_average_decibels = np.round(average_decibels, decimals=0)
    return rounded_average_decibels

In [147]:
def weighted_mean(x):
    area = full_data.loc[x.index, 'intersection_area'].values
    return np.average(x.values, weights=area)

In [148]:
aggregated_full_data = full_data.groupby('tile_insee').agg({'expected_time_number': weighted_mean, 'Ind': 'first', 'Ind_snv': 'first', 'noise_estimate': aggregation_function})

In [149]:
ts = pd.date_range(start=datetime(2019, 1, 1, 21), end=datetime(2019, 1, 2, 3, 30), freq='1min')
times = pd.date_range(start=datetime(2019, 1, 1, 21), end=datetime(2019, 1, 2, 3, 30), freq='15min')
multiplier = len(ts) / len(times)
def float_to_time(x):
    index = np.round(x * multiplier).astype(int)
    return ts[index].time()

In [150]:
aggregated_full_data['expected_time'] = aggregated_full_data['expected_time_number'].apply(float_to_time)

In [151]:
aggregated_full_data.head()

Unnamed: 0_level_0,expected_time_number,Ind,Ind_snv,noise_estimate,expected_time
tile_insee,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CRS3035RES200mN2866000E3759200,8.62403,16.0,355102.5,59.0,23:05:00
CRS3035RES200mN2866000E3759400,8.626265,206.0,4433911.8,61.0,23:05:00
CRS3035RES200mN2866200E3759200,8.669638,139.0,3301313.8,54.0,23:06:00
CRS3035RES200mN2866200E3759400,8.715428,352.0,5711361.1,61.0,23:06:00
CRS3035RES200mN2866200E3759600,8.721309,498.5,7033760.2,62.0,23:06:00


In [152]:
aggregated_full_data.reset_index(names='tile_insee', inplace=True)

In [153]:
aggregated_full_data.to_csv(f'{path}/aggregated_full_data_paris_insee_tile.csv', index=False)