In [201]:
import pandas as pd
import plotly.graph_objs as go
from datetime import time, timedelta, datetime
import mobile_traffic as mt
import numpy as np
import insee
import geopandas as gpd
from tqdm import tqdm
import webbrowser

In [179]:
# Base directory of the CSV inputs read by the cells below.
# NOTE(review): hardcoded absolute local path; the notebook only runs on a machine with this layout.
path = '/Users/anmusso/Desktop/PhD/Projects/Current/NetMob/NetMobData/data/FigureData/Sleep'

In [180]:
# Load the aggregated per-tile table; it is joined on its 'tile_insee' column further down.
data = pd.read_csv(path + '/aggregated_full_data_paris_insee_tile.csv')

In [181]:
# Remove every row that has a missing value in any column (mutates `data` in place).
data.dropna(inplace=True)

In [182]:
# Request the 'geometry' variable for each distinct tile id present in `data`.
geometry = insee.tile.get_data(tile=data['tile_insee'].unique(), var_name='geometry')

In [183]:
# Turn the index into a regular 'tile_insee' column so it can be used as the merge key.
# NOTE: mutates `geometry` in place; re-run the previous cell before re-running this one.
geometry.reset_index(names='tile_insee', inplace=True)

In [184]:
# Attach the tile geometry to each row (merge on 'tile_insee', default inner join).
data = data.merge(geometry, on='tile_insee')

In [185]:
# Store each tile's centroid in its own column; get_matching uses it for buffering and the spatial join.
# NOTE(review): this runs before the reprojection to EPSG:2154 in the following cell --
# confirm the centroids end up in the intended CRS.
data['centroid'] = data['geometry'].apply(lambda x: x.centroid)

In [186]:
# Promote to a GeoDataFrame and reproject the active geometry column to EPSG:2154
# (Lambert-93, metre units).
data = gpd.GeoDataFrame(data, geometry='geometry')
data.to_crs(epsg=2154, inplace=True)
# to_crs only transforms the active geometry column, so the 'centroid' column computed
# before the reprojection would stay in the source CRS. Recompute it from the reprojected
# polygons so the buffer distances used by get_matching are expressed in the same units
# (this is the same order of operations used for `data1` later in the notebook).
data['centroid'] = data['geometry'].apply(lambda x: x.centroid)

In [227]:
def get_matching(df: pd.DataFrame, geo_var: str, treatment: str, outcome: str, high: float, low: float, buffer_size: int):
    """Pair each high-treatment tile with the low-treatment tiles lying close to it.

    Rows with ``df[treatment] > high`` form the "high" group and rows with
    ``df[treatment] < low`` form the "low" group. A high tile is matched with a low
    tile when the low tile's centroid intersects the disc of radius ``buffer_size``
    drawn around the high tile's centroid.

    Parameters
    ----------
    df : table with at least the columns ``geo_var``, ``'centroid'``, ``treatment``
        and ``outcome``; ``'centroid'`` must hold point geometries.
    geo_var : name of the tile identifier column.
    treatment : name of the column thresholded by ``high`` / ``low``.
    outcome : name of the outcome column carried along for both sides of a pair.
    high, low : strict thresholds defining the two groups.
    buffer_size : buffer radius, in the units of the centroid coordinates.

    Returns
    -------
    One row per (high tile, low tile) pair with the columns
    ``{geo_var}_high, {treatment}_high, {outcome}_high, {geo_var}_low,
    {treatment}_low, {outcome}_low``. A high tile with several nearby low tiles
    appears once per neighbour; the join is geopandas' default inner join, so
    high tiles without any nearby low tile are absent.
    """
    kept_columns = [geo_var, 'centroid', treatment, outcome]

    # High group: geometry is switched from the centroid to a disc around it.
    above = gpd.GeoDataFrame(df.loc[df[treatment] > high, kept_columns], geometry='centroid')
    above['buffer'] = above['centroid'].apply(lambda point: point.buffer(buffer_size))
    above = above.set_geometry('buffer')

    # Low group: represented by the bare centroid points.
    below = gpd.GeoDataFrame(df.loc[df[treatment] < low, kept_columns], geometry='centroid')

    pairs = gpd.sjoin(above, below, predicate='intersects', lsuffix='high', rsuffix='low')

    # Keep only identifier / treatment / outcome for each side, high columns first.
    wanted = [f'{name}_{side}' for side in ('high', 'low') for name in (geo_var, treatment, outcome)]
    return pairs[wanted]

In [228]:
def plot_bars(m: pd.DataFrame, treatment: str, outcome: str):
    """Show a bar chart of the standardized high-minus-low difference per variable.

    For both ``treatment`` and ``outcome`` the pairwise difference
    ``m[f'{name}_high'] - m[f'{name}_low']`` is computed, and its mean divided by its
    standard deviation becomes the bar height. The figure is opened with plotly's
    ``'browser'`` renderer; nothing is returned.

    Parameters
    ----------
    m : matched pairs holding the ``*_high`` / ``*_low`` columns of both variables.
    treatment : base name of the treatment columns (also used in the title).
    outcome : base name of the outcome columns.
    """
    labels = [treatment, outcome]
    heights = []
    for name in labels:
        gap = m[f'{name}_high'] - m[f'{name}_low']
        # Mean difference expressed in units of its own standard deviation.
        heights.append(gap.mean() / gap.std())

    fig = go.Figure()
    fig.add_trace(go.Bar(x=labels, y=heights))
    fig.update_layout(title=f'Expected time in high and low {treatment} tiles', yaxis_title='Difference')
    fig.show(renderer='browser')

In [229]:
def matching_to_long_format(m: pd.DataFrame, geo_var: str, treatment: str, outcome: str):
    """Reshape matched pairs from wide (one row per pair) to long (one row per tile).

    Parameters
    ----------
    m : matched pairs with the columns ``{geo_var}_high``, ``{treatment}_high``,
        ``{outcome}_high`` and their ``_low`` counterparts.
    geo_var, treatment, outcome : base column names, without the side suffix.

    Returns
    -------
    DataFrame with the columns ``[geo_var, treatment, outcome, 'type']`` where
    ``'type'`` is ``'high'`` or ``'low'``. All high rows come first, followed by all
    low rows; the original row labels are kept, so index values repeat.
    """
    m_high = m[[f'{geo_var}_high', f'{treatment}_high', f'{outcome}_high']].copy()
    m_high.columns = [geo_var, treatment, outcome]
    m_high['type'] = 'high'
    # Read the low side from the argument `m`, not from a notebook-level `matching`
    # variable, so the function works on whatever frame the caller passes in.
    m_low = m[[f'{geo_var}_low', f'{treatment}_low', f'{outcome}_low']].copy()
    m_low.columns = [geo_var, treatment, outcome]
    m_low['type'] = 'low'
    m_long_format = pd.concat([m_high, m_low])
    return m_long_format

In [230]:
def plot_map(df, column, legend=True, tiles="CartoDB positron", path_map='/Users/anmusso/Desktop/PhD/Projects/Current/NetMob/NetMobCode/temp/map_matching.html'):
    """Render `df` as an interactive map, save it as HTML and open it in a browser.

    Parameters
    ----------
    df : object exposing ``explore(column=..., legend=..., tiles=...)`` (a
        GeoDataFrame in this notebook).
    column : name of the column used to colour the map.
    legend : forwarded to ``df.explore``.
    tiles : basemap name forwarded to ``df.explore``.
    path_map : file the HTML map is written to. Defaults to the location that was
        previously hard-coded, so existing calls behave as before; pass another
        path to run on a different machine or to keep several maps.
    """
    html_map = df.explore(column=column, legend=legend, tiles=tiles)
    html_map.save(path_map)
    # Open the saved file through a file:// URL.
    webbrowser.open(f'file://{path_map}')

In [248]:
# Keep only rows whose 'Ind' value is strictly above 1000
# (presumably a per-tile count of individuals -- TODO confirm).
# NOTE: overwrites `data`, so the unfiltered frame is no longer available afterwards.
data = data.loc[data['Ind'] > 1000]

In [249]:
# Pair tiles with noise_estimate > 60 with tiles with noise_estimate < 50 whose centroid
# falls inside a buffer of radius 500 (centroid coordinate units) around the high tile's centroid.
matching = get_matching(data, geo_var='tile_insee', treatment='noise_estimate', outcome='expected_time_number', high=60, low=50, buffer_size=500)

In [250]:
# Long format (one row per tile, tagged 'high'/'low'), joined back to the tile polygons
# and wrapped as a GeoDataFrame so it can be drawn on a map.
matching_long = gpd.GeoDataFrame(
    matching_to_long_format(
        matching,
        geo_var='tile_insee',
        treatment='noise_estimate',
        outcome='expected_time_number',
    ).merge(geometry, on='tile_insee'),
    geometry='geometry',
)

In [251]:
# Bar chart of the standardized high-minus-low differences for the matched INSEE tiles.
plot_bars(matching, treatment='noise_estimate', outcome='expected_time_number')

In [252]:
# Map of the matched tiles, coloured by their 'high' / 'low' label.
plot_map(matching_long, column='type')

In [86]:
# Load the noise table; it is merged on its 'tile' column below.
noise = pd.read_csv(path + '/noise_estimates_paris_.csv')

In [130]:
# Load the expected-session-time table; it is merged with `noise` on 'tile' below.
session = pd.read_csv(path + '/expected_session_time_paris_start_21.csv')

In [131]:
# Fetch the tile geometries for Paris; the index is turned into a 'tile' column when merging.
tile_geo = mt.geo_tile.get_geo_data(city=mt.City.PARIS)

In [132]:
# Join noise and session tables on 'tile', then attach the tile geometry
# (the index of `tile_geo` becomes the 'tile' key).
data1 = (
    noise
    .merge(session, on='tile')
    .merge(tile_geo.reset_index(names='tile'), on='tile')
)

In [133]:
# Promote to a GeoDataFrame and reproject the active geometry column to EPSG:2154.
data1 = gpd.GeoDataFrame(data1, geometry='geometry').to_crs(epsg=2154)

In [134]:
# Remove every row that has a missing value in any column (mutates `data1` in place).
data1.dropna(inplace=True)

In [135]:
# Centroid of each tile polygon; computed after the reprojection above, so the points
# are derived from the EPSG:2154 geometries.
data1['centroid'] = data1['geometry'].apply(lambda x: x.centroid)

In [167]:
# Preview the first rows of the merged table.
data1.head()

Unnamed: 0,tile,noise_estimate,expected_time_number,expected_time,geometry,centroid
0,162,61.0,8.582475,23:09:00,"POLYGON ((653218.968 6838547.550, 653219.807 6...",POINT (653269.353 6838597.096)
1,507,49.0,8.641215,23:10:00,"POLYGON ((653119.877 6838648.320, 653120.716 6...",POINT (653170.262 6838697.865)
2,508,61.0,8.650317,23:10:00,"POLYGON ((653219.807 6838647.481, 653220.646 6...",POINT (653270.192 6838697.027)
3,853,52.0,8.66515,23:10:00,"POLYGON ((653120.716 6838748.250, 653121.555 6...",POINT (653171.100 6838797.796)
4,854,60.0,8.691205,23:10:00,"POLYGON ((653220.646 6838747.411, 653221.485 6...",POINT (653271.031 6838796.957)


In [169]:
# Pair tiles with noise_estimate > 60 with tiles with noise_estimate < 40 whose centroid
# falls inside a buffer of radius 150 (centroid coordinate units) around the high tile's centroid.
# NOTE: reuses the name `matching`, which the INSEE-tile analysis above also assigns.
matching = get_matching(data1, geo_var='tile', treatment='noise_estimate', outcome='expected_time_number', high=60, low=40, buffer_size=150)

In [178]:
# Bar chart of the standardized high-minus-low differences for the matched tiles.
plot_bars(matching, treatment='noise_estimate', outcome='expected_time_number')

In [170]:
# Preview the first matched (high, low) pairs.
matching.head()

Unnamed: 0,tile_high,noise_estimate_high,expected_time_number_high,tile_low,noise_estimate_low,expected_time_number_low
34919,70344,73.0,8.29479,69997,0.0,8.341544
34968,70393,61.0,8.5666,70738,23.0,8.547669
35294,70739,61.0,8.590632,70738,23.0,8.547669
35620,71085,62.0,8.664186,70738,23.0,8.547669
36259,71766,61.0,8.426977,72113,36.0,8.442326


In [171]:
# Split the matched pairs into their two sides, strip the side suffix from the column
# names and tag each row with the side it came from.
matching_high = (
    matching[['tile_high', 'noise_estimate_high', 'expected_time_number_high']]
    .rename(columns={
        'tile_high': 'tile',
        'noise_estimate_high': 'noise_estimate',
        'expected_time_number_high': 'expected_time_number',
    })
    .assign(type='high')
)
matching_low = (
    matching[['tile_low', 'noise_estimate_low', 'expected_time_number_low']]
    .rename(columns={
        'tile_low': 'tile',
        'noise_estimate_low': 'noise_estimate',
        'expected_time_number_low': 'expected_time_number',
    })
    .assign(type='low')
)

In [172]:
# Stack the high rows on top of the low rows; row labels are kept, so index values repeat.
matching_long = pd.concat([matching_high, matching_low])

In [173]:
# Attach the tile geometry to every row (the index of `tile_geo` becomes the 'tile' key).
matching_long = matching_long.merge(tile_geo.reset_index(names='tile'), on='tile')

In [174]:
# Wrap as a GeoDataFrame, with 'geometry' as the active geometry, so it can be mapped.
matching_long = gpd.GeoDataFrame(matching_long, geometry='geometry')

In [175]:
import webbrowser
import os

In [176]:
def plot_map(df, column, legend=True, tiles="CartoDB positron", path_map='/Users/anmusso/Desktop/PhD/Projects/Current/NetMob/NetMobCode/temp/map_matching.html'):
    """Render `df` as an interactive map, save it as HTML and open it in a browser.

    NOTE(review): `plot_map` is defined more than once in this notebook; whichever
    definition ran last is the one in effect.

    Parameters
    ----------
    df : object exposing ``explore(column=..., legend=..., tiles=...)`` (a
        GeoDataFrame in this notebook).
    column : name of the column used to colour the map.
    legend : forwarded to ``df.explore``.
    tiles : basemap name forwarded to ``df.explore``.
    path_map : file the HTML map is written to. Defaults to the location that was
        previously hard-coded, so existing calls behave as before; pass another
        path to run on a different machine or to keep several maps.
    """
    html_map = df.explore(column=column, legend=legend, tiles=tiles)
    html_map.save(path_map)
    # Open the saved file through a file:// URL.
    webbrowser.open(f'file://{path_map}')

In [177]:
# Map of the matched tiles, coloured by their 'high' / 'low' label.
plot_map(matching_long, column='type')