In [124]:
from datetime import time

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from jupyter_dash import JupyterDash
from dash import dcc, html
import dash_mantine_components as dmc

from Utils import City, AggregationLevel, TrafficType
from DataPreprocessing.WeatherData.Data import WeatherData
from DataPreprocessing.TrafficData.Data import CityTrafficData
from DataPreprocessing.GeoData.GeoData import IrisGeoData
from ExpectedBedTime.ExpectedBedTimeAPI import ExpectedBedTimeAPI
from DataPreprocessing.AdminData.AdminDataComplete import AdminData
from ExpectedBedTime.Plots import StartBedTimeRobustness
from FeatureSelection.BasicFeatureImportanceCalculator import BasicFeatureImportanceCalculator
from FeatureSelection.RegressionConstructor import Regression, FeatureConstructor
from DataPreprocessing.GeoData.GeoDataComplete import GeoData, GeoDataType

In [5]:
%load_ext autoreload
%autoreload 2

In [20]:
iris_geo_data = IrisGeoData()

In [24]:
admin_data = AdminData()

In [76]:
geo_data = GeoData()
geo_data.load(GeoDataType.IRIS)

In [13]:
traffic_data = (CityTrafficData(city=city, aggregation_level=AggregationLevel.IRIS, traffic_type=TrafficType.USERS) for city in City)

In [14]:
expected_bed_time = ExpectedBedTimeAPI.compute_expected_bed_time(traffic_data=traffic_data)

100%|██████████| 77/77 [00:18<00:00,  4.07it/s]
100%|██████████| 77/77 [00:13<00:00,  5.78it/s]
100%|██████████| 77/77 [00:13<00:00,  5.58it/s]
100%|██████████| 77/77 [00:15<00:00,  4.98it/s]
100%|██████████| 77/77 [00:21<00:00,  3.56it/s]
100%|██████████| 77/77 [00:21<00:00,  3.58it/s]
100%|██████████| 77/77 [00:14<00:00,  5.37it/s]
100%|██████████| 77/77 [00:19<00:00,  3.98it/s]
100%|██████████| 77/77 [00:14<00:00,  5.48it/s]
100%|██████████| 77/77 [00:14<00:00,  5.18it/s]
100%|██████████| 77/77 [00:13<00:00,  5.51it/s]
100%|██████████| 77/77 [00:15<00:00,  4.84it/s]
100%|██████████| 77/77 [00:15<00:00,  5.07it/s]
100%|██████████| 77/77 [00:14<00:00,  5.39it/s]
100%|██████████| 77/77 [01:04<00:00,  1.19it/s]
100%|██████████| 77/77 [00:16<00:00,  4.68it/s]
100%|██████████| 77/77 [00:14<00:00,  5.23it/s]
100%|██████████| 77/77 [00:14<00:00,  5.26it/s]
100%|██████████| 77/77 [00:16<00:00,  4.78it/s]
100%|██████████| 77/77 [00:14<00:00,  5.40it/s]


In [197]:
expected_bed_time.geo_plot(iris_geo_data=iris_geo_data)

In [142]:
feature_constructor = FeatureConstructor(admin_data=admin_data, geo_data=geo_data)
city_density = np.log(feature_constructor.city_density())
density = feature_constructor.density()

In [143]:
avg_bed_time = expected_bed_time.data['mean_float'].to_frame(name='expected_bed_time')

In [144]:
matched = pd.merge(avg_bed_time, density, left_index=True, right_index=True, how='inner')

In [145]:
features_and_labels = np.log(matched)

In [146]:
reg = Regression(features=features_and_labels['density'].to_frame(), labels=features_and_labels['expected_bed_time'].to_frame())
reg.plot(x_axis='density', color=city_density)

In [193]:
city_density = np.log(feature_constructor.city_density())

In [180]:
city_density.drop('Paris', inplace=True)

In [194]:
expected_sleep_city = expected_bed_time.assign_iris_to_quantile(n_quantiles=8)
expected_sleep_city['quantile'] = expected_sleep_city['quantile'].astype('int')
expected_sleep_city = expected_sleep_city.merge(geo_data.matching.data, left_index=True, right_on='iris', how='inner')[['city', 'quantile']]
expected_sleep_city = expected_sleep_city.groupby('city').mean()

In [191]:
expected_sleep_city.drop('Paris', inplace=True)

In [195]:
reg = Regression(features=city_density, labels=expected_sleep_city)
reg.plot(x_axis='density')

[0.46517155]


In [164]:
reg = Regression(features=features_and_labels['density'].to_frame(), labels=features_and_labels['expected_bed_time'].to_frame())
reg.plot(x_axis='density', color=city_density)

[0.04724712]


In [23]:
expected_bed_time.geo_plot(iris_geo_data=iris_geo_data, n_quantiles=8)

In [71]:
regression_constructor = RegressionConstructor(admin_data=admin_data, expected_bed_time=expected_bed_time, iris_geo_data=iris_geo_data)
regression_constructor.plot_regression(feature_names=['density'], x_axis='density')

[0.04724712]


In [26]:
basic_feature_importance_calculator = BasicFeatureImportanceCalculator(admin_data=admin_data, expected_bed_time=expected_bed_time)
basic_feature_importance_calculator.compute_feature_importance()

Unnamed: 0,f_statistic,p_values,mutual_information,regression,description
P19_POP,5.730877,6.525308e-04,0.000000,-0.011194,Population
P19_POP75P,208.524044,2.120320e-128,0.056746,-0.319038,Nombre de personnes de 75 ans ou plus
P19_POP6074,120.930532,5.948143e-76,0.026569,-0.239795,Nombre de personnes de 60 à 74 ans
P19_POP4559,7.580474,4.666718e-05,0.003278,-0.057413,Nombre de personnes de 45 à 59 ans
P19_POP3044,28.186842,4.355023e-18,0.005872,0.106339,Nombre de personnes de 30 à 44 ans
...,...,...,...,...,...
EQUIP_E1,415.414152,2.821740e-244,0.113460,0.414156,INFRASTRUCTURES DE TRANSPORTS
EQUIP_F1,65.142800,2.043858e-41,0.024995,-0.182989,ÉQUIPEMENTS SPORTIFS
EQUIP_F2,21.229888,1.133050e-13,0.000000,-0.105170,ÉQUIPEMENTS DE LOISIRS
EQUIP_F3,3.413553,1.669016e-02,0.010111,-0.013973,ÉQUIPEMENTS CULTURELS ET SOCIOCULTURELS


In [None]:
nantes_traffic_data = CityTrafficData(city=City.LYON, aggregation_level=AggregationLevel.IRIS, traffic_type=TrafficType.USERS)

In [10]:
expected_bed_time_lyon = ExpectedBedTimeAPI.compute_expected_bed_time(traffic_data=lyon_traffic_data)

In [11]:
expected_bed_time_lyon.data.head()

Unnamed: 0_level_0,mean,median,std,mean_float,median_float,std_float,n_obs
iris,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
440090101,00:13:00,00:11:00,19,9.884739,9.703744,1.245699,48
440090102,00:08:00,00:07:00,17,9.507818,9.445226,1.134008,48
440090103,00:07:00,00:06:00,18,9.462249,9.393729,1.175369,48
440140000,00:08:00,00:09:00,18,9.551398,9.630426,1.192873,48
440180101,00:09:00,00:08:00,17,9.596484,9.523875,1.158587,48


In [22]:
expected_bed_time_lyon.geo_plot(iris_geo_data=iris_geo_data, n_quantiles=8)
expected_bed_time_lyon.plot()

In [53]:
feature_selector = BasicFeatureImportanceCalculator(admin_data=admin_data, expected_bed_time=expected_bed_time_lyon)

In [55]:
feature_selector.compute_feature_importance()

Unnamed: 0,f_statistic,p_values,mutual_information,regression,description
P19_POP,2.134476,9.528897e-02,0.016831,0.110254,Population
P19_POP75P,4.870993,2.436819e-03,0.042771,-0.179381,Nombre de personnes de 75 ans ou plus
P19_POP6074,10.647999,9.399752e-07,0.030756,-0.273457,Nombre de personnes de 60 à 74 ans
P19_POP4559,2.513112,5.808348e-02,0.000000,-0.130662,Nombre de personnes de 45 à 59 ans
P19_POP3044,4.032395,7.606094e-03,0.002939,0.164598,Nombre de personnes de 30 à 44 ans
...,...,...,...,...,...
EQUIP_E1,2.817541,3.883921e-02,0.003198,0.144475,INFRASTRUCTURES DE TRANSPORTS
EQUIP_F1,4.883737,2.394925e-03,0.000000,-0.154807,ÉQUIPEMENTS SPORTIFS
EQUIP_F2,0.647312,5.850160e-01,0.000000,-0.067559,ÉQUIPEMENTS DE LOISIRS
EQUIP_F3,1.211892,3.050321e-01,0.000000,-0.028619,ÉQUIPEMENTS CULTURELS ET SOCIOCULTURELS


In [28]:
admin_data.metadata

Unnamed: 0,COD_VAR,LIB_VAR_LONG
0,C19_ACT1564,Nombre de personnes actives de 15 à 64 ans
1,C19_ACT1564_CS1,Nombre d'agriculteurs exploitants actifs de 15...
2,C19_ACT1564_CS2,"Nombre d'artisans, commerçants, chefs d'entrep..."
3,C19_ACT1564_CS3,Nombre de cadres et professions intellectuelle...
4,C19_ACT1564_CS4,Nombre de professions intermédiaires actifs de...
...,...,...
394,P19_SCOL1517,Nombre de personnes scolarisées de 15 à 17 ans
395,P19_SCOL1824,Nombre de personnes scolarisées de 18 à 24 ans
396,P19_SCOL2529,Nombre de personnes scolarisées de 25 à 29 ans
397,P19_SCOL30P,Nombre de personnes scolarisées de 30 ans ou plus


## Plots for the robustness to change in start_bed_time

In [197]:
start_bed_times = [time(21), time(21,15), time(21, 30), time(21,45), time(22), time(22,15), time(22,30)]
ExpectedBedTimeAPI.plot_start_bed_time_robustness(traffic_data=city_traffic_data, start_bed_times=start_bed_times)

In [198]:
df = pd.DataFrame({'x': [1, 2, 3], 'y_1': [2, 4, 5], 'y_2': [2, 6, 8]})

In [199]:
df

Unnamed: 0,x,y_1,y_2
0,1,2,2
1,2,4,6
2,3,5,8


In [200]:
df.melt(id_vars=['x'], value_vars=['y_1', 'y_2'])

Unnamed: 0,x,variable,value
0,1,y_1,2
1,2,y_1,4
2,3,y_1,5
3,1,y_2,2
4,2,y_2,6
5,3,y_2,8


In [230]:
from DataPreprocessing.GeoData.GeoData import TileGeoData, PollingStationGeoData, IrisGeoData
from Utils import City
from DataPreprocessing.GeoData.GeoMatchers.IrisPollingStationMatcher import IrisPollingStationMatcher

In [231]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [202]:
tile_geo_data = [TileGeoData(city=city) for city in City]

In [213]:
iris_geo_data = IrisGeoData()

In [253]:
polling_station_geo_data = PollingStationGeoData()

In [272]:
iris_polling_station_matcher = IrisPollingStationMatcher(iris_geo_data=iris_geo_data, polling_station_geo_data=polling_station_geo_data)

In [273]:
matching =  iris_polling_station_matcher.get_matching()

In [275]:
matching.groupby('iris')['polling_station'].count().sort_values(ascending=False)

iris
763511504    44
441090710    43
315551806    41
674821904    41
441091007    41
             ..
395310000     1
395320000     1
395330000     1
395340000     1
956900000     1
Name: polling_station, Length: 48590, dtype: int64