In [80]:
from datetime import time

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from jupyter_dash import JupyterDash
from dash import dcc, html
import dash_mantine_components as dmc

from Utils import City, AggregationLevel, TrafficType
from DataPreprocessing.WeatherData.Data import WeatherData
from DataPreprocessing.TrafficData.Data import CityTrafficData
from DataPreprocessing.GeoData.GeoData import IrisGeoData
from ExpectedBedTime.ExpectedBedTimeAPI import ExpectedBedTimeAPI
from ExpectedBedTime.ExpectedBedTimeCalculator import ExpectedBedTimeCalculator
from DataPreprocessing.AdminData.AdminDataComplete import AdminData
from ExpectedBedTime.Plots import StartBedTimeRobustness
from FeatureSelection.BasicFeatureImportanceCalculator import BasicFeatureImportanceCalculator
from FeatureSelection.RegressionConstructor import Regression, FeatureConstructor
from DataPreprocessing.GeoData.GeoDataComplete import GeoData, GeoDataType
from SessionDistribution.SessionDistributionCalculator import SessionDistributionCalculator

In [2]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell runs so edits to the project packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [97]:
# Traffic data for a single city: Paris, IRIS geo type, USERS traffic type.
traffic_data = CityTrafficData(
    city=City.PARIS,
    geo_data_type=GeoDataType.IRIS,
    traffic_type=TrafficType.USERS,
)

100%|██████████| 77/77 [01:16<00:00,  1.00it/s]


In [76]:
# Create the geo-data container and ask it to load the IRIS geo type.
# Later cells read the result through `geo_data.geo_data[GeoDataType.IRIS.value]`.
geo_data = GeoData()
geo_data.load(GeoDataType.IRIS)

In [98]:
# Calculator over the Paris traffic data with a 21:30 -> 03:30 window.
# `end` is earlier on the clock than `start`, so the window presumably spans
# midnight — TODO confirm against SessionDistributionCalculator.
session_distribution_calculator = SessionDistributionCalculator(
    city_traffic_data=traffic_data,
    start=time(21, 30),
    end=time(3, 30),
)

In [99]:
# Run the calculator; the returned object is used below for the plots,
# the IRIS subset and the per-location expectation.
session_distribution = (
    session_distribution_calculator
    .calculate_session_distribution()
)

In [107]:
# Show the session-distribution plot (the call's return value is the cell output).
session_distribution.distribution_plot()

In [108]:
# Show the expectation-by-location plot (the call's return value is the cell output).
session_distribution.expectation_by_location_plot()

In [100]:
# Show the session distribution as a geo plot, using the GeoData loaded above.
session_distribution.geo_plot(geo_data=geo_data)

In [104]:
# Bed-time calculator over the same traffic data, with 21:30 as the start bed time
# (the same clock time used as `start` for the session-distribution window).
expected_bed_time_calculator = ExpectedBedTimeCalculator(
    city_traffic_data=traffic_data,
    start_bed_time=time(21, 30),
)

In [105]:
# Run the bed-time calculator and keep its result for plotting.
expected_bed_time = (
    expected_bed_time_calculator
    .calculate_expected_bed_time()
)

In [106]:
# Geo plot of the expected bed time; unlike SessionDistribution.geo_plot this
# call receives the IRIS entry of `geo_data.geo_data`, not the GeoData object.
expected_bed_time.geo_plot(
    iris_geo_data=geo_data.geo_data[GeoDataType.IRIS.value],
)

In [112]:
# Administrative data source; used below by FeatureConstructor and for the
# income columns.
admin_data = AdminData()

In [181]:
# Re-create the geo-data container and load the IRIS geo type again.
# NOTE(review): this repeats the GeoData load done in an earlier cell and rebinds
# `geo_data`; confirm whether the reload is still needed or the cell can be removed.
geo_data = GeoData()
geo_data.load(GeoDataType.IRIS)

In [9]:
# Lazy generator yielding one CityTrafficData per member of `City`; nothing is
# loaded until the generator is consumed.
# NOTE(review): this rebinds `traffic_data`, which an earlier cell bound to the
# Paris CityTrafficData, and a generator can be iterated only once — re-run
# this cell before re-running any cell that consumes it.
traffic_data = (
    CityTrafficData(
        city=city,
        geo_data_type=GeoDataType.IRIS,
        traffic_type=TrafficType.USERS,
    )
    for city in City
)

In [10]:
# Compute the expected bed time through the API, consuming the per-city generator.
# NOTE(review): this rebinds `expected_bed_time`, which an earlier cell bound to
# the ExpectedBedTimeCalculator result — confirm which one later cells expect.
expected_bed_time = ExpectedBedTimeAPI.compute_expected_bed_time(
    traffic_data=traffic_data,
)

100%|██████████| 77/77 [00:19<00:00,  3.88it/s]
100%|██████████| 77/77 [00:15<00:00,  4.82it/s]
100%|██████████| 77/77 [00:15<00:00,  4.89it/s]
100%|██████████| 77/77 [00:16<00:00,  4.69it/s]
100%|██████████| 77/77 [00:22<00:00,  3.44it/s]
100%|██████████| 77/77 [00:23<00:00,  3.33it/s]
100%|██████████| 77/77 [00:15<00:00,  5.11it/s]
100%|██████████| 77/77 [00:19<00:00,  3.91it/s]
100%|██████████| 77/77 [00:15<00:00,  4.83it/s]
100%|██████████| 77/77 [00:17<00:00,  4.50it/s]
100%|██████████| 77/77 [00:14<00:00,  5.25it/s]
100%|██████████| 77/77 [00:17<00:00,  4.32it/s]
100%|██████████| 77/77 [00:16<00:00,  4.67it/s]
100%|██████████| 77/77 [00:15<00:00,  4.93it/s]
100%|██████████| 77/77 [01:07<00:00,  1.13it/s]
100%|██████████| 77/77 [00:17<00:00,  4.39it/s]
100%|██████████| 77/77 [00:15<00:00,  4.86it/s]
100%|██████████| 77/77 [00:15<00:00,  4.85it/s]
100%|██████████| 77/77 [00:17<00:00,  4.45it/s]
100%|██████████| 77/77 [00:15<00:00,  5.06it/s]


# EXPLORATORY DATA ANALYSIS

In [110]:
# Values of the IRIS coordinate of the session-distribution data; every
# feature below is built for this subset.
iris_subset = (
    session_distribution
    .data
    .coords[GeoDataType.IRIS.value]
    .values
)

In [113]:
# Feature builder backed by the admin data and the geo data loaded above.
feature_constructor = FeatureConstructor(
    admin_data=admin_data,
    geo_data=geo_data,
)

## Variables considered

In [114]:
# Centrality feature for the IRIS subset (merged later as column 'centrality').
iris_centrality = feature_constructor.iris_centrality(
    iris=iris_subset,
)

In [115]:
# City-density feature; the 'density' column is renamed to 'density_of_city'.
iris_density_of_city = (
    feature_constructor
    .iris_density_of_city(iris=iris_subset)
    .rename(columns={'density': 'density_of_city'})
)

In [116]:
# Density computed from the 'P19_POP' variable, exposed under the name 'POP_DEN'.
iris_pop_density = (
    feature_constructor
    .iris_density(iris=iris_subset, var_names=['P19_POP'])
    .rename(columns={'P19_POP': 'POP_DEN'})
)

In [117]:
# Business-density feature for the IRIS subset.
iris_business_density = feature_constructor.iris_business_density(
    iris=iris_subset,
)

In [118]:
# Shares for the five age-band variables (presumably 15-29, 30-44, 45-59,
# 60-74 and 75+, judging by the variable codes — TODO confirm).
age_shares = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=[
        'P19_POP1529',
        'P19_POP3044',
        'P19_POP4559',
        'P19_POP6074',
        'P19_POP75P',
    ],
)

In [119]:
# Share built from the single variable 'P19_POP15P_PSEUL'.
lonely_shares = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=['P19_POP15P_PSEUL'],
)

In [120]:
# Two income columns read straight from the admin data for the IRIS subset
# (no FeatureConstructor transformation is applied here).
income = admin_data.get_admin_data(subset=iris_subset)[
    ['DEC_MED19', 'DEC_GI19']
]

In [121]:
# Share built from the single variable 'P19_ACTOCC15P_ILT2P'.
shares_of_workers_with_far_work = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=['P19_ACTOCC15P_ILT2P'],
)

In [122]:
# Shares for the three education-level variables listed below.
education = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=[
        'P19_ACT_DIPLMIN',
        'P19_ACT_BAC',
        'P19_ACT_SUP2',
    ],
)

In [123]:
# Shares for the three unemployment variables listed below.
unemployment = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=[
        'P19_CHOM1524',
        'P19_CHOM2554',
        'P19_CHOM5564',
    ],
)

In [124]:
# Share built from the single variable 'P19_RETR1564'.
retirees = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=['P19_RETR1564'],
)

In [125]:
# Share built from the single variable 'P19_POP_ETR'.
nationality = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=['P19_POP_ETR'],
)

In [126]:
# Shares for the seven dwelling-size variables listed below.
house_sizes = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=[
        'P19_RP_M30M2',
        'P19_RP_3040M2',
        'P19_RP_4060M2',
        'P19_RP_6080M2',
        'P19_RP_80100M2',
        'P19_RP_100120M2',
        'P19_RP_120M2P',
    ],
)

In [127]:
# Share built from the single variable 'P19_POPH'.
men = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=['P19_POPH'],
)

In [128]:
# Shares for the two family-related variables listed below.
families = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=[
        'P19_POP15P_MARIEE',
        'C19_FAM',
    ],
)

In [129]:
# Disabled feature, kept for reference: `services` is not built and is not merged into `data`.
# services = feature_constructor.iris_shares(iris=iris_subset, var_names=['EQUIP_E107', 'EQUIP_E108', 'EQUIP_E109', 'EQUIP_A504'])

In [130]:
# Shares for the five commuting-mode variables listed below.
displacement_to_work = feature_constructor.iris_shares(
    iris=iris_subset,
    var_names=[
        'C19_ACTOCC15P_MAR',
        'C19_ACTOCC15P_TCOM',
        'C19_ACTOCC15P_PAS',
        'C19_ACTOCC15P_VELO',
        'C19_ACTOCC15P_VOIT',
    ],
)

In [134]:
# Per-location expectation kept as a one-column frame, renamed from
# 'expectation' to 'session_expectation'; it is the regression label below.
session_expectation = (
    session_distribution
    .compute_expectation_by_location()[['expectation']]
    .rename(columns={'expectation': 'session_expectation'})
)

In [419]:
# 'mean_float' of the expected-bed-time data as a one-column frame named
# 'expected_bed_time'.
# NOTE(review): `ebt` is not merged into `data` in the cells visible here.
ebt = (
    expected_bed_time
    .data['mean_float']
    .to_frame(name='expected_bed_time')
)

## Merge variables

In [135]:
# Assemble the modelling table by inner-joining every feature frame on its index.
# Frames are joined left to right, so the column order of `data` follows the
# order of the two lists below:
#   1. `log_frames`   — joined first, then log-transformed as a whole;
#   2. `share_frames` — joined afterwards and left untransformed.

# Quantile of the business density that is added to it before taking the log
# (presumably so that zero densities stay finite — TODO confirm intent).
BUSINESS_DENSITY_OFFSET_QUANTILE = 0.1
business_density_offset = iris_business_density + np.quantile(
    iris_business_density, BUSINESS_DENSITY_OFFSET_QUANTILE
)

log_frames = [
    iris_pop_density,
    session_expectation,
    iris_density_of_city,
    iris_centrality,
    income,
    business_density_offset,
]
share_frames = [
    age_shares,
    lonely_shares,
    shares_of_workers_with_far_work,
    education,
    unemployment,
    retirees,
    nationality,
    house_sizes,
    men,
    families,
    displacement_to_work,
]

data = log_frames[0]
for frame in log_frames[1:]:
    data = pd.merge(data, frame, left_index=True, right_index=True, how='inner')
data = np.log(data)
for frame in share_frames:
    data = pd.merge(data, frame, left_index=True, right_index=True, how='inner')

# np.log maps 0 to -inf, and dropna() does not treat +/-inf as missing.
# Convert non-finite values to NaN first so those rows are dropped too,
# instead of leaking into the regression. No in-place mutation is used.
data = data.replace([np.inf, -np.inf], np.nan).dropna()
data.shape, np.array(data.columns)

((2684, 37),
 array(['POP_DEN', 'session_expectation', 'density_of_city', 'centrality',
        'DEC_MED19', 'DEC_GI19', 'business_density', 'P19_POP1529',
        'P19_POP3044', 'P19_POP4559', 'P19_POP6074', 'P19_POP75P',
        'P19_POP15P_PSEUL', 'P19_ACTOCC15P_ILT2P', 'P19_ACT_DIPLMIN',
        'P19_ACT_BAC', 'P19_ACT_SUP2', 'P19_CHOM1524', 'P19_CHOM2554',
        'P19_CHOM5564', 'P19_RETR1564', 'P19_POP_ETR', 'P19_RP_M30M2',
        'P19_RP_3040M2', 'P19_RP_4060M2', 'P19_RP_6080M2',
        'P19_RP_80100M2', 'P19_RP_100120M2', 'P19_RP_120M2P', 'P19_POPH',
        'P19_POP15P_MARIEE', 'C19_FAM', 'C19_ACTOCC15P_MAR',
        'C19_ACTOCC15P_TCOM', 'C19_ACTOCC15P_PAS', 'C19_ACTOCC15P_VELO',
        'C19_ACTOCC15P_VOIT'], dtype=object))

## Run analysis and plot

In [140]:
# Single-regressor fit of the session expectation on one feature, then plot.
# `x_axis` names both the regressor and the plotted axis; the original
# hard-coded 'DEC_MED19' a second time, so the two could silently drift apart.
x_axis = 'DEC_MED19'

# Candidate regressors: every column except the label and the column that is
# passed as the plot's `color` argument.
excluded_columns = ['session_expectation', 'density_of_city']
features = data[[c for c in data.columns if c not in excluded_columns]]
labels = data['session_expectation'].to_frame()

regression = Regression(features=features[x_axis].to_frame(), labels=labels)
regression.plot(x_axis=x_axis, color=data['density_of_city'].to_frame())