In [1]:
import os

import geopandas as gpd
import insee
import numpy as np
import pandas as pd
from shapely.geometry import Point

In [2]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits to
# imported modules are picked up without restarting the kernel
# (presumably for the `insee` package used below — confirm).
%load_ext autoreload
%autoreload 2

In [3]:
# Base directory that holds the input CSV files and receives the output CSV.
# It can be overridden through the NETMOB_DATA_DIR environment variable so the
# notebook also runs on machines other than the author's; when the variable is
# not set, the original location is used. Kept as a plain string because the
# cells below build file names with `path + '/...'`.
path = os.environ.get(
    'NETMOB_DATA_DIR',
    '/Users/andrea/Desktop/PhD/Projects/Current/NetMob/Data/BaseData',
)

In [4]:
# Bedtime index per commune; commune codes are read as strings so that they are
# not parsed as numbers.
bed_time_index = pd.read_csv(
    path + '/bed_time_index_insee_commune.csv',
    dtype={'insee_com': str},
)

In [5]:
# Distinct commune codes present in the bedtime-index table.
insee_com = bed_time_index.loc[:, 'insee_com'].unique()

In [6]:
# Home -> work flows for the communes that have a bedtime index.
disp_data = insee.commune.get_home_work_displacements(home_commune_ids=insee_com)

# Sum of `count` over all work destinations, per home commune.
total_pop = (
    disp_data
    .groupby(['home_insee_com'])
    .agg(total_pop=('count', 'sum'))
)

In [7]:
# Administrative table with commuting-mode counts and median income per commune.
means_of_disp_data = pd.read_csv(
    path + '/admin_data_commune_means_of_displacement_to_work_and_income.csv',
    dtype={'commune': str},
)

# Ratio of the two C19_ACTOCC15P columns
# (presumably the share of workers commuting by car — confirm against the data dictionary).
means_of_disp_data['frac_car'] = means_of_disp_data['C19_ACTOCC15P_VOIT'].div(
    means_of_disp_data['C19_ACTOCC15P']
)

# Re-key the two derived columns by home commune so they can be merged on the index.
admin_by_home_commune = (
    means_of_disp_data[['commune', 'frac_car', 'DEC_MED19']]
    .rename(columns={'commune': 'home_insee_com'})
    .set_index('home_insee_com')
)
frac_car = admin_by_home_commune[['frac_car']]
income = admin_by_home_commune[['DEC_MED19']].rename(columns={'DEC_MED19': 'income'})

In [8]:
# Work -> home trip table (provides `duration_seconds` and `distance_meters` further down);
# both commune codes are read as strings.
work_to_home_dist = pd.read_csv(
    path + '/gmaps_distances_from_work_to_home.csv',
    dtype={'work_insee_com': str, 'home_insee_com': str},
)

In [9]:
# Attach the per-home-commune attributes (frac_car, total_pop) to every flow row.
# Left merges keep all flows, including those whose home commune has no match.
disp_data = (
    disp_data
    .set_index(['home_insee_com'])
    .merge(frac_car, how='left', left_index=True, right_index=True)
    .merge(total_pop, how='left', left_index=True, right_index=True)
)

In [10]:
# Key both tables by the (home, work) commune pair so they can be merged on the index.
pair_key = ['home_insee_com', 'work_insee_com']
disp_data = disp_data.reset_index().set_index(pair_key)
work_to_home_dist = work_to_home_dist.set_index(pair_key)

In [11]:
# Add the trip columns to each (home, work) flow; flows without a match keep NaN.
disp_data = pd.merge(
    disp_data,
    work_to_home_dist,
    how='left',
    left_index=True,
    right_index=True,
)

In [12]:
# Keep only the columns used in the rest of the notebook.
disp_data = disp_data[['home_insee_com_label', 'work_insee_com_label', 'count', 'duration_seconds', 'distance_meters', 'frac_car', 'total_pop']].copy()

# Share of the home commune's total `count` that goes to each work commune.
disp_data['frac'] = disp_data['count'] / disp_data['total_pop']

# Drop flows that represent 0.5 % or less of the home commune's total.
# The explicit .copy() makes the filtered frame independent of the unfiltered one,
# so the column assignments in the following cells do not raise
# SettingWithCopyWarning (or silently write to a temporary object).
disp_data = disp_data.loc[disp_data['frac'] > 0.005].copy()

In [13]:
# Flows with no trip information get default values: 5 minutes and 1000 meters.
# NOTE(review): presumably these are home == work pairs absent from the trip table — confirm.
disp_data = disp_data.assign(
    duration_seconds=disp_data['duration_seconds'].fillna(5*60),
    distance_meters=disp_data['distance_meters'].fillna(1000),
)

In [14]:
# Total travel time (seconds) accumulated by each flow, split by `frac_car`:
#   - the (1 - frac_car) part of `count` is charged `duration_seconds`;
#   - the frac_car part is charged the time needed to cover `distance_meters`
#     at a constant 30 km/h (meters / 30000 = hours, * 3600 = seconds).
# NOTE(review): confirm that `duration_seconds` is meant for the non-car share and the
# 30 km/h estimate for the car share, and not the other way round.
share_by_car = disp_data['frac_car']
seconds_at_30_kmh = (disp_data['distance_meters'] /(30*1000))* 3600
disp_data['total_duration'] = (
    disp_data['count'] * (1 - share_by_car) * disp_data['duration_seconds']
    + disp_data['count'] * share_by_car * seconds_at_30_kmh
)

In [15]:
# Sum the per-flow travel time over all work destinations of each home commune.
# min_count=1 keeps the result NaN when every flow of a commune has a NaN
# `total_duration` (e.g. `frac_car` missing after the left merge). With the default
# min_count=0 such a commune would silently get a total of 0 seconds and show up
# downstream as a zero-length commute.
total_duration = (
    disp_data
    .groupby(['home_insee_com'])[['total_duration']]
    .sum(min_count=1)
)

In [16]:
# Average travel time per home commune: summed duration divided by the commune's `total_pop`.
# NOTE(review): `total_pop` was computed before the 0.5 % flow filter, so the denominator
# still includes the counts of flows that were removed from the numerator — confirm this is intended.
pop_of_listed_communes = total_pop.loc[total_duration.index, 'total_pop']
average_duration = total_duration.div(pop_of_listed_communes, axis=0)

In [17]:
# Combine the average duration with the bedtime index; the inner merge keeps only
# communes present in both tables.
bed_time_index_by_commune = bed_time_index.set_index('insee_com')
bed_time_index_and_avg_duration = pd.merge(
    average_duration,
    bed_time_index_by_commune,
    how='inner',
    left_index=True,
    right_index=True,
)

In [18]:
# Add the income column; the inner merge drops communes without an income row.
bed_time_index_and_avg_duration_income = pd.merge(
    bed_time_index_and_avg_duration,
    income,
    how='inner',
    left_index=True,
    right_index=True,
)

In [19]:
# Turn the commune index back into a regular column before exporting.
bed_time_index_and_avg_duration_income = bed_time_index_and_avg_duration_income.reset_index()

In [20]:
# Persist the combined table (durations are still in seconds and income is untransformed here).
output_csv = path + '/bed_time_index_avg_work_to_home_trip_duration_income_commune.csv'
bed_time_index_and_avg_duration_income.to_csv(output_csv, index=False)

In [21]:
import plotly.graph_objects as go
import statsmodels.api as sm

In [22]:
# Prepare the table for plotting:
#   1. keep communes whose average duration is below 50 minutes (value still in seconds here);
#   2. convert the duration from seconds to minutes;
#   3. replace income by its base-2 logarithm.
# .assign() returns a new frame, so the new columns are not written onto a filtered
# slice of the previous frame (which raised SettingWithCopyWarning).
# WARNING: this cell is not idempotent — it overwrites its own input, so running it a
# second time divides by 60 and applies log2 again. Re-run the cells that build
# `bed_time_index_and_avg_duration_income` before re-running this one.
bed_time_index_and_avg_duration_income = (
    bed_time_index_and_avg_duration_income
    .loc[lambda df: df['total_duration'] < 50*60]
    .assign(
        total_duration=lambda df: df['total_duration'] / 60,
        income=lambda df: np.log2(df['income']),
    )
)

In [47]:
def plot_with_regression(data, x_axis, y_axis, xaxis_title, yaxis_title, title=None, renderer='browser'):
    """Scatter-plot two columns of `data` together with their OLS regression line.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing the columns named by `x_axis` and `y_axis`.
    x_axis, y_axis : str
        Names of the explanatory and the response column.
    xaxis_title, yaxis_title : str
        Axis labels shown on the figure.
    title : str, optional
        Figure title. When omitted it is built from the two axis labels, so the
        title always matches the variables actually plotted.
    renderer : str, optional
        Plotly renderer passed to `fig.show`; defaults to 'browser'.

    Raises
    ------
    ValueError
        If no row has both `x_axis` and `y_axis` available.
    """
    # Fit and draw on exactly the same rows: rows with a missing x or y value
    # cannot enter the regression and would not be drawn as markers either.
    complete_rows = data[[x_axis, y_axis]].dropna()
    if complete_rows.empty:
        raise ValueError(
            'No rows with both {!r} and {!r} available'.format(x_axis, y_axis)
        )
    x_values = complete_rows[x_axis]
    y_values = complete_rows[y_axis]

    reg = sm.OLS(y_values, sm.add_constant(x_values)).fit()
    slope = reg.params[x_axis]
    intercept = reg.params['const']
    pvalue = reg.pvalues[x_axis]

    if title is None:
        title = '{} vs {}'.format(xaxis_title, yaxis_title)

    # A straight line is fully described by its two end points.
    line_x = [x_values.min(), x_values.max()]
    line_y = [x * slope + intercept for x in line_x]

    fig = go.Figure()
    trace = go.Scatter(x=x_values, y=y_values, mode='markers', name='Commune', marker=dict(color='black', size=8), showlegend=True)
    # The p-value uses significant digits so that small values are not displayed as 0.00.
    trace_reg = go.Scatter(x=line_x, y=line_y, mode='lines', line=dict(color='red'), name='Slope: {:.2f} (p-value: {:.2g})'.format(slope, pvalue))
    fig.add_trace(trace)
    fig.add_trace(trace_reg)
    fig.update_layout(title=title, xaxis_title=xaxis_title, yaxis_title=yaxis_title, template='plotly_white', width=1000, height=650, font=dict(size=20, color='black'), legend=dict(font=dict(size=20)))
    fig.show(renderer=renderer)

In [45]:
# Average work-to-home duration against the bedtime index.
plot_with_regression(
    data=bed_time_index_and_avg_duration_income,
    x_axis='bed_time_index',
    y_axis='total_duration',
    xaxis_title='Bedtime index',
    yaxis_title='Duration of work to home displacement (minutes)',
)

In [48]:
# Average work-to-home duration against the log2-transformed income column.
plot_with_regression(
    data=bed_time_index_and_avg_duration_income,
    x_axis='income',
    y_axis='total_duration',
    xaxis_title='Log2 Median Income',
    yaxis_title='Duration of work to home displacement (minutes)',
)