In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import numpy as np
from scipy.stats import ttest_ind
import glob
from tqdm import tqdm, trange
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import osmnx as ox
import seaborn as sns
import folium
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline 

np.random.seed(0)

# Read IMOB results

In [None]:
# Routing profiles for which a circuity column is derived, in the order the
# columns are added to each frame.
routing_profiles = [
    'driving-car',
    'driving-hgv',
    'foot-walking',
    'foot-hiking',
    'cycling-regular',
    'cycling-road',
    'cycling-mountain',
    'cycling-electric',
]

data_IMOB = []
# One list per circuity column; each processed file appends its mean value.
mean_circuity_IMOB = {'circuity_' + profile: [] for profile in routing_profiles}

# Derived "*_circuity.csv" exports are filtered out up front, so nothing in the
# loop depends on how many files were skipped before the first real one.
imob_files = [path
              for path in sorted(glob.glob('data/dist_time_lisbon_imob_*.csv'))
              if 'circuity' not in path]

# Row labels removed from every file. They are drawn once, from the first
# processed file, and reused so all files lose the same rows.
drop_indices = None

for data_file in imob_files:
    print('=====', data_file, '=====')
    # The first 111515 data rows are skipped; the header line (row 0) is kept.
    df = pd.read_csv(data_file, index_col=0, skiprows=range(1,111515+1))

    for profile in routing_profiles:
        # (<profile>_dist / haversine_dist) / 1000 -- presumably routed metres
        # over straight-line kilometres; TODO confirm the units.
        df['circuity_' + profile] = df[profile + '_dist'] / df['haversine_dist']/1000

    # Means are taken on the full frame, before the random rows are dropped.
    for column, file_means in mean_circuity_IMOB.items():
        file_means.append(df[column].mean(skipna=True))

    if drop_indices is None:
        # Depends on the global NumPy seed set in the import cell.
        drop_indices = np.random.choice(df.index, 11515, replace=False)

    df = df.drop(drop_indices)
    #df.to_csv(data_file[:-4]+'_circuity.csv')
    data_IMOB.append(df.reset_index(drop=True))

# Read random sampling (RS) results

In [None]:
data_RS = []
# One list per circuity column; each processed file appends its mean value.
mean_circuity_RS = {
    column: []
    for column in (
        'circuity_driving-car',
        'circuity_driving-hgv',
        'circuity_foot-walking',
        'circuity_foot-hiking',
        'circuity_cycling-regular',
        'circuity_cycling-road',
        'circuity_cycling-mountain',
        'circuity_cycling-electric',
    )
}

for data_file in sorted(glob.glob('data/dist_time_lisbon_*.csv')):
    # The glob also matches the IMOB files and the derived "*_circuity" exports;
    # only the remaining files are processed here.
    if not ('circuity' in data_file or 'imob' in data_file):
        print('=====', data_file, '=====')
        df = pd.read_csv(data_file, index_col=0)

        # 'circuity_<profile>' is derived from the '<profile>_dist' column.
        for circuity_col in mean_circuity_RS:
            dist_col = circuity_col.replace('circuity_', '', 1) + '_dist'
            df[circuity_col] = df[dist_col] / df['haversine_dist']/1000

        for circuity_col, file_means in mean_circuity_RS.items():
            file_means.append(df[circuity_col].mean(skipna=True))

        #df.to_csv(data_file[:-4]+'_circuity.csv')
        data_RS.append(df.reset_index(drop=True))

Rearrange data in dict format

In [None]:
years = list(map(str, range(2013, 2021)))

# Re-key the per-file lists by year label: the frame at position k of each list
# is stored under years[k].
data_aux = {year: data_RS[position] for position, year in enumerate(years)}
data_aux2 = {year: data_IMOB[position] for position, year in enumerate(years)}

# NOTE: from here on data_RS / data_IMOB are dicts, so this cell cannot be
# re-run without re-running the two loading cells first.
data_RS, data_IMOB = data_aux, data_aux2

### Plot mean circuity for different modes

In [None]:
# Mean circuity per mode over the years: IMOB as lines with markers, RS as
# plain lines, all in one figure.
year_labels = [str(i) for i in range(2013, 2021)]
fig = go.Figure()

for circuity, imob_means in mean_circuity_IMOB.items():
    imob_trace = go.Scatter(
        x=year_labels,
        y=imob_means,
        mode='lines+markers',
        name=circuity.replace('circuity_',''),
    )
    fig.add_trace(imob_trace)

fig.update_layout(
    title="IMOB Mean Circuity",
    xaxis_title="Years",
    yaxis_title="Circuity",
    legend_title="Mode",
    font=dict(family="Courier New, monospace", size=12, color="Black"),
)

for circuity, rs_means in mean_circuity_RS.items():
    rs_trace = go.Scatter(
        x=year_labels,
        y=rs_means,
        mode='lines',
        name=circuity.replace('circuity_',''),
        marker=dict(symbol='x'),
    )
    fig.add_trace(rs_trace)

fig.show()

In [None]:
# Alternative palette kept for reference:
#    '#1f77b4',  # muted blue
#    '#ff7f0e',  # safety orange
#    '#2ca02c',  # cooked asparagus green

colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A',
          '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']

circuities = ['circuity_driving-car', 'circuity_cycling-regular', 'circuity_foot-walking', ]
year_labels = [str(i) for i in range(2013, 2021)]

fig = go.Figure()

# IMOB traces first (lines with markers), one colour per mode.
for aa, circuity in enumerate(circuities):
    fig.add_trace(go.Scatter(
        x=year_labels,
        y=mean_circuity_IMOB[circuity],
        mode='lines+markers',
        name='imob_'+circuity.replace('circuity_',''),
        line=dict(color=colors[aa]),
    ))

# RS traces second (plain lines), reusing the colour of the matching mode.
for aa, circuity in enumerate(circuities):
    fig.add_trace(go.Scatter(
        x=year_labels,
        y=mean_circuity_RS[circuity],
        mode='lines',
        name='rs_'+circuity.replace('circuity_',''),
        line=dict(color=colors[aa]),
    ))

fig.update_layout(
    title="Mean Circuity",
    xaxis_title="Years",
    yaxis_title="Circuity",
    legend_title="Mode",
    font=dict(family="Times New Roman", size=18, color="Black"),
    # Horizontal legend placed below the plot area.
    legend=dict(orientation="h", yanchor="top", y=-0.2, xanchor="right", x=1),
)
fig.show()

# Compute average circuity for different distances

In [None]:
data_dist_RS = {}
data_dist_IMOB = {}

years = [str(i) for i in range(2013, 2021)]
# Half-open [lower, upper) ranges compared against 'haversine_dist'.
# (disabled alternatives: unit-wide bins [4, 5], [5, 6], ... [9, 10])
dist_thresholds = [[0, 1], [1, 2], [2, 8], [8, 12], [12, float('inf')]]


def _rows_in_range(frame, lower, upper):
    """Return the rows of `frame` with lower <= haversine_dist < upper."""
    straight_line = frame['haversine_dist']
    return frame[(straight_line >= lower) & (straight_line < upper)]


for year in years:
    rs_buckets = {}
    imob_buckets = {}

    for dist in dist_thresholds:
        bucket_label = str(dist[0])+'_'+str(dist[1])
        rs_buckets[bucket_label] = _rows_in_range(data_RS[year], dist[0], dist[1])
        imob_buckets[bucket_label] = _rows_in_range(data_IMOB[year], dist[0], dist[1])

    data_dist_RS[year] = rs_buckets
    data_dist_IMOB[year] = imob_buckets

### Distribution of trips per distance category

In [None]:
# Trip counts per distance bucket for one year. The year used to be whatever a
# previous loop left in `year`; it is now stated explicitly. Running the
# notebook top to bottom left `year` at the last entry of `years`.
reference_year = years[-1]

bucket_keys = [str(dist[0])+'_'+str(dist[1]) for dist in dist_thresholds]

# Build the table in one go instead of growing it row by row.
df_dist_distribution = pd.DataFrame(
    {
        'RS': [data_dist_RS[reference_year][key].shape[0] for key in bucket_keys],
        'IMOB': [data_dist_IMOB[reference_year][key].shape[0] for key in bucket_keys],
    },
    index=bucket_keys,
)

distances=df_dist_distribution.index

fig = go.Figure(data=[
    go.Bar(name='IMOB', x=distances, y=df_dist_distribution.IMOB),
    go.Bar(name='RS', x=distances, y=df_dist_distribution.RS),
])

fig.update_layout(
    barmode='group',
    title="Distribution of trips per distance category",
    xaxis_title="Distance categories",
    yaxis_title="# of trips",
    # The legend separates the two samples (IMOB vs RS), not transport modes.
    legend_title="Type",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    ))

fig.show()

### Circuity per distance category per mode

In [None]:
# Circuity columns analysed per distance category. The other circuity columns
# (driving-hgv, foot-hiking, cycling-road, cycling-mountain, cycling-electric)
# are deliberately left out of this list.
modes = [
    'circuity_driving-car',
    'circuity_foot-walking',
    'circuity_cycling-regular',
]

# Filled by the next cell: {distance bucket: {mode: [mean per year]}}.
mean_circuity_dist_RS, mean_circuity_dist_IMOB = {}, {}

In [None]:
# For every distance bucket and mode, collect the yearly mean circuity of the
# rows in that bucket (NaN values are ignored by mean).
for dist in dist_thresholds:
    key = '_'.join(str(bound) for bound in dist)

    mean_circuity_dist_RS[key] = {
        mode: [data_dist_RS[year][key][mode].mean(skipna=True) for year in years]
        for mode in modes
    }
    mean_circuity_dist_IMOB[key] = {
        mode: [data_dist_IMOB[year][key][mode].mean(skipna=True) for year in years]
        for mode in modes
    }

### Circuity per distance category

In [None]:
# Quick inspection: which modes are stored for one distance bucket.
# `key` is not set in this cell; it is whatever the previous loop left behind.
mean_circuity_dist_RS[key].keys()

In [None]:


# Trace order within each figure (and therefore legend order).
plotted_modes = ['circuity_cycling-regular', 'circuity_foot-walking', 'circuity_driving-car']

# One figure per distance bucket: IMOB traces first, then RS traces.
for dist in dist_thresholds:
    key = str(dist[0])+'_'+str(dist[1])

    fig = go.Figure()

    for circuity in plotted_modes:
        fig.add_trace(go.Scatter(
            x=years,
            y=mean_circuity_dist_IMOB[key][circuity],
            mode='lines+markers',
            name='imob_'+circuity,
        ))
    for circuity in plotted_modes:
        fig.add_trace(go.Scatter(
            x=years,
            y=mean_circuity_dist_RS[key][circuity],
            mode='lines',
            name='rs_'+circuity,
        ))

    fig.update_layout(
        title="Distance: "+key,
        xaxis_title="Years",
        yaxis_title="Circuity",
        legend_title="Transport Mode",
        font=dict(family="Times New Roman", size=18, color="Black"),
        # Horizontal legend below the plot; same y range for every bucket.
        legend=dict(orientation="h", yanchor="top", y=-0.2, xanchor="right", x=1),
        yaxis_range=[1.2,3.2],
    )
    fig.show()
    plt.show()

# ===================

In [None]:
# Quick inspection: every distance range except the last entry of the list.
dist_thresholds[:-1]

In [None]:
# First three colours are used for the MSS (IMOB) traces, last three for RS.
colors = ['#636EFA', '#EF553B', '#00CC96',
          '#11B1F5', '#EF979D', '#008796']

circuities = ['circuity_driving-car', 'circuity_cycling-regular', 'circuity_foot-walking', ]

# (bucket key, legend label, marker symbol) in drawing order; the last
# distance bucket is not drawn in these figures.
bucket_styles = [
    ('0_1', '0-1 km', 'x'),
    ('1_2', '1-2 km', 'square'),
    ('2_8', '2-8 km', 'triangle-up'),
    ('8_12', '8-12 km', 'circle'),
]

# One figure per mode: four MSS traces followed by four RS traces.
for color_i, circuity in enumerate(circuities):
    fig = go.Figure()

    samples = [
        ('MSS', mean_circuity_dist_IMOB, colors[color_i]),
        ('RS', mean_circuity_dist_RS, colors[color_i+3]),
    ]
    for sample_label, means_by_bucket, line_color in samples:
        for bucket, bucket_label, symbol in bucket_styles:
            fig.add_trace(go.Scatter(
                x=years,
                y=means_by_bucket[bucket][circuity],
                mode='lines+markers',
                name=sample_label + ' ' + bucket_label,
                line=dict(color=line_color),
                marker_symbol=symbol,
                marker_size=10,
            ))

    # No legend placement or y-axis range is set for these figures.
    fig.update_layout(
        title="Mode: "+circuity,
        xaxis_title="Years",
        yaxis_title="Circuity",
        legend_title="",
        font=dict(family="Times New Roman", size=18, color="Black"),
    )
    fig.show()
    plt.show()

In [None]:
colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']

# Fixed colour per mode, so the MSS and RS traces of one mode share a colour.
# The assignment follows the "Mean Circuity" figure (driving-car, cycling,
# walking -> first three palette entries). The colour index used to come from
# `aa`, a loop variable left over from another cell, which gave every trace the
# same colour.
mode_colors = {
    'circuity_driving-car': colors[0],
    'circuity_cycling-regular': colors[1],
    'circuity_foot-walking': colors[2],
}

# Trace (and legend) order within the figure.
plotted_modes = ['circuity_cycling-regular', 'circuity_foot-walking', 'circuity_driving-car']

for dist in dist_thresholds:
    key = str(dist[0])+'_'+str(dist[1])

    fig = go.Figure()

    for circuity in plotted_modes:
        fig.add_trace(go.Scatter(y=mean_circuity_dist_IMOB[key][circuity],
                                 x=years,
                                 mode='lines+markers',
                                 name='mss_'+circuity,
                                 line=dict(color=mode_colors[circuity])))
    for circuity in plotted_modes:
        fig.add_trace(go.Scatter(y=mean_circuity_dist_RS[key][circuity],
                                 x=years,
                                 mode='lines',
                                 name='rs_'+circuity,
                                 line=dict(color=mode_colors[circuity])))

    fig.update_layout(
        title="Distance: "+key,
        xaxis_title="Years",
        yaxis_title="Circuity",
        legend_title="Transport Mode",
        font=dict(
            family="Times New Roman",
            size=18,
            color="Black"
        )
    )
    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="top",
        y=-0.2,
        xanchor="right",
        x=1),
        yaxis_range=[1.2,3.2])
    fig.show()
    plt.show()
    # NOTE(review): only the first distance bucket is drawn because of this
    # break -- confirm whether that is intended or a debugging leftover.
    break

# ============

### Circuity per mode

In [None]:
# One figure per mode with a trace per distance bucket: RS buckets as plain
# lines first, then IMOB buckets as lines with markers.
for mode in modes:
    fig = go.Figure()

    for dist, rs_means in mean_circuity_dist_RS.items():
        fig.add_trace(go.Scatter(
            x=years,
            y=rs_means[mode],
            mode='lines',
            name='rs_'+dist,
        ))

    for dist, imob_means in mean_circuity_dist_IMOB.items():
        fig.add_trace(go.Scatter(
            x=years,
            y=imob_means[mode],
            mode='lines+markers',
            name='imob_'+dist,
        ))

    fig.update_layout(
        title="Mode: "+mode,
        xaxis_title="Years",
        yaxis_title="Circuity",
        legend_title="Distances",
        font=dict(family="Courier New, monospace", size=12, color="Black"),
        # Horizontal legend placed below the plot area.
        legend=dict(orientation="h", yanchor="top", y=-0.2, xanchor="right", x=1),
    )

    fig.show()

# Look at distribution of points per parish

In [None]:
# Parish polygons, reprojected to EPSG:4326.
gdf_freguesias = (
    gpd.read_file("Lisboa_Freguesias/Lisboa_Freguesias_CAOP2015_TM06.shp")
    .to_crs(epsg=4326)
)
# Later cells iterate gdf_freguesias.geometry.items() and use the keys as
# parish codes. NOTE(review): this relies on the index assignment sticking to
# the geometry column -- confirm with the installed geopandas version.
gdf_freguesias.geometry.index = gdf_freguesias['DICOFRE']

### Map of parishes

In [None]:
# NOTE: the name `map` shadows the Python builtin for the rest of the notebook.
map = folium.Map(location=[38.748662, -9.145801],
                 zoom_start=12,
                 tiles='cartodbpositron')
style_or = {'fillColor': '#F8C290', 'color': '#F8C290'}
unique_zones = gdf_freguesias['DICOFRE'].unique()

# Each parish code is added as its own GeoJson layer, all with the same style.
for zone in unique_zones:
    zone_rows = gdf_freguesias.loc[gdf_freguesias['DICOFRE'] == zone]
    zone_layer = folium.GeoJson(zone_rows,
                                name='polygon_or',
                                style_function=lambda x:style_or)
    zone_layer.add_to(map)

map

### Read/Associate data with parishes

In [None]:
# True: the next three cells load their tables from the CSV files under data/.
# False: they rebuild the tables from the source files and write those CSVs.
read_data_points = True

In [None]:
if not read_data_points:
    # Rebuild the origin/destination zone codes from the IMOB trip table.
    df = pd.read_csv('IMOB/15.1_IMOB/BASE DADOS/AML/CSV/TBL_AML/TBL_viagens_OR_DE_AML.csv', sep=';')
    df = df[df['DTCC_or11'].notna()]
    df = df[df['DTCC_de11'].notna()]

    for code_column in ('DTCC_or11', 'FR_or11', 'Sec_or11', 'SS_or11',
                        'DTCC_de11', 'FR_de11', 'Sec_de11', 'SS_de11'):
        df[code_column] = df[code_column].astype('int64')
    df['Tipo_veiculo_2'] = df['Tipo_veiculo_2'].astype('category')

    print('Original IMOB data shape: ', df.shape)

    # BRI11_<end> = zero-padded DTCC (4) + FR (2) + Sec (3) + SS (2) digits,
    # built as text for the origin and the destination, then cast to int64.
    for end in ('or', 'de'):
        code_text = df['DTCC_' + end + '11'].astype('str').str.zfill(4)
        for prefix, width in (('FR', 2), ('Sec', 3), ('SS', 2)):
            code_text = code_text + df[prefix + '_' + end + '11'].astype('str').str.zfill(width)
        df['BRI11_' + end] = code_text
    for end in ('or', 'de'):
        df['BRI11_' + end] = df['BRI11_' + end].astype('int64')

    # Keep only trips whose origin and destination both have code 1106.
    mask_lisboa = (df['DTCC_or11'] == 1106) & (df['DTCC_de11'] == 1106)
    df = df.loc[mask_lisboa]

    # Every trip row is repeated five times.
    df_lisbon = df[['BRI11_or','BRI11_de']].copy()
    repeated_labels = df_lisbon.index.repeat(5)
    df_lisbon = df_lisbon.loc[repeated_labels].reset_index(drop=True)

    # Drop the same rows that were removed from the IMOB result files;
    # `drop_indices` comes from the IMOB loading cell and is shifted by the
    # number of rows skipped there.
    df_lisbon = df_lisbon.drop(drop_indices-111515).reset_index(drop=True)

    df_lisbon.to_csv('data/df_lisbon.csv')

else:
    df_lisbon = pd.read_csv('data/df_lisbon.csv', index_col=0)

In [None]:
if read_data_points:
    df_IMOB_points = pd.read_csv('data/df_IMOB_points.csv', index_col=0)

else:
    gdf_BGRI11 = gpd.read_file("IMOB/lisboa2011/BGRI11_LISBOA.shp")

    for code_column in ('DTMN11', 'FR11', 'SEC11', 'SS11', 'BGRI11', 'LUG11'):
        gdf_BGRI11[code_column] = gdf_BGRI11[code_column].astype('int64')

    gdf_proj = gdf_BGRI11.to_crs(epsg=4326)

    # Extract only Lisbon's municipality and not the metropolitan area
    mask_lisboa = gdf_proj['DTMN11'] == 1106
    gdf_proj = gdf_proj.loc[mask_lisboa]

    # Point columns come from the 2020 IMOB frame; zone codes from df_lisbon
    # are attached by index.
    df_IMOB_points = data_IMOB['2020'][['point_A', 'point_B']].copy()
    for zone_column in ('BRI11_or', 'BRI11_de'):
        df_IMOB_points[zone_column] = df_lisbon[zone_column]
    df_IMOB_points['BRI11_total'] = df_IMOB_points['BRI11_or'].astype('str') + df_IMOB_points['BRI11_de'].astype('str')

    df_IMOB_points['freguesia_or'] = ''
    df_IMOB_points['freguesia_de'] = ''

    # Parish codes considered when assigning a point to a parish.
    parish_codes = ['110660','110639','110608','110621','110654','110611','110661','110665','110658','110667','110659','110601','110602','110666','110656','110663','110607','110610','110657','110655','110618','110633','110664','110662']
    polys = gdf_freguesias.geometry

    def _parish_of_points(point_text, column_name):
        """Return a one-column frame with the parish code of each point.

        `point_text` holds strings such as "(a, b)"; the two numbers are passed
        to points_from_xy in that order. One boolean "within" column is built
        per parish polygon and idxmax picks the first True one per row.
        NOTE(review): a point inside none of the listed parishes gets the first
        listed code, because idxmax returns the first column on ties.
        """
        coords = pd.DataFrame(point_text.str[1:-1].str.split(', ').to_list()).astype('float')
        points = gpd.GeoDataFrame(
            coords, geometry=gpd.points_from_xy(coords[0], coords[1]))
        points = points.assign(**{code: points.within(geom) for code, geom in polys.items()})
        return points[parish_codes].idxmax(1).to_frame(column_name)

    # point_A -> origin parish, point_B -> destination parish
    df_IMOB_points['freguesia_or'] = _parish_of_points(df_IMOB_points['point_A'], 'freguesia_or')
    df_IMOB_points['freguesia_de'] = _parish_of_points(df_IMOB_points['point_B'], 'freguesia_de')
    df_IMOB_points['freguesia_total'] = df_IMOB_points['freguesia_or'].astype('str') + df_IMOB_points['freguesia_de'].astype('str')

    # Save IMOB points to a file
    df_IMOB_points.to_csv('data/df_IMOB_points.csv')
     

In [None]:
if read_data_points:
    df_RS_points = pd.read_csv('data/df_RS_points.csv', index_col=0)

else:
    # TODO: the zone lookup below is a brute-force scan of every polygon for
    # every trip and is slow. It needs `gdf_proj`, which is only created by the
    # rebuild branch of the previous cell.
    df_RS_points = data_RS['2020'][['point_A', 'point_B']].copy()

    def _parse_point(text):
        """Turn a string such as "(a, b)" into Point(a, b)."""
        parts = text[1:-1].split(", ")
        return Point(float(parts[0]), float(parts[1]))

    # Zone code per trip; '' is kept when no polygon contains the point.
    zone_or = []
    zone_de = []

    for _, row in tqdm(df_RS_points.iterrows(),
                       total=df_RS_points.shape[0],
                       desc='Looking at points'):
        point_or = _parse_point(row['point_A'])
        point_de = _parse_point(row['point_B'])

        code_or = ''
        code_de = ''
        found_or = False
        found_de = False
        for _, row_gdf in gdf_proj.iterrows():
            polygon = row_gdf.geometry

            if not found_or and polygon.contains(point_or):
                found_or = True
                code_or = row_gdf['BGRI11']
            # The destination must be tested with point_de (was point_or).
            if not found_de and polygon.contains(point_de):
                found_de = True
                code_de = row_gdf['BGRI11']

            if found_or and found_de:
                break

        zone_or.append(code_or)
        zone_de.append(code_de)

    # The codes are written as whole columns: values assigned to the row
    # objects yielded by iterrows() never reach the DataFrame.
    df_RS_points['BRI11_or'] = zone_or
    df_RS_points['BRI11_de'] = zone_de
    df_RS_points['BRI11_total'] = df_RS_points['BRI11_or'].astype('str') + df_RS_points['BRI11_de'].astype('str')

    # Parish codes considered when assigning a point to a parish.
    parish_codes = ['110660','110639','110608','110621','110654','110611','110661','110665','110658','110667','110659','110601','110602','110666','110656','110663','110607','110610','110657','110655','110618','110633','110664','110662']
    polys = gdf_freguesias.geometry

    # Origin parish: one boolean "within" column per parish polygon, then the
    # first True column per row.
    _pnts = pd.DataFrame(df_RS_points['point_A'].str[1:-1].str.split(', ').to_list()).astype('float')
    pnts = gpd.GeoDataFrame(
        _pnts, geometry=gpd.points_from_xy(_pnts[0], _pnts[1]))
    pnts = pnts.assign(**{key: pnts.within(geom) for key, geom in polys.items()})
    df_RS_points['freguesia_or'] = pnts[parish_codes].idxmax(1).to_frame('freguesia_or')

    # Destination parish, same procedure on point_B.
    _pnts = pd.DataFrame(df_RS_points['point_B'].str[1:-1].str.split(', ').to_list()).astype('float')
    pnts = gpd.GeoDataFrame(
        _pnts, geometry=gpd.points_from_xy(_pnts[0], _pnts[1]))
    pnts = pnts.assign(**{key: pnts.within(geom) for key, geom in polys.items()})
    df_RS_points['freguesia_de'] = pnts[parish_codes].idxmax(1).to_frame('freguesia_de')

    df_RS_points['freguesia_total'] = df_RS_points['freguesia_or'].astype('str') + df_RS_points['freguesia_de'].astype('str')

    df_RS_points.to_csv('data/df_RS_points.csv')

# Look at trips RS vs. IMOB

In [None]:
#df = pd.read_csv('Data/IMOB/15.1_IMOB/BASE DADOS/AML/CSV/TBL_AML/TBL_viagens_OR_DE_AML.csv', sep=';')
#df = df[df['DTCC_or11'].notna()]
#df = df[df['DTCC_de11'].notna()]
#df['DTCC_or11'] = df['DTCC_or11'].astype('int64')
#df['FR_or11'] = df['FR_or11'].astype('int64')
#df['Sec_or11'] = df['Sec_or11'].astype('int64')
#df['SS_or11'] = df['SS_or11'].astype('int64')
#df['DTCC_de11'] = df['DTCC_de11'].astype('int64')
#df['FR_de11'] = df['FR_de11'].astype('int64')
#df['Sec_de11'] = df['Sec_de11'].astype('int64')
#df['SS_de11'] = df['SS_de11'].astype('int64')
#df['Tipo_veiculo_2'] = df['Tipo_veiculo_2'].astype('category')
#
#print('Original IMOB data shape: ', df.shape)
#
#### Compute BRI for OD trips
#
#df['BRI11_or'] = df['DTCC_or11'].astype('str').str.zfill(4) + \
#                 df['FR_or11'].astype('str').str.zfill(2) + \
#                 df['Sec_or11'].astype('str').str.zfill(3) + \
#                 df['SS_or11'].astype('str').str.zfill(2)
#df['BRI11_de'] = df['DTCC_de11'].astype('str').str.zfill(4) + \
#                 df['FR_de11'].astype('str').str.zfill(2) + \
#                 df['Sec_de11'].astype('str').str.zfill(3) + \
#                 df['SS_de11'].astype('str').str.zfill(2)
#df['BRI11_or'] = df['BRI11_or'].astype('int64')
#df['BRI11_de'] = df['BRI11_de'].astype('int64')
#
#### Filter for Lisbon municipality instead of metropolitan area
#
#mask_lisboa = (df['DTCC_or11'] == 1106) & (df['DTCC_de11'] == 1106)
#df = df.loc[mask_lisboa]
#print('IMOB Lisbon data shape: ', df.shape)
#
#df_lisbon = df[['BRI11_or','BRI11_de']].copy()
#df_lisbon = df_lisbon.loc[df_lisbon.index.repeat([5]*df_lisbon.shape[0])].reset_index(drop=True)

### How different are origin points

In [None]:
# Share of trips per origin zone in each sample. The series are renamed
# explicitly because pandas >= 2.0 calls the value_counts(normalize=True)
# result "proportion" instead of naming it after the column, which would break
# the `_x` / `_y` column names used below.
rs_share = (df_RS_points['BRI11_or']
            .value_counts(normalize=True)
            .rename('BRI11_or')
            .rename_axis(None))
imob_share = (df_IMOB_points['BRI11_or']
              .value_counts(normalize=True)
              .rename('BRI11_or')
              .rename_axis(None))

# Inner join on the zone code: only zones present in both samples are kept.
# `_x` is the RS share, `_y` the IMOB share.
merged = pd.merge(
    rs_share.to_frame(),
    imob_share.to_frame(),
    left_index=True,
    right_index=True,
    suffixes=('_x', '_y'))

# two-sided Kolmogorov-Smirnov test
stats.ks_2samp(merged.BRI11_or_x, merged.BRI11_or_y)
# If p-value significant, then you reject the hypothesis that both distributions are the same
# If p-value significant, then you reject the hypothesis that both distributions are the same

In [None]:
# Overlap of the two share distributions: per zone take the smaller of the two
# shares, then add them up.
smaller_share = merged.min(axis=1)
merged['min'] = smaller_share
print('Matching origin zones:', merged['min'].sum() )

### How different are destination points

In [None]:
# Share of trips per destination zone in each sample. The series are renamed
# explicitly because pandas >= 2.0 calls the value_counts(normalize=True)
# result "proportion" instead of naming it after the column, which would break
# the `_x` / `_y` column names used below.
rs_share = (df_RS_points['BRI11_de']
            .value_counts(normalize=True)
            .rename('BRI11_de')
            .rename_axis(None))
imob_share = (df_IMOB_points['BRI11_de']
              .value_counts(normalize=True)
              .rename('BRI11_de')
              .rename_axis(None))

# Inner join on the zone code: only zones present in both samples are kept.
# `_x` is the RS share, `_y` the IMOB share.
merged = pd.merge(
    rs_share.to_frame(),
    imob_share.to_frame(),
    left_index=True,
    right_index=True,
    suffixes=('_x', '_y'))

# two-sided Kolmogorov-Smirnov test
stats.ks_2samp(merged.BRI11_de_x, merged.BRI11_de_y)
# If p-value significant, then you reject the hypothesis that both distributions are the same
# If p-value significant, then you reject the hypothesis that both distributions are the same

In [None]:
# Overlap of the two share distributions: per zone take the smaller of the two
# shares, then add them up.
smaller_share = merged.min(axis=1)
merged['min'] = smaller_share
print('Matching destination zones:', merged['min'].sum() )

### How different are origin+destination points

In [None]:
# Share of trips per origin+destination zone pair in each sample. The series
# are renamed explicitly because pandas >= 2.0 calls the
# value_counts(normalize=True) result "proportion" instead of naming it after
# the column, which would break the `_x` / `_y` column names used below.
rs_share = (df_RS_points['BRI11_total']
            .value_counts(normalize=True)
            .rename('BRI11_total')
            .rename_axis(None))
imob_share = (df_IMOB_points['BRI11_total']
              .value_counts(normalize=True)
              .rename('BRI11_total')
              .rename_axis(None))

# Inner join on the pair code: only pairs present in both samples are kept.
# `_x` is the RS share, `_y` the IMOB share.
merged = pd.merge(
    rs_share.to_frame(),
    imob_share.to_frame(),
    left_index=True,
    right_index=True,
    suffixes=('_x', '_y'))

# two-sided Kolmogorov-Smirnov test
stats.ks_2samp(merged.BRI11_total_x, merged.BRI11_total_y)
# If p-value significant, then you reject the hypothesis that both distributions are the same
# If p-value significant, then you reject the hypothesis that both distributions are the same

In [None]:
# Overlap of the two share distributions: per zone pair take the smaller of the
# two shares, then add them up.
smaller_share = merged.min(axis=1)
merged['min'] = smaller_share
print('Matching origin+destination zones:', merged['min'].sum() )

# Compare OD for RS & IMOB for Lisbon's parishes

Set up some variables

In [None]:
# Later cells look parish names up with gdf_freguesias['Freguesia'][<code>].
# NOTE(review): this relies on the index assignment sticking to the column
# object pandas hands back -- confirm with the installed pandas version.
gdf_freguesias['Freguesia'].index = gdf_freguesias['DICOFRE'].astype('str')

df_IMOB_points.index = df_IMOB_points.index.astype('int64')


def _trips_per_parish(parish_column):
    """Count rows per parish code and cast the code index to int64."""
    counts = parish_column.value_counts()
    counts.index = counts.index.astype('int64')
    return counts


imob_or = _trips_per_parish(df_IMOB_points.freguesia_or)
imob_de = _trips_per_parish(df_IMOB_points.freguesia_de)
rs_or = _trips_per_parish(df_RS_points.freguesia_or)
rs_de = _trips_per_parish(df_RS_points.freguesia_de)

### Origin


In [None]:
# Origin trips per parish. The IMOB counts define the row order; RS counts are
# aligned to them by parish code.
df_points = imob_or.to_frame('imob_or')
df_points['rs_or'] = rs_or

parish_names = gdf_freguesias['Freguesia']
x_axis = [parish_names[str(code)] for code in df_points.index]

fig = go.Figure()
fig.add_trace(go.Bar(name='IMOB', x=x_axis, y=df_points.imob_or))
fig.add_trace(go.Bar(name='RS', x=x_axis, y=df_points.rs_or))

fig.update_layout(
    barmode='group',
    title="Distribution of trips per origin parish",
    xaxis_title="Origin Parish",
    yaxis_title="# of trips",
    legend_title="Type",
    font=dict(family="Courier New, monospace", size=12, color="Black"),
)

fig.show()

### Destination


In [None]:
# Destination trips per parish. The IMOB counts define the row order; RS counts
# are aligned to them by parish code. This cell used to plot the origin counts
# (imob_or / rs_or) under the destination labels.
df_points = pd.DataFrame()
df_points['imob_de'] = imob_de
df_points['rs_de'] = rs_de

x_axis = [gdf_freguesias['Freguesia'][str(x)] for x in df_points.index]

fig = go.Figure(data=[
    go.Bar(name='IMOB', x=x_axis, y=df_points.imob_de),
    go.Bar(name='RS', x=x_axis, y=df_points.rs_de),
])

fig.update_layout(
    barmode='group',
    title="Distribution of trips per destination parish",
    xaxis_title="Destination Parish",
    yaxis_title="# of trips",
    legend_title="Type",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    ))

fig.show()

In [None]:
# Origin and destination parish codes joined as text into one pair code.
df_IMOB_points['freguesia_total'] = (
    df_IMOB_points['freguesia_or'].astype('str')
    .str.cat(df_IMOB_points['freguesia_de'].astype('str'))
)

In [None]:
# Trips per origin+destination parish pair, with the pair code cast to int64.
imob_total = df_IMOB_points.freguesia_total.value_counts()
imob_total.index = imob_total.index.astype('int64')
rs_total = df_RS_points.freguesia_total.value_counts()
rs_total.index = rs_total.index.astype('int64')

# The IMOB counts define the rows; RS counts are aligned to them by pair code.
df_points = imob_total.to_frame('imob_total')
df_points['rs_total'] = rs_total
# The pair code is also kept as a regular column named 'index'.
df_points['index'] = df_points.index


In [None]:
# Grouped bars of IMOB vs RS trip counts per origin+destination parish pair.
fig, ax1 = plt.subplots(figsize=(100, 50))
# Long format: one row per (pair code, sample) with the count in 'value'.
tidy = df_points.melt(id_vars='index')
chart = sns.barplot(x='index', y='value', hue='variable', data=tidy, ax=ax1)
sns.despine(fig)
plt.setp(chart.get_xticklabels(), rotation=45, horizontalalignment='right');
# The x values are origin+destination pair codes and the y values are raw
# counts, so the labels say so (they used to read "Destination Parish" and
# "Count %").
chart.set(xlabel="Origin + destination parish pair", ylabel="# of trips");

## Trips per parish density

### Origin

In [None]:
# Origin trip counts per parish for both datasets, indexed by the parish codes
# of `imob_or`.
df_points = pd.DataFrame()
df_points['imob_or'] = imob_or
df_points['rs_or'] = rs_or

# Parish area keyed by the parish code as text, built as a standalone Series
# instead of overwriting the index of a column of gdf_freguesias.
# NOTE(review): the column name suggests hectares - confirm the unit.
parish_area = pd.Series(
    gdf_freguesias['AREA_T_Ha'].to_numpy(),
    index=gdf_freguesias['DICOFRE'].astype('str').to_numpy(),
)

# Whole-column arithmetic replaces the former row loop with chained
# `df[col].loc[i] = ...` assignments, which is unreliable (SettingWithCopy)
# and writes nothing under copy-on-write.
df_points['imob_rs_or_diff'] = df_points['imob_or'] - df_points['rs_or']
# .loc raises KeyError for a parish code missing from the area table.
df_points['area'] = parish_area.loc[df_points.index.astype('str')].to_numpy()
df_points['dens_imob_area'] = df_points['imob_or'] / df_points['area']
df_points['dens_rs_area'] = df_points['rs_or'] / df_points['area']
df_points['dens_diff'] = df_points['dens_imob_area'] - df_points['dens_rs_area']

df_points = df_points.sort_values(by=['dens_imob_area'], ascending=False)

# Parish names in the same order as the sorted rows.
x_axis = [gdf_freguesias['Freguesia'][str(x)] for x in df_points.index]

fig = go.Figure(data=[
    go.Bar(name='IMOB', x=x_axis, y=df_points.dens_imob_area),
    go.Bar(name='RS', x=x_axis, y=df_points.dens_rs_area),
])

fig.update_layout(
    barmode='group',
    title="Density of origin trips per parish area",
    xaxis_title="Origin Parish",
    yaxis_title="Number of trips per parish area",
    legend_title="Type",
    font=dict(
         family="Times New Roman",
         size=18,
         color="Black"
    ))
fig.update_xaxes(tickangle=-45)

# Axis category value -> label shown on the axis. The keys must match the
# names stored in gdf_freguesias['Freguesia'] exactly.
# NOTE(review): several keys look like mis-encoded accented names - confirm
# against the source data before touching them.
parish_tick_labels = {
    'Arroios': 'Arroios',
    'Avenidas Novas': 'Av. Novas',
    'Miseric': 'Misericórdia',
    'Santo Ant': 'Sto. António',
    'Areeiro': 'Areeiro',
    'Santa Maria Maior': 'Sta. Maria Maior',
    'Campo de Ourique': 'Campo de Ourique',
    'Alvalade': 'Alvalade',
    'S1o Vicente': 'S. Vicente',
    'Penha de Fran': 'Penha de França',
    'Campolide': 'Campolide',
    'Estrela': 'Estrela',
    'S1o Domingos de Benfica': 'S. Domingos de Benfica',
    'Bel6m': 'Belém',
    'Lumiar': 'Lumiar',
    'Ajuda': 'Ajuda',
    'Alc6ntara': 'Alcântara',
    'Parque das Nas': 'Parque das Nações',
    'Carnide': 'Carnide',
    'Olivais': 'Olivais',
    'Beato': 'Beato',
    'Benfica': 'Benfica',
    'Marvila': 'Marvila',
    'Santa Clara': 'Santa Clara',
}
fig.update_xaxes(
    ticktext=list(parish_tick_labels.values()),
    tickvals=list(parish_tick_labels.keys()),
)
fig.show()

In [None]:
# Re-order the parishes by the IMOB-minus-RS density gap, largest first.
df_points = df_points.sort_values(by=['dens_diff'], ascending=False)

# Parish names in the same order as the re-sorted rows.
x_axis = [gdf_freguesias['Freguesia'].loc[str(x)] for x in df_points.index]

fig = go.Figure()
fig.add_trace(go.Bar(name='IMOB-RS', x=x_axis, y=df_points['dens_diff']))

fig.update_layout(
    barmode='group',
    title="IMOB-RS Diff density of origin trips per parish area",
    xaxis_title="Origin Parish",
    yaxis_title="# of trips/Parish Area",
    legend_title="Type",
    font={
        'family': "Courier New, monospace",
        'size': 12,
        'color': "Black",
    },
)

fig.show()

In [None]:
# Scatter of the IMOB-minus-RS trip difference against parish area, one
# marker per row of df_points, coloured by the same difference.

# Hover labels are derived from df_points here so they always match its
# current row order instead of depending on an `x_axis` left by another cell.
x_axis = [gdf_freguesias['Freguesia'][str(x)] for x in df_points.index]

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=df_points.area,
        y=df_points.imob_rs_or_diff,
        mode='markers',
        text=x_axis,
        marker=dict(
            size=16,
            color=df_points.imob_rs_or_diff,  # colour encodes the difference
            colorscale='Viridis',
            showscale=True,
            # A single trace shows no legend, so the colour bar carries the label.
            colorbar=dict(title="Diff"),
        ),
    ))

fig.update_layout(
    title="IMOB-RS Diff over parish area",
    xaxis_title="Area",
    yaxis_title="IMOB trips - RS trips",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    ))

fig.show()

### Destination

In [None]:
# NOTE(review): bare inspection cell - it displays whatever `x_axis` currently
# holds. When the cells run top to bottom that is the list built by the origin
# cells above, not a destination list; presumably a debugging leftover.
x_axis

In [None]:
# Destination trip counts per parish for both datasets, indexed by the parish
# codes of `imob_de`.
df_points = pd.DataFrame()
df_points['imob_de'] = imob_de
df_points['rs_de'] = rs_de

# Parish area keyed by the parish code as text, built as a standalone Series
# instead of overwriting the index of a column of gdf_freguesias.
# NOTE(review): the column name suggests hectares - confirm the unit.
parish_area = pd.Series(
    gdf_freguesias['AREA_T_Ha'].to_numpy(),
    index=gdf_freguesias['DICOFRE'].astype('str').to_numpy(),
)

# Whole-column arithmetic replaces the former row loop with chained
# `df[col].loc[i] = ...` assignments, which is unreliable (SettingWithCopy)
# and writes nothing under copy-on-write.
df_points['imob_rs_de_diff'] = df_points['imob_de'] - df_points['rs_de']
# .loc raises KeyError for a parish code missing from the area table.
df_points['area'] = parish_area.loc[df_points.index.astype('str')].to_numpy()
df_points['dens_imob_area'] = df_points['imob_de'] / df_points['area']
df_points['dens_rs_area'] = df_points['rs_de'] / df_points['area']
df_points['dens_diff'] = df_points['dens_imob_area'] - df_points['dens_rs_area']

df_points = df_points.sort_values(by=['dens_imob_area'], ascending=False)

# Parish names in the same order as the sorted rows.
x_axis = [gdf_freguesias['Freguesia'][str(x)] for x in df_points.index]

fig = go.Figure(data=[
    go.Bar(name='MSS', x=x_axis, y=df_points.dens_imob_area),
    go.Bar(name='RS', x=x_axis, y=df_points.dens_rs_area),
])

fig.update_layout(
    barmode='group',
    title="Density of destination trips per parish area",
    # This chart is about destinations; the axis used to read "Origin's Parish".
    xaxis_title="Destination Parish",
    yaxis_title="# of trips / Parish Area",
    legend_title="Type",
    autosize=False,
    width=900,
    height=700,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    font=dict(
        family="Times New Roman",
        size=18,
        color="Black"
    ))
fig.update_xaxes(tickangle=-45)

# Axis category value -> label shown on the axis. The keys must match the
# names stored in gdf_freguesias['Freguesia'] exactly.
# NOTE(review): several keys look like mis-encoded accented names - confirm
# against the source data before touching them.
parish_tick_labels = {
    'Arroios': 'Arroios',
    'Avenidas Novas': 'Av. Novas',
    'Miseric': 'Misericórdia',
    'Santo Ant': 'Sto. António',
    'Areeiro': 'Areeiro',
    'Santa Maria Maior': 'Sta. Maria Maior',
    'Campo de Ourique': 'Campo de Ourique',
    'Alvalade': 'Alvalade',
    'S1o Vicente': 'S. Vicente',
    'Penha de Fran': 'Penha de França',
    'Campolide': 'Campolide',
    'Estrela': 'Estrela',
    'S1o Domingos de Benfica': 'S. Domingos de Benfica',
    'Bel6m': 'Belém',
    'Lumiar': 'Lumiar',
    'Ajuda': 'Ajuda',
    'Alc6ntara': 'Alcântara',
    'Parque das Nas': 'Parque das Nações',
    'Carnide': 'Carnide',
    'Olivais': 'Olivais',
    'Beato': 'Beato',
    'Benfica': 'Benfica',
    'Marvila': 'Marvila',
    'Santa Clara': 'Santa Clara',
}
fig.update_xaxes(
    ticktext=list(parish_tick_labels.values()),
    tickvals=list(parish_tick_labels.keys()),
)
fig.show()

In [None]:
# Re-order the parishes by the IMOB-minus-RS density gap, largest first.
df_points = df_points.sort_values(by=['dens_diff'], ascending=False)

# Parish names in the same order as the re-sorted rows.
x_axis = [gdf_freguesias['Freguesia'][str(x)] for x in df_points.index]

fig = go.Figure(data=[
    go.Bar(name='IMOB-RS', x=x_axis, y=df_points.dens_diff),
])

fig.update_layout(
    barmode='group',
    title="IMOB-RS Diff density of destination trips per parish area",
    # This chart is about destinations; the axis used to read "Origin Parish".
    xaxis_title="Destination Parish",
    yaxis_title="# of trips/Parish Area",
    legend_title="Type",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    ))

fig.show()

# Analyze circuity for the different parishes

In [None]:
# True for trips whose origin and destination parish codes are equal.
mask_same_parish = df_IMOB_points['freguesia_or'].eq(df_IMOB_points['freguesia_de'])

In [None]:
# Median haversine distance, in the '2020' dataset, of the trips selected by mask_same_parish.
print('Median haversine distance for trips inside the same parish: {:.3f}km'.format(
    data_IMOB['2020'].loc[mask_same_parish, 'haversine_dist'].median()))

In [None]:
# Median haversine distance, in the '2020' dataset, of the trips NOT selected by mask_same_parish.
print('Median haversine distance for trips in between parishes: {:.3f}km'.format(
    data_IMOB['2020'].loc[~mask_same_parish, 'haversine_dist'].median()))

In [None]:
# Split the IMOB trips into one frame per parish code: once keyed by the
# origin parish and once by the destination parish.
parishes_or = {}
parishes_de = {}

for index, parish in gdf_freguesias['Freguesia'].items():
    parishes_or[index] = df_IMOB_points[df_IMOB_points['freguesia_or'] == int(index)]

    mask_parish = df_IMOB_points['freguesia_de'] == int(index)
    parishes_de[index] = df_IMOB_points[mask_parish]

# Report how many trips start ([O]) and end ([D]) in every parish.
for key, value in parishes_or.items():
    parish_name = gdf_freguesias['Freguesia'][str(key)]
    print('[O] Parish', parish_name, 'with', len(value), 'samples.')
    print('[D] Parish', parish_name, 'with', len(parishes_de[key]), 'samples.')

In [None]:
# Build df_O_parishes: one row per origin parish holding the haversine
# distance summary and, for every year / distance band / travel mode, the mean
# and median circuity of the trips starting in that parish.
years = [str(i) for i in range(2013, 2021)]
# Haversine distance bands [lower, upper); a trip belongs to a band when
# lower <= distance < upper.
dist_thresholds = [[0, 1],
                   [1, 2],
                   [2, 8],
                   [8, 12],
                   [12, float('inf')]]
modes = ['cycling', 'driving', 'walking']
metrics = ['mean', 'median']

# Circuity column that stands for each travel mode.
mode_columns = {
    'cycling': 'circuity_cycling-regular',
    'driving': 'circuity_driving-car',
    'walking': 'circuity_foot-walking',
}

# Leading column order of the result; columns not listed here (the per-year
# "d-all" summaries of the other years) are appended after these.
columns = ['freguesia_code',
           'freguesia',
           'haversine_dist_mean',
           'haversine_dist_median',
           '2020_d-all_cycling_circuity_mean',
           '2020_d-all_cycling_circuity_median',
           '2020_d-all_driving_circuity_mean',
           '2020_d-all_driving_circuity_median',
           '2020_d-all_walking_circuity_mean',
           '2020_d-all_walking_circuity_median',
          ]
for i in years:
    for j in dist_thresholds:
        for k in modes:
            for h in metrics:
                columns.append(i + '_d-' + str(j[0]) + '-' + str(j[1]) + '_' + k + '_circuity_' + h)

# Rows are collected in a list and turned into a frame once;
# DataFrame.append() no longer exists in pandas >= 2.0.
records = []

for key in parishes_or:
    mask_parish = df_IMOB_points['freguesia_or'] == int(key)

    # The distance summary is read from the '2020' dataset explicitly. It used
    # to be indexed with `year` before the loop below had bound that name.
    trips_2020 = data_IMOB['2020'].loc[mask_parish]

    data_row = {
        'freguesia': gdf_freguesias['Freguesia'][str(key)],
        'freguesia_code': key,
        'haversine_dist_mean': trips_2020['haversine_dist'].mean(),
        'haversine_dist_median': trips_2020['haversine_dist'].median(),
    }

    for year in years:
        data_IMOB_year = data_IMOB[year].loc[mask_parish]

        # 'all' covers every selected trip; then one subset per distance band.
        subsets = [('all', data_IMOB_year)]
        for dist in dist_thresholds:
            mask_dist = (data_IMOB_year['haversine_dist'] >= dist[0]) & (data_IMOB_year['haversine_dist'] < dist[1])
            subsets.append((str(dist[0]) + '-' + str(dist[1]), data_IMOB_year.loc[mask_dist]))

        for label, trips in subsets:
            for mode in modes:
                circuity = trips[mode_columns[mode]]
                prefix = year + '_d-' + label + '_' + mode + '_circuity_'
                data_row[prefix + 'mean'] = circuity.mean()
                data_row[prefix + 'median'] = circuity.median()

    records.append(data_row)

df_O_parishes = pd.DataFrame(records)
# Keep the declared columns first and any additional ones in insertion order.
extra_columns = [c for c in df_O_parishes.columns if c not in columns]
df_O_parishes = df_O_parishes.reindex(columns=columns + extra_columns)
df_O_parishes.head()

In [None]:
# Show the summary with parishes as columns and statistics as rows.
df_O_parishes.T

### Plot circuity per parish per distance for 2020

In [None]:
# One figure per distance band: 2020 median circuity of each parish for the
# three travel modes, parishes ordered by descending cycling circuity.
for dist in dist_thresholds:
    band = str(dist[0]) + '-' + str(dist[1])
    df_O_parishes = df_O_parishes.sort_values(
        by=['2020_d-' + band + '_cycling_circuity_median'], ascending=False)

    fig = go.Figure()
    for mode in ('driving', 'cycling', 'walking'):
        fig.add_trace(go.Scatter(
            x=df_O_parishes['freguesia'],
            y=df_O_parishes['2020_d-' + band + '_' + mode + '_circuity_median'],
            mode='lines+markers',
            name=mode,
        ))

    fig.update_layout(
        title="IMOB Median Circuity for D=[" + band + "] ",
        xaxis_title="Parishes",
        yaxis_title="Circuity",
        legend_title="Mode",
        font={
            'family': "Courier New, monospace",
            'size': 12,
            'color': "Black",
        },
    )

    fig.show()
    plt.show()

### Plot circuity per parish per distance for 2013-2020 for D=[2,8]

In [None]:
# One figure per parish: median circuity of the D=[2,8] distance band for
# each year, one line per travel mode.
for i, parish in df_O_parishes['freguesia'].items():
    row = df_O_parishes.loc[i]

    fig = go.Figure()
    for mode in ('driving', 'cycling', 'walking'):
        # Pick each year's column by name so the values always line up with
        # `years`, whatever the column order of the frame is.
        medians = [row['{}_d-2-8_{}_circuity_median'.format(year, mode)] for year in years]
        fig.add_trace(go.Scatter(y=medians,
                                 x=years,
                                 mode='lines+markers',
                                 name=mode,
                                 ))

    fig.update_layout(
        title="IMOB Median Circuity for D=[2,8] across the years for "+parish,
        # The x values are years; the axis used to be titled "Parishes".
        xaxis_title="Year",
        yaxis_title="Circuity",
        legend_title="Mode",
        font=dict(
            family="Times New Roman",
            size=18,
            color="Black"
        )
    )

    fig.show()

## Analyze trips that start and end in the SAME parish

In [None]:
# True for trips whose origin and destination parish codes are equal.
mask_same_parish = df_IMOB_points['freguesia_or'].eq(df_IMOB_points['freguesia_de'])

In [None]:
# Keep only the trips that start and end in the same parish, then split them
# into one frame per parish code (by origin and by destination).
parishes_or = {}
parishes_de = {}

df_IMOB_points_same_freguesia = df_IMOB_points[mask_same_parish]

for index, parish in gdf_freguesias['Freguesia'].items():
    parishes_or[index] = df_IMOB_points_same_freguesia[
        df_IMOB_points_same_freguesia['freguesia_or'] == int(index)]

    mask_parish = df_IMOB_points_same_freguesia['freguesia_de'] == int(index)
    parishes_de[index] = df_IMOB_points_same_freguesia[mask_parish]

# Report how many of those trips start ([O]) and end ([D]) in every parish.
for key, value in parishes_or.items():
    parish_name = gdf_freguesias['Freguesia'][str(key)]
    print('[O] Parish', parish_name, 'with', len(value), 'samples.')
    print('[D] Parish', parish_name, 'with', len(parishes_de[key]), 'samples.')

In [None]:
# Build df_O_parishes for trips that start and end in the same parish: one row
# per parish holding the haversine distance summary and, for every year /
# distance band / travel mode, the mean and median circuity.
years = [str(i) for i in range(2013, 2021)]
# Haversine distance bands [lower, upper); a trip belongs to a band when
# lower <= distance < upper.
dist_thresholds = [[0, 1],
                   [1, 2],
                   [2, 8],
                   [8, 12],
                   [12, float('inf')]]
modes = ['cycling', 'driving', 'walking']
metrics = ['mean', 'median']

# Circuity column that stands for each travel mode.
mode_columns = {
    'cycling': 'circuity_cycling-regular',
    'driving': 'circuity_driving-car',
    'walking': 'circuity_foot-walking',
}

# Leading column order of the result; columns not listed here (the per-year
# "d-all" summaries of the other years) are appended after these.
columns = ['freguesia_code',
           'freguesia',
           'haversine_dist_mean',
           'haversine_dist_median',
           '2020_d-all_cycling_circuity_mean',
           '2020_d-all_cycling_circuity_median',
           '2020_d-all_driving_circuity_mean',
           '2020_d-all_driving_circuity_median',
           '2020_d-all_walking_circuity_mean',
           '2020_d-all_walking_circuity_median',
          ]
for i in years:
    for j in dist_thresholds:
        for k in modes:
            for h in metrics:
                columns.append(i + '_d-' + str(j[0]) + '-' + str(j[1]) + '_' + k + '_circuity_' + h)

# Rows are collected in a list and turned into a frame once;
# DataFrame.append() no longer exists in pandas >= 2.0.
records = []

for key in parishes_or:
    mask_parish = df_IMOB_points['freguesia_or'] == int(key)
    # Trips of this origin parish that also end in it.
    mask_trips = mask_parish & mask_same_parish

    # The distance summary is read from the '2020' dataset explicitly. It used
    # to be indexed with `year` before the loop below had bound that name.
    trips_2020 = data_IMOB['2020'].loc[mask_trips]

    data_row = {
        'freguesia': gdf_freguesias['Freguesia'][str(key)],
        'freguesia_code': key,
        'haversine_dist_mean': trips_2020['haversine_dist'].mean(),
        'haversine_dist_median': trips_2020['haversine_dist'].median(),
    }

    for year in years:
        data_IMOB_year = data_IMOB[year].loc[mask_trips]

        # 'all' covers every selected trip; then one subset per distance band.
        subsets = [('all', data_IMOB_year)]
        for dist in dist_thresholds:
            mask_dist = (data_IMOB_year['haversine_dist'] >= dist[0]) & (data_IMOB_year['haversine_dist'] < dist[1])
            subsets.append((str(dist[0]) + '-' + str(dist[1]), data_IMOB_year.loc[mask_dist]))

        for label, trips in subsets:
            for mode in modes:
                circuity = trips[mode_columns[mode]]
                prefix = year + '_d-' + label + '_' + mode + '_circuity_'
                data_row[prefix + 'mean'] = circuity.mean()
                data_row[prefix + 'median'] = circuity.median()

    records.append(data_row)

df_O_parishes = pd.DataFrame(records)
# Keep the declared columns first and any additional ones in insertion order.
extra_columns = [c for c in df_O_parishes.columns if c not in columns]
df_O_parishes = df_O_parishes.reindex(columns=columns + extra_columns)
df_O_parishes.head()

In [None]:
# Axis category value -> label shown on the axis, in the order both lists are
# handed to plotly.
parish_tick_labels = {
    'Arroios': 'Arroios',
    'Avenidas Novas': 'Av. Novas',
    'Miseric': 'Misericórdia',
    'Santo Ant': 'Sto. António',
    'Areeiro': 'Areeiro',
    'Santa Maria Maior': 'Sta. Maria Maior',
    'Campo de Ourique': 'Campo de Ourique',
    'Alvalade': 'Alvalade',
    'S1o Vicente': 'S. Vicente',
    'Penha de Fran': 'Penha de França',
    'Campolide': 'Campolide',
    'Estrela': 'Estrela',
    'S1o Domingos de Benfica': 'S. Domingos de Benfica',
    'Bel6m': 'Belém',
    'Lumiar': 'Lumiar',
    'Ajuda': 'Ajuda',
    'Alc6ntara': 'Alcântara',
    'Parque das Nas': 'Parque das Nações',
    'Carnide': 'Carnide',
    'Olivais': 'Olivais',
    'Beato': 'Beato',
    'Benfica': 'Benfica',
    'Marvila': 'Marvila',
    'Santa Clara': 'Santa Clara',
}

# One figure per distance band: 2020 median circuity of each parish for the
# three travel modes, parishes ordered by descending cycling circuity.
for dist in dist_thresholds:
    band = str(dist[0]) + '-' + str(dist[1])
    df_O_parishes = df_O_parishes.sort_values(
        by=['2020_d-' + band + '_cycling_circuity_median'], ascending=False)

    fig = go.Figure()
    for mode in ('driving', 'cycling', 'walking'):
        fig.add_trace(go.Scatter(
            x=df_O_parishes['freguesia'],
            y=df_O_parishes['2020_d-' + band + '_' + mode + '_circuity_median'],
            mode='lines+markers',
            name=mode,
        ))

    fig.update_layout(
        title="IMOB Median Circuity for D=[" + band + "] ",
        xaxis_title="Parishes",
        yaxis_title="Circuity",
        legend_title="Mode",
        autosize=False,
        width=900,
        height=500,
        margin={'l': 20, 'r': 20, 'b': 20, 't': 20, 'pad': 2},
        font={
            'family': "Times New Roman",
            'size': 18,
            'color': "Black",
        },
    )
    fig.update_xaxes(tickangle=-45)
    fig.update_xaxes(
        ticktext=list(parish_tick_labels.values()),
        tickvals=list(parish_tick_labels.keys()),
    )
    fig.show()
    plt.show()

In [None]:
# One figure per parish: median circuity of the D=[2,8] distance band for
# each year, one line per travel mode.
for i, parish in df_O_parishes['freguesia'].items():
    row = df_O_parishes.loc[i]

    fig = go.Figure()
    for mode in ('driving', 'cycling', 'walking'):
        # Pick each year's column by name so the values always line up with
        # `years`, whatever the column order of the frame is.
        medians = [row['{}_d-2-8_{}_circuity_median'.format(year, mode)] for year in years]
        fig.add_trace(go.Scatter(y=medians,
                                 x=years,
                                 mode='lines+markers',
                                 name=mode,
                                 ))

    fig.update_layout(
        title="IMOB Median Circuity for D=[2,8] across the years for "+parish,
        # The x values are years; the axis used to be titled "Parishes".
        xaxis_title="Year",
        yaxis_title="Circuity",
        legend_title="Mode",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="Black"
        ))

    fig.show()

## Analyze trips that start and end in DIFFERENT parishes

In [None]:
# True for trips whose origin and destination parish codes differ.
mask_diff_parish = df_IMOB_points['freguesia_or'].ne(df_IMOB_points['freguesia_de'])

In [None]:
# Keep only the trips that start and end in different parishes, then split
# them into one frame per parish code (by origin and by destination).
parishes_or = {}
parishes_de = {}

df_IMOB_points_diff_freguesia = df_IMOB_points[mask_diff_parish]

for index, parish in gdf_freguesias['Freguesia'].items():
    parishes_or[index] = df_IMOB_points_diff_freguesia[
        df_IMOB_points_diff_freguesia['freguesia_or'] == int(index)]

    mask_parish = df_IMOB_points_diff_freguesia['freguesia_de'] == int(index)
    parishes_de[index] = df_IMOB_points_diff_freguesia[mask_parish]

# Report how many of those trips start ([O]) and end ([D]) in every parish.
for key, value in parishes_or.items():
    parish_name = gdf_freguesias['Freguesia'][str(key)]
    print('[O] Parish', parish_name, 'with', len(value), 'samples.')
    print('[D] Parish', parish_name, 'with', len(parishes_de[key]), 'samples.')

In [None]:
# Build df_O_parishes for trips that start and end in different parishes: one
# row per origin parish holding the haversine distance summary and, for every
# year / distance band / travel mode, the mean and median circuity.
years = [str(i) for i in range(2013, 2021)]
# Haversine distance bands [lower, upper); a trip belongs to a band when
# lower <= distance < upper.
dist_thresholds = [[0, 1],
                   [1, 2],
                   [2, 8],
                   [8, 12],
                   [12, float('inf')]]
modes = ['cycling', 'driving', 'walking']
metrics = ['mean', 'median']

# Circuity column that stands for each travel mode.
mode_columns = {
    'cycling': 'circuity_cycling-regular',
    'driving': 'circuity_driving-car',
    'walking': 'circuity_foot-walking',
}

# Leading column order of the result; columns not listed here (the per-year
# "d-all" summaries of the other years) are appended after these.
columns = ['freguesia_code',
           'freguesia',
           'haversine_dist_mean',
           'haversine_dist_median',
           '2020_d-all_cycling_circuity_mean',
           '2020_d-all_cycling_circuity_median',
           '2020_d-all_driving_circuity_mean',
           '2020_d-all_driving_circuity_median',
           '2020_d-all_walking_circuity_mean',
           '2020_d-all_walking_circuity_median',
          ]
for i in years:
    for j in dist_thresholds:
        for k in modes:
            for h in metrics:
                columns.append(i + '_d-' + str(j[0]) + '-' + str(j[1]) + '_' + k + '_circuity_' + h)

# Rows are collected in a list and turned into a frame once;
# DataFrame.append() no longer exists in pandas >= 2.0.
records = []

for key in parishes_or:
    mask_parish = df_IMOB_points['freguesia_or'] == int(key)
    # Trips of this origin parish that end in another parish.
    mask_trips = mask_parish & mask_diff_parish

    # The distance summary is read from the '2020' dataset explicitly. It used
    # to be indexed with `year` before the loop below had bound that name.
    trips_2020 = data_IMOB['2020'].loc[mask_trips]

    data_row = {
        'freguesia': gdf_freguesias['Freguesia'][str(key)],
        'freguesia_code': key,
        'haversine_dist_mean': trips_2020['haversine_dist'].mean(),
        'haversine_dist_median': trips_2020['haversine_dist'].median(),
    }

    for year in years:
        data_IMOB_year = data_IMOB[year].loc[mask_trips]

        # 'all' covers every selected trip; then one subset per distance band.
        subsets = [('all', data_IMOB_year)]
        for dist in dist_thresholds:
            mask_dist = (data_IMOB_year['haversine_dist'] >= dist[0]) & (data_IMOB_year['haversine_dist'] < dist[1])
            subsets.append((str(dist[0]) + '-' + str(dist[1]), data_IMOB_year.loc[mask_dist]))

        for label, trips in subsets:
            for mode in modes:
                circuity = trips[mode_columns[mode]]
                prefix = year + '_d-' + label + '_' + mode + '_circuity_'
                data_row[prefix + 'mean'] = circuity.mean()
                data_row[prefix + 'median'] = circuity.median()

    records.append(data_row)

df_O_parishes = pd.DataFrame(records)
# Keep the declared columns first and any additional ones in insertion order.
extra_columns = [c for c in df_O_parishes.columns if c not in columns]
df_O_parishes = df_O_parishes.reindex(columns=columns + extra_columns)
df_O_parishes.head()

In [None]:
# One figure per distance band: 2020 median circuity of each parish for the
# three travel modes, parishes ordered by descending cycling circuity.
for dist in dist_thresholds:
    band = str(dist[0]) + '-' + str(dist[1])
    df_O_parishes = df_O_parishes.sort_values(
        by=['2020_d-' + band + '_cycling_circuity_median'], ascending=False)

    fig = go.Figure()
    for mode in ('driving', 'cycling', 'walking'):
        fig.add_trace(go.Scatter(
            x=df_O_parishes['freguesia'],
            y=df_O_parishes['2020_d-' + band + '_' + mode + '_circuity_median'],
            mode='lines+markers',
            name=mode,
        ))

    fig.update_layout(
        title="IMOB Median Circuity for D=[" + band + "] ",
        xaxis_title="Parishes",
        yaxis_title="Circuity",
        legend_title="Mode",
        font={
            'family': "Times New Roman",
            'size': 18,
            'color': "Black",
        },
    )

    fig.show()
    plt.show()

In [None]:
# One figure per parish: median circuity of the D=[2,8] distance band for
# each year, one line per travel mode.
for i, parish in df_O_parishes['freguesia'].items():
    row = df_O_parishes.loc[i]

    fig = go.Figure()
    for mode in ('driving', 'cycling', 'walking'):
        # Pick each year's column by name so the values always line up with
        # `years`, whatever the column order of the frame is.
        medians = [row['{}_d-2-8_{}_circuity_median'.format(year, mode)] for year in years]
        fig.add_trace(go.Scatter(y=medians,
                                 x=years,
                                 mode='lines+markers',
                                 name=mode,
                                 ))

    fig.update_layout(
        title="IMOB Median Circuity for D=[2,8] across the years for "+parish,
        # The x values are years; the axis used to be titled "Parishes".
        xaxis_title="Year",
        yaxis_title="Circuity",
        legend_title="Mode",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="Black"
        ))

    fig.show()

## Analyze circuity between OD parishes

In [None]:
# Split the IMOB trip points by parish: one sub-frame per parish code for
# trips that start there (parishes_or) and one for trips that end there
# (parishes_de). Keys are the index labels of gdf_freguesias.
parishes_or, parishes_de = {}, {}

for parish_code in gdf_freguesias['Freguesia'].index:
    # The point table stores parish codes as integers.
    code_as_int = int(parish_code)
    parishes_or[parish_code] = df_IMOB_points.loc[df_IMOB_points['freguesia_or'] == code_as_int]
    parishes_de[parish_code] = df_IMOB_points.loc[df_IMOB_points['freguesia_de'] == code_as_int]

parishes_or

In [None]:
# Build df_OD_parishes: one row per (origin parish, destination parish) pair
# holding the mean/median haversine distance of the 2020 trips and, for every
# year, the mean/median circuity per mode, both over all trips and per
# distance band.
years = [str(i) for i in range(2013, 2021)]
dist_thresholds = [[0, 1],
                   [1, 2],
                   [2, 8],
                   [8, 12],
                   [12, float('inf')]]
modes = ['cycling', 'driving', 'walking']
metrics = ['mean', 'median']

# Circuity column of the yearly IMOB frames that feeds each reported mode.
circuity_source = {
    'cycling': 'circuity_cycling-regular',
    'driving': 'circuity_driving-car',
    'walking': 'circuity_foot-walking',
}

# Leading columns of the result, in display order.
columns = ['haversine_dist_mean', 'haversine_dist_median']
for k in modes:
    for h in metrics:
        columns.append('2020_d-all_' + k + '_circuity_' + h)
for i in years:
    for j in dist_thresholds:
        for k in modes:
            for h in metrics:
                columns.append(i + '_d-' + str(j[0]) + '-' + str(j[1]) + '_' + k + '_circuity_' + h)

# Collect plain dicts and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
od_rows = []

for index_O, parish_O in gdf_freguesias['Freguesia'].items():

    for index_D, parish_D in gdf_freguesias['Freguesia'].items():

        data_row = {
            'freguesia_O': index_O,
            'freguesia_D': index_D,
            'freguesia_O_text': parish_O,
            'freguesia_D_text': parish_D,
        }

        # Trips from parish O to parish D. The mask is computed on
        # df_IMOB_points and applied to the yearly frames with .loc, so both
        # must share the same index - TODO confirm.
        mask_OD = (df_IMOB_points['freguesia_or'] == int(index_O)) & (df_IMOB_points['freguesia_de'] == int(index_D))

        trips_2020 = data_IMOB['2020'].loc[mask_OD]
        data_row['haversine_dist_mean'] = trips_2020['haversine_dist'].mean()
        data_row['haversine_dist_median'] = trips_2020['haversine_dist'].median()

        for year in years:
            data_IMOB_year = data_IMOB[year].loc[mask_OD]
            print('O:', parish_O, '\tD:', parish_D, '\tSize:', data_IMOB_year.shape[0])

            # Statistics over all trips of the pair.
            for mode in modes:
                for metric in metrics:
                    key = year + '_d-all_' + mode + '_circuity_' + metric
                    data_row[key] = data_IMOB_year[circuity_source[mode]].agg(metric)

            # Same statistics restricted to each half-open band [low, high).
            for dist in dist_thresholds:
                mask_dist = (data_IMOB_year['haversine_dist'] >= dist[0]) & (data_IMOB_year['haversine_dist'] < dist[1])
                data_IMOB_year_dist = data_IMOB_year.loc[mask_dist]

                band = year + '_d-' + str(dist[0]) + '-' + str(dist[1]) + '_'
                for mode in modes:
                    for metric in metrics:
                        data_row[band + mode + '_circuity_' + metric] = data_IMOB_year_dist[circuity_source[mode]].agg(metric)

        od_rows.append(data_row)

df_OD_parishes = pd.DataFrame(od_rows)

# Keep the column order produced by the previous append-based code: the
# predefined columns first, then the remaining keys in insertion order.
predefined = set(columns)
extra_columns = [c for c in df_OD_parishes.columns if c not in predefined]
df_OD_parishes = df_OD_parishes.reindex(columns=columns + extra_columns)

df_OD_parishes.head()

In [None]:
# Origin x destination heatmap of the 2020 median cycling circuity over all
# trip distances.
origin_labels = df_OD_parishes['freguesia_O_text'].unique()
destination_labels = df_OD_parishes['freguesia_D_text'].unique()

# The reshape assumes df_OD_parishes has exactly one row per
# (origin, destination) pair with the origin varying slowest, so that
# matrix rows are origins and matrix columns are destinations.
matrix = df_OD_parishes['2020_d-all_cycling_circuity_median'].to_numpy().reshape((len(origin_labels), len(destination_labels)))

# Plotly maps z rows to y and z columns to x, so destinations label the x
# axis and origins the y axis, in agreement with the axis titles below.
fig = go.Figure(data=go.Heatmap(
        z=matrix,
        x=destination_labels,
        y=origin_labels,
        colorscale='Viridis'))

fig.update_layout(
    title="IMOB Median Circuity for D=[all] across for 2020",
    xaxis_title="Destination",
    yaxis_title="Origin",
    legend_title="Mode",
    width=1000,
    height=1000,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    ))
fig.show()

In [None]:
def plot_od_circuity_heatmap(od_frame, value_column, title):
    """Show an origin x destination heatmap of one df_OD_parishes column.

    Parameters
    ----------
    od_frame : pandas.DataFrame
        Frame with columns 'freguesia_O_text', 'freguesia_D_text' and
        ``value_column``. It must hold exactly one row per
        (origin, destination) pair, ordered with the origin varying slowest,
        because the values are reshaped positionally.
    value_column : str
        Name of the column to display.
    title : str
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure, after it has been shown.
    """
    origin_labels = od_frame['freguesia_O_text'].unique()
    destination_labels = od_frame['freguesia_D_text'].unique()
    matrix = od_frame[value_column].to_numpy().reshape((len(origin_labels), len(destination_labels)))

    # Plotly maps z rows to y and z columns to x: origins on y, destinations on x.
    fig = go.Figure(data=go.Heatmap(
            z=matrix,
            x=destination_labels,
            y=origin_labels,
            colorscale='Viridis'))

    fig.update_layout(
        title=title,
        xaxis_title="Destination",
        yaxis_title="Origin",
        legend_title="Mode",
        width=1000,
        height=1000,
        margin=dict(
            l=50,
            r=50,
            b=100,
            t=100,
            pad=4
        ),
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="Black"
        ))
    fig.show()
    return fig


# Drop every pair that starts or ends in parish code '110664'
# (Santa Clara, going by the variable name).
df_OD_parishes_noSantaClara = df_OD_parishes[(df_OD_parishes['freguesia_D'] != '110664') & (df_OD_parishes['freguesia_O'] != '110664')]

# (column to display, figure title) for each heatmap, in display order.
heatmap_specs = [
    ('2020_d-all_cycling_circuity_median', "IMOB Median Circuity for CYCLING,D=[all] across for 2020"),
    ('2020_d-2-8_cycling_circuity_median', "IMOB Median Circuity for CYCLING,D=[2,8] across for 2020"),
    ('2020_d-2-8_driving_circuity_median', "IMOB Median Circuity for DRIVING,D=[2,8] across for 2020"),
    ('2020_d-2-8_walking_circuity_median', "IMOB Median Circuity for WALKING,D=[2,8] across for 2020"),
]

for value_column, heatmap_title in heatmap_specs:
    fig = plot_od_circuity_heatmap(df_OD_parishes_noSantaClara, value_column, heatmap_title)

In [None]:
# Median 2020 circuity per origin parish, together with the number of trips.

# NOTE: this is an alias, not a copy - the two assignments below also add the
# parish columns to data_IMOB['2020'] itself.
data_IMOB_withFreguesias = data_IMOB['2020']
data_IMOB_withFreguesias['freguesia_or'] = df_IMOB_points['freguesia_or']
data_IMOB_withFreguesias['freguesia_de'] = df_IMOB_points['freguesia_de']

columns = ['walking', 'cycling', 'driving', 'freguesia', 'size']

# Collect the rows first and build the frame once: DataFrame.append was
# removed in pandas 2.0.
median_rows = []
for freguesia in df_IMOB_points['freguesia_or'].unique():
    data_freguesia = data_IMOB_withFreguesias[data_IMOB_withFreguesias['freguesia_or'] == freguesia]

    median_rows.append({
        'cycling': data_freguesia['circuity_cycling-regular'].median(),
        'walking': data_freguesia['circuity_foot-walking'].median(),
        'driving': data_freguesia['circuity_driving-car'].median(),
        # gdf_freguesias is looked up by the string form of the parish code.
        'freguesia': str(gdf_freguesias['Freguesia'][str(freguesia)]),
        'size': data_freguesia.shape[0],
    })

median_freguesias = pd.DataFrame(median_rows, columns=columns)

median_freguesias

In [None]:
# Rank the 2020 trips by cycling circuity and plot rank against circuity.
# Only trips with a cycling circuity in [1, 10) are kept.
data_IMOB_withFreguesias_aux = data_IMOB_withFreguesias.sort_values(by=['circuity_cycling-regular'], ascending=True)
circuity_in_range = (data_IMOB_withFreguesias_aux['circuity_cycling-regular'] >= 1) & \
                    (data_IMOB_withFreguesias_aux['circuity_cycling-regular'] < 10)
# After the reset the positional index is the rank of each trip.
data_IMOB_withFreguesias_aux = data_IMOB_withFreguesias_aux[circuity_in_range].reset_index()


fig = go.Figure()
fig.add_trace(go.Scatter(y=data_IMOB_withFreguesias_aux.index,
                         x=data_IMOB_withFreguesias_aux['circuity_cycling-regular'],
                         mode='lines+markers',
                         name='cycling'))

fig.update_layout(
    title="",
    xaxis_title="Cycling Circuity",
    yaxis_title="# Trips ",
    legend_title="Transport Mode",
    font=dict(
        family="Times New Roman",
        size=18,
        color="Black"
    )
)

fig.show()

In [None]:
# One figure per origin parish: trips of that parish ranked by cycling
# circuity (rank on y, circuity on x).
cycling_column = 'circuity_cycling-regular'

for parish_code in df_IMOB_points['freguesia_or'].unique():
    starts_in_parish = data_IMOB_withFreguesias_aux['freguesia_or'] == parish_code
    # The fresh positional index is the within-parish rank; it follows the
    # row order of data_IMOB_withFreguesias_aux.
    parish_trips = data_IMOB_withFreguesias_aux[starts_in_parish].reset_index()

    ranked_trace = go.Scatter(
        x=parish_trips[cycling_column],
        y=parish_trips.index,
        mode='lines+markers',
        name='cycling',
    )

    fig = go.Figure()
    fig.add_trace(ranked_trace)
    fig.update_layout(
        # The parish name is looked up by the string form of the code.
        title=str(gdf_freguesias['Freguesia'][str(parish_code)]),
        xaxis_title="Circuity Cycling",
        yaxis_title="# Trips",
        legend_title="Transport Mode",
        font=dict(family="Courier New, monospace", size=12, color="Black"),
    )

    fig.show()
    plt.show()

In [None]:
# Rebuild data_IMOB_withFreguesias_aux, this time keeping only trips with a
# cycling circuity in [1, 10) AND a haversine distance in [2, 8]
# (both distance bounds inclusive).
data_IMOB_withFreguesias_aux = data_IMOB_withFreguesias.sort_values(by=['circuity_cycling-regular'], ascending=True)
circuity_in_range = (data_IMOB_withFreguesias_aux['circuity_cycling-regular'] >= 1) & \
                    (data_IMOB_withFreguesias_aux['circuity_cycling-regular'] < 10)
distance_in_range = (data_IMOB_withFreguesias_aux['haversine_dist'] >= 2) & \
                    (data_IMOB_withFreguesias_aux['haversine_dist'] <= 8)
data_IMOB_withFreguesias_aux = data_IMOB_withFreguesias_aux[circuity_in_range & distance_in_range].reset_index()

# Rank-vs-circuity figure for the filtered trips. It is built but not
# displayed; call fig.show() to inspect it.
fig = go.Figure()
fig.add_trace(go.Scatter(y=data_IMOB_withFreguesias_aux.index,
                         x=data_IMOB_withFreguesias_aux['circuity_cycling-regular'],
                         mode='lines+markers',
                         # The trace shows cycling circuity, so label it accordingly.
                         name='cycling'))

fig.update_layout(
    title="",
    xaxis_title="Circuity Cycling",
    yaxis_title="# Trips",
    legend_title="Transport Mode",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    )
)

In [None]:

# For every origin parish, write an HTML box plot comparing the circuity
# distributions of the three modes to images/.
# (circuity column, trace label) in display order.
box_plot_modes = [
    ('circuity_cycling-regular', 'Cycling'),
    ('circuity_foot-walking', 'Walking'),
    ('circuity_driving-car', 'Driving'),
]

for freguesia in df_IMOB_points['freguesia_or'].unique():
    data_freguesia = data_IMOB_withFreguesias_aux[data_IMOB_withFreguesias_aux['freguesia_or'] == freguesia].reset_index()
    # The parish name is looked up by the string form of the code.
    parish_name = str(gdf_freguesias['Freguesia'][str(freguesia)])

    fig = go.Figure()
    for circuity_column, mode_label in box_plot_modes:
        fig.add_trace(go.Box(y=data_freguesia[circuity_column],
                             boxpoints='all',  # can also be outliers, or suspectedoutliers, or False
                             jitter=0.3,       # add some jitter for a better separation between points
                             pointpos=-1.8,    # relative position of points wrt box
                             name=mode_label,
                             ))

    fig.update_layout(
        title=parish_name,
        xaxis_title="Circuity Distribution",
        yaxis_title="Circuity",
        legend_title="Transport Mode",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="Black"
        )
    )

    # The images/ directory must already exist; write_html does not create it.
    image_output = 'circuity_distribution_box_plot_' + parish_name
    fig.write_html("images/" + image_output + ".html")
    
    

In [None]:
def write_parish_box_plot(trips, circuity_column, title, image_output):
    """Write one HTML figure with a box of ``circuity_column`` per origin parish.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trip table with a 'freguesia_or' column and ``circuity_column``.
    circuity_column : str
        Name of the circuity column to plot.
    title : str
        Figure title.
    image_output : str
        Output file name without directory or extension; the figure is
        written to ``images/<image_output>.html`` (the directory must exist).

    Returns
    -------
    plotly.graph_objects.Figure
        The figure that was written.
    """
    fig = go.Figure()
    # Parish codes come from df_IMOB_points, names from gdf_freguesias
    # (looked up by the string form of the code).
    for freguesia in df_IMOB_points['freguesia_or'].unique():
        data_freguesia = trips[trips['freguesia_or'] == freguesia].reset_index()

        fig.add_trace(go.Box(y=data_freguesia[circuity_column],
                             boxpoints='all',  # can also be outliers, or suspectedoutliers, or False
                             jitter=0.3,       # add some jitter for a better separation between points
                             pointpos=-1.8,    # relative position of points wrt box
                             name=str(gdf_freguesias['Freguesia'][str(freguesia)]),
                             ))
    fig.update_layout(
        title=title,
        xaxis_title="Parishes",
        yaxis_title="Circuity",
        legend_title="Transport Mode",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="Black"
        )
    )
    fig.write_html("images/" + image_output + ".html")
    return fig


# (circuity column, figure title, output file stem) for each mode. Each
# figure now carries the title of the mode it actually shows.
all_parish_box_plots = [
    ('circuity_cycling-regular', "Cycling Circuity Distributions", 'circuity_distribution_box_plot_cycling_all_parishes'),
    ('circuity_foot-walking', "Walking Circuity Distributions", 'circuity_distribution_box_plot_walking_all_parishes'),
    ('circuity_driving-car', "Driving Circuity Distributions", 'circuity_distribution_box_plot_driving_all_parishes'),
]

for circuity_column, plot_title, image_output in all_parish_box_plots:
    fig = write_parish_box_plot(data_IMOB_withFreguesias_aux, circuity_column, plot_title, image_output)

# Analyze circuity by IMOB-reported transport mode

In [None]:
# Boolean masks over the 2020 IMOB trips, one per reported transport mode,
# derived from the 'vehicle' column.
vehicle_2020 = data_IMOB['2020']['vehicle']

# Vehicle categories counted as driving.
driving_vehicles = [
    'passenger car - as passenger',
    'passenger car - as driver',
    'van/lorry/tractor/camper',
    'motorcycle and moped',
    'Táxi (como passageiro)',
]
mask_IMOB_driving = vehicle_2020.isin(driving_vehicles)

# NOTE(review): this selects car-passenger trips, the same category that is
# already part of the driving mask. It looks like a copy-paste slip; confirm
# which 'vehicle' value denotes walking and use it here.
mask_IMOB_walking = vehicle_2020 == 'passenger car - as passenger'
mask_IMOB_cycling = vehicle_2020 == 'Cycling'

In [None]:
# Slice the 2020 IMOB trips with the per-mode masks and report the shape of
# each subset.
trips_2020 = data_IMOB['2020']

data_IMOB_driving = trips_2020.loc[mask_IMOB_driving]
print('Size of driving data:', data_IMOB_driving.shape)

data_IMOB_walking = trips_2020.loc[mask_IMOB_walking]
print('Size of walking data:', data_IMOB_walking.shape)

data_IMOB_cycling = trips_2020.loc[mask_IMOB_cycling]
print('Size of cycling data:', data_IMOB_cycling.shape)

In [None]:

# Trips selected by the cycling mask: for each routing profile, rank the
# trips by that profile's circuity and plot rank (y) against circuity (x).
fig = go.Figure()

for circuity_column, trace_name in (('circuity_cycling-regular', 'cycling'),
                                    ('circuity_driving-car', 'driving'),
                                    ('circuity_foot-walking', 'walking')):
    # Re-sort in place of the previous ordering; the fresh index is the rank.
    data_IMOB_cycling = data_IMOB_cycling.sort_values(by=[circuity_column], ascending=True).reset_index(drop=True)
    fig.add_trace(go.Scatter(
        x=data_IMOB_cycling[circuity_column],
        y=data_IMOB_cycling.index,
        mode='lines',
        name=trace_name,
    ))

fig.update_layout(
    title='IMOB Cycling Trips',
    xaxis_title="Circuity",
    yaxis_title="# Trips",
    legend_title="Transport Mode",
    font=dict(family="Courier New, monospace", size=12, color="Black"),
)

fig.show()
plt.show()

In [None]:

# Trips selected by the walking mask: for each routing profile, rank the
# trips by that profile's circuity and plot rank (y) against circuity (x).
fig = go.Figure()

for circuity_column, trace_name in (('circuity_cycling-regular', 'cycling'),
                                    ('circuity_driving-car', 'driving'),
                                    ('circuity_foot-walking', 'walking')):
    # Re-sort in place of the previous ordering; the fresh index is the rank.
    data_IMOB_walking = data_IMOB_walking.sort_values(by=[circuity_column], ascending=True).reset_index(drop=True)
    fig.add_trace(go.Scatter(
        x=data_IMOB_walking[circuity_column],
        y=data_IMOB_walking.index,
        mode='lines',
        name=trace_name,
    ))

fig.update_layout(
    title='IMOB Walking Trips',
    xaxis_title="Circuity",
    yaxis_title="# Trips",
    legend_title="Transport Mode",
    font=dict(family="Courier New, monospace", size=12, color="Black"),
)

fig.show()
plt.show()

In [None]:

# Trips selected by the driving mask: for each routing profile, rank the
# trips by that profile's circuity and plot rank (y) against circuity (x).
fig = go.Figure()

for circuity_column, trace_name in (('circuity_cycling-regular', 'cycling'),
                                    ('circuity_driving-car', 'driving'),
                                    ('circuity_foot-walking', 'walking')):
    # Re-sort in place of the previous ordering; the fresh index is the rank.
    data_IMOB_driving = data_IMOB_driving.sort_values(by=[circuity_column], ascending=True).reset_index(drop=True)
    fig.add_trace(go.Scatter(
        x=data_IMOB_driving[circuity_column],
        y=data_IMOB_driving.index,
        mode='lines',
        name=trace_name,
    ))

fig.update_layout(
    title='IMOB Driving Trips',
    xaxis_title="Circuity",
    yaxis_title="# Trips",
    legend_title="Transport Mode",
    font=dict(family="Courier New, monospace", size=12, color="Black"),
)

fig.show()
plt.show()