In [None]:
import geojson
import geopandas as gpd
import json
import pandas as pd
import numpy as np
import glob
from tqdm import tqdm, trange
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import folium

# Seed NumPy's global RNG so the random row selections made later are repeatable.
np.random.seed(0)
# Allow pandas to display up to 500 columns before truncating wide frames.
pd.set_option('display.max_columns', 500)

In [None]:
%%html
<style>
  table {margin-left: 0 !important;}
</style>

### Waytypes

| Value |     Name     |
|:-----|:------------|
| 0     | Unknown      |
| 1     | State Road   |
| 2     | Road         |
| 3     | Street       |
| 4     | Path         |
| 5     | Track        |
| 6     | Cycleway     |
| 7     | Footway      |
| 8     | Steps        |
| 9     | Ferry        |
| 10    | Construction |

## Read MSS routes data

In [None]:
# Build one row per generated IMOB point holding the route summary
# (distance and duration) reported for each transport mode.
columns = [
    'i',
    'point_A',
    'point_B',
    'cycling_distance',
    'cycling_duration',
    'walking_distance',
    'walking_duration',
    'driving_distance',
    'driving_duration',
]

# Column prefix -> routing-profile suffix used in the GeoJSON file names.
route_profiles = {
    'cycling': 'cycling-regular',
    'walking': 'foot-walking',
    'driving': 'driving-car',
}

# Lower-case 'data/' matches every other path used in this notebook
# (the previous 'Data/' only resolves on case-insensitive file systems).
imob_file = pd.read_csv('data/imob_generated_points.csv', index_col=0)

# Rows are collected in a list and converted once: DataFrame.append was removed
# in pandas 2.0 and copied the whole frame on every iteration.
rows = []
with trange(imob_file.shape[0]) as t:
    for point_idx in t:
        i = point_idx + 1  # route files are numbered from 1
        t.set_description('Point %i' % i)
        data_row = {'i': i}

        for mode, profile in route_profiles.items():
            route_path = 'routes/imob_2020_' + str(i) + '_' + profile + '.geojson'
            try:
                with open(route_path) as f:
                    gj = geojson.load(f)
            except FileNotFoundError:
                # No route was stored for this point/mode; leave the cells empty.
                continue

            features = gj['features'][0]
            try:
                summary = features['properties']['summary']
                data_row[mode + '_distance'] = summary['distance']
                data_row[mode + '_duration'] = summary['duration']
            except (KeyError, TypeError):
                # Route without a (complete) summary: keep whatever was read.
                pass

        rows.append(data_row)

# `columns` fixes the column order and keeps the never-filled columns as NaN.
data = pd.DataFrame(rows, columns=columns)
route_data_distance_duration = data
    

In [None]:
# Re-seed so the same 11515 row labels are picked on every run, then remove
# them, renumber the remaining rows and replace missing values with 0.
np.random.seed(0)
drop_indices = np.random.choice(
    route_data_distance_duration.index,
    11515,
    replace=False,
)

route_data_distance_duration = (
    route_data_distance_duration
    .drop(drop_indices)
    .reset_index(drop=True)
    .fillna(0)
)

In [None]:
# Display the table after the random row removal and zero-fill.
route_data_distance_duration

In [None]:
# Derive one speed column per mode: distance * 0.001 divided by duration / 3600.
for mode in ('cycling', 'walking', 'driving'):
    scaled_distance = route_data_distance_duration[mode + '_distance'].astype('float') * 0.001
    scaled_duration = route_data_distance_duration[mode + '_duration'].astype('float') / 3600
    route_data_distance_duration[mode + '_speed'] = scaled_distance / scaled_duration

# Divisions by a zero duration give +/-inf or NaN; both are turned into 0.
route_data_distance_duration = (
    route_data_distance_duration
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

In [None]:
# Display the table again, now including the three *_speed columns.
route_data_distance_duration

In [None]:
# Summary statistics of the distance/duration/speed table.
route_data_distance_duration.describe()

In [None]:
# True: load the cached 'data/imob_routes_2020.csv' in the next cell.
# False: rebuild that table from the GeoJSON files in 'routes/' and rewrite the CSV.
read_route_data = True

In [None]:
if not read_route_data:
    # Rebuild the per-route waytype table from the raw GeoJSON files and cache it
    # as CSV so later runs can simply read it back (else-branch below).

    # Column prefix -> routing-profile suffix used in the GeoJSON file names.
    route_profiles = {
        'cycling': 'cycling-regular',
        'walking': 'foot-walking',
        'driving': 'driving-car',
    }

    # Five general columns followed by '<mode>_waytypes_<0..10>_<distance|amount>',
    # generated in the order cycling, walking, driving.
    columns = ['i', 'point_A', 'point_B', 'distance', 'duration'] + [
        mode + '_waytypes_' + str(waytype) + '_' + metric
        for mode in route_profiles
        for waytype in range(0, 11)
        for metric in ('distance', 'amount')
    ]

    imob_file = pd.read_csv('data/imob_generated_points.csv', index_col=0)

    # Rows are collected in a list and converted once: DataFrame.append was removed
    # in pandas 2.0 and copied the whole frame on every iteration.
    rows = []
    with trange(imob_file.shape[0]) as t:
        for point_idx in t:
            i = point_idx + 1  # route files are numbered from 1
            t.set_description('Point %i' % i)
            data_row = {'i': i}

            for mode, profile in route_profiles.items():
                route_path = 'routes/imob_2020_' + str(i) + '_' + profile + '.geojson'
                try:
                    with open(route_path) as f:
                        gj = geojson.load(f)
                except FileNotFoundError:
                    # No route was stored for this point/mode; leave the cells empty.
                    continue

                features = gj['features'][0]
                try:
                    waytypes = features['properties']['extras']['waytypes']['summary']
                except (KeyError, TypeError):
                    # Route without waytype information.
                    continue

                # Re-key the summary entries by waytype id (see the Waytypes table).
                waytypes = {
                    str(int(waytype['value'])): {
                        'distance': waytype['distance'],
                        'amount': waytype['amount'],
                    }
                    for waytype in waytypes
                }

                for waytype in range(0, 11):
                    entry = waytypes.get(str(waytype))
                    if entry is None:
                        continue  # this waytype does not occur on the route
                    column_prefix = mode + '_waytypes_' + str(waytype)
                    data_row[column_prefix + '_distance'] = entry['distance']
                    data_row[column_prefix + '_amount'] = entry['amount']

            rows.append(data_row)

    # `columns` fixes the column order and keeps the never-filled columns as NaN.
    data = pd.DataFrame(rows, columns=columns)
    data.to_csv('data/imob_routes_2020.csv')
    route_data = data
else:
    route_data = pd.read_csv('data/imob_routes_2020.csv', index_col=0)
    

In [None]:
# Re-seed so the same 11515 row labels are picked on every run, then remove
# them, renumber the remaining rows and replace missing values with 0.
np.random.seed(0)
drop_indices = np.random.choice(
    route_data.index,
    11515,
    replace=False,
)

route_data = (
    route_data
    .drop(drop_indices)
    .reset_index(drop=True)
    .fillna(0)
)

In [None]:
# Half-open bands [lower, upper) applied to the '*_amount' columns below.
dist_thresholds = [
    [0, 1],
    [1, 10],
    [10, 25],
    [25, 50],
    [50, 80],
    [80, float('inf')],
]

# One dict per (waytype, mode): band key '<lower>_<upper>' -> matching rows of route_data.
data_dist_cycleways_cycling = {}
data_dist_cycleways_walking = {}
data_dist_cycleways_driving = {}

data_dist_footways_cycling = {}
data_dist_footways_walking = {}
data_dist_footways_driving = {}

# Waytype 6 is Cycleway and 7 is Footway (see the Waytypes table).
band_targets = (
    ('cycling_waytypes_6_amount', data_dist_cycleways_cycling),
    ('walking_waytypes_6_amount', data_dist_cycleways_walking),
    ('driving_waytypes_6_amount', data_dist_cycleways_driving),
    ('cycling_waytypes_7_amount', data_dist_footways_cycling),
    ('walking_waytypes_7_amount', data_dist_footways_walking),
    ('driving_waytypes_7_amount', data_dist_footways_driving),
)

for lower, upper in dist_thresholds:
    band_key = str(lower) + '_' + str(upper)
    for amount_column, band_subsets in band_targets:
        amount = route_data[amount_column]
        band_subsets[band_key] = route_data[(amount >= lower) & (amount < upper)]
     

In [None]:
# Count, for every band and mode, how many routes fall into the band:
# one table for cycleways and one for footways, indexed by the band key.
df_dist_distribution_cycleways = pd.DataFrame(columns=['cycling','walking', 'driving'])
df_dist_distribution_footways = pd.DataFrame(columns=['cycling','walking', 'driving'])

for lower, upper in dist_thresholds:
    key = f'{lower}_{upper}'

    df_dist_distribution_cycleways.loc[key] = {
        'cycling': len(data_dist_cycleways_cycling[key]),
        'walking': len(data_dist_cycleways_walking[key]),
        'driving': len(data_dist_cycleways_driving[key]),
    }
    df_dist_distribution_footways.loc[key] = {
        'cycling': len(data_dist_footways_cycling[key]),
        'walking': len(data_dist_footways_walking[key]),
        'driving': len(data_dist_footways_driving[key]),
    }

In [None]:
# One figure per routing profile: mean circuity over the years, one line per
# distance band, for the MSS data and for the RS data.
# NOTE(review): `mean_circuity_dist_IMOB`, `mean_circuity_dist_RS` and `years` are
# not defined in any cell above this one — confirm where they come from so the
# notebook runs top to bottom on a fresh kernel.
colors = ['#636EFA',
          '#EF553B',
          '#00CC96',
          '#11B1F5',
          '#EF979D',
          '#008796']

circuities = ['circuity_driving-car', 'circuity_cycling-regular', 'circuity_foot-walking', ]

# (dict key, legend label, marker symbol) for every plotted distance band.
distance_bands = [
    ('0_1', '0-1 km', 'x'),
    ('1_2', '1-2 km', 'square'),
    ('2_8', '2-8 km', 'triangle-up'),
    ('8_12', '8-12 km', 'circle'),
]

for color_i, circuity in enumerate(circuities):
    fig = go.Figure()

    # MSS lines use colors[0..2]; the RS lines use the paired colors[3..5].
    sources = (
        ('MSS', mean_circuity_dist_IMOB, colors[color_i]),
        ('RS', mean_circuity_dist_RS, colors[color_i + 3]),
    )
    for source_label, mean_circuity_by_band, line_color in sources:
        for band_key, band_label, marker_symbol in distance_bands:
            fig.add_trace(go.Scatter(y=mean_circuity_by_band[band_key][circuity],
                                     x=years,
                                     mode='lines+markers',
                                     name=source_label + ' ' + band_label,
                                     line=dict(color=line_color),
                                     marker_symbol=marker_symbol,
                                     marker_size=10,
                                    )
                         )

    fig.update_layout(
        title="Mode: "+circuity,
        xaxis_title="Years",
        yaxis_title="Circuity",
        legend_title="",
        font=dict(
            family="Times New Roman",
            size=18,
            color="Black"
        )
    )
    # Plotly figures render through fig.show(); the former plt.show() call raised
    # NameError because matplotlib is never imported in this notebook.
    fig.show()

In [None]:
# Grouped bar chart: number of cycling and walking routes per cycleway band.
colors = ['#636EFA',
          '#EF553B',
          '#00CC96',
          '#11B1F5',
          '#EF979D',
          '#008796']

distances = df_dist_distribution_cycleways.index

fig = go.Figure()
for mode_name, bar_color in (('cycling', colors[1]), ('walking', colors[2])):
    mode_bar = go.Bar(
        name=mode_name,
        x=distances,
        y=df_dist_distribution_cycleways[mode_name],
        textposition='auto',
        marker_color=bar_color,
    )
    fig.add_trace(mode_bar)

figure_font = dict(family="Times New Roman", size=18, color="Black")
fig.update_layout(
    barmode='group',
    title="Distribution of trips per distance category using cycleways",
    xaxis_title="Distance categories",
    yaxis_title="# of trips",
    legend_title="Mode",
    font=figure_font,
)

fig.show()

In [None]:
# Grouped bar chart: number of routes per footway band for all three modes.
distances = df_dist_distribution_footways.index

footway_bars = [
    go.Bar(name=mode_name, x=distances, y=df_dist_distribution_footways[mode_name])
    for mode_name in ('cycling', 'walking', 'driving')
]
fig = go.Figure(data=footway_bars)

figure_font = dict(family="Courier New, monospace", size=12, color="Black")
fig.update_layout(
    barmode='group',
    title="Distribution of trips per distance category using footways",
    xaxis_title="Distance categories",
    yaxis_title="# of trips",
    legend_title="Mode",
    font=figure_font,
)

fig.show()

In [None]:
# Summary statistics of the waytype table; the next cells read max/mean/min from the same call.
route_data.describe()

In [None]:
# Collect max / mean / min of the '*_amount' column of every waytype (0-10),
# one table per statistic with a row per waytype and a column per mode.
df_distribution_amount_max = pd.DataFrame(columns=['cycling','walking', 'driving'])
df_distribution_amount_mean = pd.DataFrame(columns=['cycling','walking', 'driving'])
df_distribution_amount_min = pd.DataFrame(columns=['cycling','walking', 'driving'])

describe = route_data.describe()
stat_tables = (
    ('max', df_distribution_amount_max),
    ('mean', df_distribution_amount_mean),
    ('min', df_distribution_amount_min),
)
for waytype in range(0, 11):
    for stat_name, stat_table in stat_tables:
        stat_table.loc[str(waytype)] = {
            mode: describe[mode + '_waytypes_' + str(waytype) + '_amount'][stat_name]
            for mode in ('cycling', 'walking', 'driving')
        }

# Waytype names in id order (0-10), used as x-axis labels by the plots.
index = [
    'Unknown',
    'State Road',
    'Road',
    'Street',
    'Path',
    'Track',
    'Cycleway',
    'Footway',
    'Steps',
    'Ferry',
    'Construction',
]

In [None]:
# Grouped bar chart of the mean '*_amount' per waytype, one bar per mode.
mean_share_bars = [
    go.Bar(name=mode_name, x=index, y=df_distribution_amount_mean[mode_name])
    for mode_name in ('driving', 'cycling', 'walking')
]
fig = go.Figure(data=mean_share_bars)

figure_font = dict(family="Times New Roman", size=18, color="Black")
fig.update_layout(
    barmode='group',
    title="Waytype MEAN % of use per mode",
    xaxis_title="Waytype",
    yaxis_title="%",
    legend_title="Mode",
    font=figure_font,
)
fig.update_xaxes(tickangle=-45)
fig.show()

In [None]:
# Same three tables as for the shares, but filled from the '*_distance' columns.
# The df_distribution_amount_* names are reused, so from here on they hold distances.
df_distribution_amount_max = pd.DataFrame(columns=['cycling','walking', 'driving'])
df_distribution_amount_mean = pd.DataFrame(columns=['cycling','walking', 'driving'])
df_distribution_amount_min = pd.DataFrame(columns=['cycling','walking', 'driving'])

describe = route_data.describe()
stat_tables = (
    ('max', df_distribution_amount_max),
    ('mean', df_distribution_amount_mean),
    ('min', df_distribution_amount_min),
)
for waytype in range(0, 11):
    for stat_name, stat_table in stat_tables:
        stat_table.loc[str(waytype)] = {
            mode: describe[mode + '_waytypes_' + str(waytype) + '_distance'][stat_name]
            for mode in ('cycling', 'walking', 'driving')
        }

# Waytype names in id order (0-10), used as x-axis labels by the plots.
index = [
    'Unknown',
    'State Road',
    'Road',
    'Street',
    'Path',
    'Track',
    'Cycleway',
    'Footway',
    'Steps',
    'Ferry',
    'Construction',
]

In [None]:
# Grouped bar chart of the mean '*_distance' per waytype, one bar per mode.
mean_distance_bars = [
    go.Bar(name=mode_name, x=index, y=df_distribution_amount_mean[mode_name])
    for mode_name in ('driving', 'cycling', 'walking')
]
fig = go.Figure(data=mean_distance_bars)

figure_font = dict(family="Times New Roman", size=18, color="Black")
fig.update_layout(
    barmode='group',
    title="Waytype MEAN Distance of use per mode",
    xaxis_title="Waytype",
    yaxis_title="Distance [m]",
    legend_title="Mode",
    font=figure_font,
)
fig.update_xaxes(tickangle=-45)
fig.show()

In [None]:
# Ad-hoc arithmetic left in the notebook.
# NOTE(review): the meaning of the three operands is not stated anywhere in view —
# name them or remove this cell.
1889+961+171


In [None]:
# Display the waytype table (pandas truncates the rendering of long frames).
route_data


In [None]:
# Rank the routes by the distance cycled on cycleways (waytype 6) and plot the
# rank (y) against that distance (x).
# The stray bare name `a` that opened this cell was removed: it is not defined
# anywhere and raised NameError before anything was plotted.
route_data_sorted = (
    route_data
    .sort_values(by=['cycling_waytypes_6_distance'], ascending=True)
    .reset_index()  # the new 0..n-1 index is the rank; old labels move to an 'index' column
)


fig = go.Figure()
fig.add_trace(go.Scatter(y=route_data_sorted.index,
                         x=route_data_sorted['cycling_waytypes_6_distance'],
                mode='lines+markers',
                name='cycling'))
fig.update_layout(
    title='Cycleways # Trips & Distance',
    xaxis_title="Distance Cycled in Cycleways",
    yaxis_title="# Trips",
    legend_title="Cycling",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    )
)

fig.show()


In [None]:
# Rank the routes by the share of the trip cycled on cycleways (waytype 6,
# '*_amount' column) and plot the rank (y) against that share (x).
# Removed from the original cell: the undefined bare name `a` and the trailing
# plt.show() (matplotlib is never imported here); both raised NameError.
route_data_sorted = (
    route_data
    .sort_values(by=['cycling_waytypes_6_amount'], ascending=True)
    .reset_index()  # the new 0..n-1 index is the rank; old labels move to an 'index' column
)


fig = go.Figure()
fig.add_trace(go.Scatter(y=route_data_sorted.index,
                         x=route_data_sorted['cycling_waytypes_6_amount'],
                mode='lines+markers',
                name='cycling'))
# The labels previously said "Distance" (copied from the distance plot) although
# the share column is plotted; '%' follows the unit used by the waytype share charts.
fig.update_layout(
    title='Cycleways # Trips & Share of Trip',
    xaxis_title="Share of Trip Cycled in Cycleways [%]",
    yaxis_title="# Trips",
    legend_title="Cycling",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    )
)

fig.show()

# Parish analysis

In [None]:
# Load the Lisbon parish ("freguesia") polygons and reproject them to EPSG:4326.
gdf_freguesias = gpd.read_file("Lisboa_Freguesias/Lisboa_Freguesias_CAOP2015_TM06.shp")
gdf_freguesias = gdf_freguesias.to_crs(epsg=4326)
# Re-label the geometry and 'Freguesia' series by the DICOFRE code so later cells can
# look a parish name up with gdf_freguesias['Freguesia'][str(code)].
# NOTE(review): the index is assigned on the object returned by column access, not on
# the frame itself; this presumably only persists while pandas hands back the same
# cached Series and would be lost under copy-on-write — confirm, or build an explicit
# lookup Series instead.
gdf_freguesias.geometry.index = gdf_freguesias['DICOFRE']
gdf_freguesias['Freguesia'].index = gdf_freguesias['DICOFRE'].astype('str')

In [None]:
# Load the IMOB points table; later cells read its 'freguesia_or' and 'freguesia_de' columns.
# NOTE(review): the boolean masks built from this frame are applied to route_data, so
# both presumably share the same row labels — confirm.
df_IMOB_points = pd.read_csv('data/df_IMOB_points.csv', index_col=0)

In [None]:
# For every origin parish, chart the mean '*_amount' of each waytype per mode,
# using only the routes whose 'freguesia_or' equals that parish.
for freguesia in df_IMOB_points['freguesia_or'].unique():
    mask_freguesia = df_IMOB_points['freguesia_or'].eq(freguesia)
    route_data_freguesia = route_data.loc[mask_freguesia]

    describe = route_data_freguesia.describe()
    df_distribution_amount_mean = pd.DataFrame(columns=['cycling','walking', 'driving'])
    for waytype in range(0, 11):
        df_distribution_amount_mean.loc[str(waytype)] = {
            mode: describe[mode + '_waytypes_' + str(waytype) + '_amount']['mean']
            for mode in ('cycling', 'walking', 'driving')
        }

    # Waytype names in id order (0-10), used as x-axis labels.
    index = [
        'Unknown',
        'State Road',
        'Road',
        'Street',
        'Path',
        'Track',
        'Cycleway',
        'Footway',
        'Steps',
        'Ferry',
        'Construction',
    ]

    parish_bars = [
        go.Bar(name=mode, x=index, y=df_distribution_amount_mean[mode])
        for mode in ('cycling', 'walking', 'driving')
    ]
    fig = go.Figure(data=parish_bars)

    parish_name = str(gdf_freguesias['Freguesia'][str(freguesia)])
    fig.update_layout(
        barmode='group',
        title=parish_name+" - Waytype MEAN % of use per mode",
        xaxis_title="Waytype",
        yaxis_title="%",
        legend_title="Mode",
        font=dict(family="Courier New, monospace", size=12, color="Black"),
    )

    fig.show()

### Within-parish analysis (origin and destination in the same parish)

In [None]:
# Flag the trips whose origin and destination parish codes are equal,
# and keep those rows in a separate frame.
origin_parish = df_IMOB_points['freguesia_or']
destination_parish = df_IMOB_points['freguesia_de']
mask_same_parish = origin_parish == destination_parish
df_IMOB_points_same_freguesia = df_IMOB_points.loc[mask_same_parish]

In [None]:
# For every parish, chart the mean '*_distance' of each waytype per mode,
# using only the trips that start and end inside that parish.
for freguesia in df_IMOB_points['freguesia_or'].unique():
    mask_freguesia = df_IMOB_points['freguesia_or'] == (freguesia)

    route_data_freguesia = route_data.loc[mask_freguesia & mask_same_parish]

    df_distribution_amount_mean = pd.DataFrame(columns=['cycling','walking', 'driving'])

    describe = route_data_freguesia.describe()
    for waytype in range(0, 11):
        df_distribution_amount_mean.loc[str(waytype)] = {
            mode: describe[mode + '_waytypes_' + str(waytype) + '_distance']['mean']
            for mode in ('cycling', 'walking', 'driving')
        }

    # Waytype names in id order (0-10), used as x-axis labels.
    index = [
        'Unknown',
        'State Road',
        'Road',
        'Street',
        'Path',
        'Track',
        'Cycleway',
        'Footway',
        'Steps',
        'Ferry',
        'Construction']

    fig = go.Figure(data=[
        go.Bar(name='cycling', x=index, y=df_distribution_amount_mean.cycling),
        go.Bar(name='walking', x=index, y=df_distribution_amount_mean.walking),
        go.Bar(name='driving', x=index, y=df_distribution_amount_mean.driving),
    ])

    # These bars are mean distances, so the title and y-axis now say so; they
    # previously read "%" (copied from the share chart). Wording and unit follow
    # the notebook's overall mean-distance chart.
    fig.update_layout(
        barmode='group',
        title=str(gdf_freguesias['Freguesia'][str(freguesia)])+" - Waytype MEAN Distance of use per mode",
        xaxis_title="Waytype",
        yaxis_title="Distance [m]",
        legend_title="Mode",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="Black"
        ))

    fig.show()

In [None]:
# Scatter plot of df_points.imob_rs_or_diff (y, also used for the marker colour)
# against df_points.area (x).
# NOTE(review): `df_points` and `x_axis` are not defined in any cell visible above —
# confirm where they come from so this cell runs on a fresh kernel.
fig = go.Figure()

# Add traces
fig.add_trace(
    go.Scatter(x=df_points.area, 
               y=df_points.imob_rs_or_diff,
               mode='markers',
               text=x_axis,
               # marker_color repeats the value also given as marker['color'] below
               marker_color=df_points.imob_rs_or_diff,
               marker=dict(
                    size=16,
                    color=df_points.imob_rs_or_diff, #set color equal to a variable
                    colorscale='Viridis', # one of plotly colorscales
                    showscale=True
                )
                        ))

fig.update_layout(
    barmode='group',
    title="IMOB-RS Diff over parish area",
    xaxis_title="Area",
    yaxis_title="IMOB trips - RS trips",
    legend_title="Diff",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    ))

fig.show()

# IMOB modes

In [None]:
# Load the yearly IMOB distance/time tables, add one circuity column per routing
# profile (route distance / haversine distance / 1000), record the yearly mean
# circuity, then drop the same random rows from every year and key the frames by year.
circuity_profiles = [
    'driving-car',
    'driving-hgv',
    'foot-walking',
    'foot-hiking',
    'cycling-regular',
    'cycling-road',
    'cycling-mountain',
    'cycling-electric',
]

data_IMOB = []
mean_circuity_IMOB = {'circuity_' + profile: [] for profile in circuity_profiles}

# Skip previously exported '*circuity*' files before iterating, so that
# "first file" below really means the first file that is processed.
data_files = [
    data_file
    for data_file in sorted(glob.glob('data/dist_time_lisbon_imob_*.csv'))
    if 'circuity' not in data_file
]

# Re-seed as the other row-dropping cells do, so the selection is reproducible and
# does not depend on how much randomness earlier cells consumed.
# NOTE(review): the masks built from data_IMOB['2020'] are later applied to
# route_data, which presumably requires the same row positions to be dropped from
# both tables — confirm both have the same number of rows before dropping.
np.random.seed(0)
drop_indices = None

for data_file in data_files:
    print('=====', data_file, '=====')
    # NOTE(review): 111515 leading rows are skipped here while 11515 rows are dropped
    # below — confirm that the two different numbers are intended.
    df = pd.read_csv(data_file, index_col=0, skiprows=range(1,111515+1))

    for profile in circuity_profiles:
        circuity_column = 'circuity_' + profile
        df[circuity_column] = df[profile + '_dist'] / df['haversine_dist']/1000
        # The mean is taken before any rows are dropped.
        mean_circuity_IMOB[circuity_column].append(df[circuity_column].mean(skipna=True))

    # The labels are chosen once, on the first file, and reused for every year.
    if drop_indices is None:
        drop_indices = np.random.choice(df.index, 11515, replace=False)

    df = df.drop(drop_indices)
    data_IMOB.append(df.reset_index(drop=True))

years = [str(i) for i in range(2013, 2021)]
if len(data_IMOB) < len(years):
    raise ValueError(
        'Expected at least %d yearly files matching data/dist_time_lisbon_imob_*.csv, found %d'
        % (len(years), len(data_IMOB))
    )
# Files are paired with years in sorted order; any file beyond the last year is ignored.
data_IMOB = dict(zip(years, data_IMOB))

In [None]:
# Boolean masks over the 2020 trips, one per reported travel mode.
vehicle_2020 = data_IMOB['2020']['vehicle']

# `vehicle` labels counted as driving. Every label is matched against the
# `vehicle` column itself, so car passengers are included in the mask.
driving_vehicles = [
    'passenger car - as passenger',
    'passenger car - as driver',
    'van/lorry/tractor/camper',
    'motorcycle and moped',
    'Táxi (como passageiro)',
]
mask_IMOB_driving = vehicle_2020.isin(driving_vehicles)
# NOTE(review): the walking mask matches the car-passenger label, which is also
# part of the driving mask; the walking label is not visible in this part of
# the notebook — confirm the intended value.
mask_IMOB_walking = (vehicle_2020 == 'passenger car - as passenger')
mask_IMOB_cycling = (vehicle_2020 == 'Cycling')

data_IMOB_driving = data_IMOB['2020'].loc[mask_IMOB_driving]
print('Size of driving data:', data_IMOB_driving.shape)
data_IMOB_walking = data_IMOB['2020'].loc[mask_IMOB_walking]
print('Size of walking data:', data_IMOB_walking.shape)
data_IMOB_cycling = data_IMOB['2020'].loc[mask_IMOB_cycling]
print('Size of cycling data:', data_IMOB_cycling.shape)

### Share of trip per waytype (%)

In [None]:
# Mean waytype share (`*_amount`) per route profile, computed separately for
# trips reported as cycling, walking and driving.
# Each table has one row per waytype code ('0'..'10') and one column per route profile.
route_modes = ['cycling', 'walking', 'driving']
df_distribution_amount_mean_cycling = pd.DataFrame(columns=route_modes)
df_distribution_amount_mean_driving = pd.DataFrame(columns=route_modes)
df_distribution_amount_mean_walking = pd.DataFrame(columns=route_modes)

# The masks were built on data_IMOB['2020']; using them on route_data assumes
# both frames share the same row index — TODO confirm.
describe_cycling = route_data.loc[mask_IMOB_cycling].describe()
describe_walking = route_data.loc[mask_IMOB_walking].describe()
describe_driving = route_data.loc[mask_IMOB_driving].describe()
for i in range(0, 11):
    waytype = str(i)

    # Each table receives the statistics of its own trip subset, so the
    # "Walking Trips" / "Driving Trips" charts read the matching data.
    df_distribution_amount_mean_cycling.loc[waytype] = {
        mode: describe_cycling[mode+'_waytypes_'+waytype+'_amount']['mean']
        for mode in route_modes
    }
    df_distribution_amount_mean_walking.loc[waytype] = {
        mode: describe_walking[mode+'_waytypes_'+waytype+'_amount']['mean']
        for mode in route_modes
    }
    df_distribution_amount_mean_driving.loc[waytype] = {
        mode: describe_driving[mode+'_waytypes_'+waytype+'_amount']['mean']
        for mode in route_modes
    }

In [None]:
# Waytype display names, listed in waytype-code order (0-10).
index = ['Unknown', 'State Road', 'Road', 'Street', 'Path', 'Track',
         'Cycleway', 'Footway', 'Steps', 'Ferry', 'Construction']

# Grouped bars: one series per route-profile column of the cycling table.
fig = go.Figure(data=[
    go.Bar(name=mode, x=index, y=df_distribution_amount_mean_cycling[mode])
    for mode in ('cycling', 'walking', 'driving')
])

layout_font = {'family': "Courier New, monospace", 'size': 12, 'color': "Black"}
fig.update_layout(
    barmode='group',
    title="Cycling Trips",
    xaxis_title="Waytype",
    yaxis_title="%",
    legend_title="Mode",
    font=layout_font,
)

fig.show()

In [None]:
# Waytype display names, listed in waytype-code order (0-10).
index = ['Unknown', 'State Road', 'Road', 'Street', 'Path', 'Track',
         'Cycleway', 'Footway', 'Steps', 'Ferry', 'Construction']

# Grouped bars: one series per route-profile column of the walking table.
fig = go.Figure(data=[
    go.Bar(name=mode, x=index, y=df_distribution_amount_mean_walking[mode])
    for mode in ('cycling', 'walking', 'driving')
])

layout_font = {'family': "Courier New, monospace", 'size': 12, 'color': "Black"}
fig.update_layout(
    barmode='group',
    title="Walking Trips",
    xaxis_title="Waytype",
    yaxis_title="%",
    legend_title="Mode",
    font=layout_font,
)

fig.show()

In [None]:
# Waytype display names, listed in waytype-code order (0-10).
index = ['Unknown', 'State Road', 'Road', 'Street', 'Path', 'Track',
         'Cycleway', 'Footway', 'Steps', 'Ferry', 'Construction']

# Grouped bars: one series per route-profile column of the driving table.
fig = go.Figure(data=[
    go.Bar(name=mode, x=index, y=df_distribution_amount_mean_driving[mode])
    for mode in ('cycling', 'walking', 'driving')
])

layout_font = {'family': "Courier New, monospace", 'size': 12, 'color': "Black"}
fig.update_layout(
    barmode='group',
    title="Driving Trips",
    xaxis_title="Waytype",
    yaxis_title="%",
    legend_title="Mode",
    font=layout_font,
)

fig.show()

### Distance travelled

In [None]:
# Mean distance per waytype (`*_distance`) for each route profile, computed
# separately for trips reported as cycling, walking and driving.
# The table names are reused from the percentage section; from this cell on
# they hold distances, not shares.
route_modes = ['cycling', 'walking', 'driving']
df_distribution_amount_mean_cycling = pd.DataFrame(columns=route_modes)
df_distribution_amount_mean_driving = pd.DataFrame(columns=route_modes)
df_distribution_amount_mean_walking = pd.DataFrame(columns=route_modes)

# The masks were built on data_IMOB['2020']; using them on route_data assumes
# both frames share the same row index — TODO confirm.
describe_cycling = route_data.loc[mask_IMOB_cycling].describe()
describe_walking = route_data.loc[mask_IMOB_walking].describe()
describe_driving = route_data.loc[mask_IMOB_driving].describe()
for i in range(0, 11):
    waytype = str(i)

    # Each table receives the statistics of its own trip subset, so the
    # "Walking Trips" / "Driving Trips" charts read the matching data.
    df_distribution_amount_mean_cycling.loc[waytype] = {
        mode: describe_cycling[mode+'_waytypes_'+waytype+'_distance']['mean']
        for mode in route_modes
    }
    df_distribution_amount_mean_walking.loc[waytype] = {
        mode: describe_walking[mode+'_waytypes_'+waytype+'_distance']['mean']
        for mode in route_modes
    }
    df_distribution_amount_mean_driving.loc[waytype] = {
        mode: describe_driving[mode+'_waytypes_'+waytype+'_distance']['mean']
        for mode in route_modes
    }

In [None]:
# Waytype display names, listed in waytype-code order (0-10).
index = [
    'Unknown',
    'State Road',
    'Road',
    'Street',
    'Path',
    'Track',
    'Cycleway',
    'Footway',
    'Steps',
    'Ferry',
    'Construction']

# Grouped bars: one series per route-profile column of the cycling table.
fig = go.Figure(data=[
    go.Bar(name='cycling', x=index, y=df_distribution_amount_mean_cycling.cycling),
    go.Bar(name='walking', x=index, y=df_distribution_amount_mean_cycling.walking),
    go.Bar(name='driving', x=index, y=df_distribution_amount_mean_cycling.driving),
])

# In this section the table holds means of the `*_waytypes_<code>_distance`
# columns, so the y-axis is a distance rather than a percentage.
# NOTE(review): unit presumably metres — confirm and add it to the label.
fig.update_layout(
    barmode='group',
    title="Cycling Trips",
    xaxis_title="Waytype",
    yaxis_title="Mean distance",
    legend_title="Mode",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    ))

fig.show()

In [None]:
# Waytype display names, listed in waytype-code order (0-10).
index = [
    'Unknown',
    'State Road',
    'Road',
    'Street',
    'Path',
    'Track',
    'Cycleway',
    'Footway',
    'Steps',
    'Ferry',
    'Construction']

# Grouped bars: one series per route-profile column of the walking table.
fig = go.Figure(data=[
    go.Bar(name='cycling', x=index, y=df_distribution_amount_mean_walking.cycling),
    go.Bar(name='walking', x=index, y=df_distribution_amount_mean_walking.walking),
    go.Bar(name='driving', x=index, y=df_distribution_amount_mean_walking.driving),
])

# In this section the table holds means of the `*_waytypes_<code>_distance`
# columns, so the y-axis is a distance rather than a percentage.
# NOTE(review): unit presumably metres — confirm and add it to the label.
fig.update_layout(
    barmode='group',
    title="Walking Trips",
    xaxis_title="Waytype",
    yaxis_title="Mean distance",
    legend_title="Mode",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    ))

fig.show()

In [None]:
# Waytype display names, listed in waytype-code order (0-10).
index = [
    'Unknown',
    'State Road',
    'Road',
    'Street',
    'Path',
    'Track',
    'Cycleway',
    'Footway',
    'Steps',
    'Ferry',
    'Construction']

# Grouped bars: one series per route-profile column of the driving table.
fig = go.Figure(data=[
    go.Bar(name='cycling', x=index, y=df_distribution_amount_mean_driving.cycling),
    go.Bar(name='walking', x=index, y=df_distribution_amount_mean_driving.walking),
    go.Bar(name='driving', x=index, y=df_distribution_amount_mean_driving.driving),
])

# In this section the table holds means of the `*_waytypes_<code>_distance`
# columns, so the y-axis is a distance rather than a percentage.
# NOTE(review): unit presumably metres — confirm and add it to the label.
fig.update_layout(
    barmode='group',
    title="Driving Trips",
    xaxis_title="Waytype",
    yaxis_title="Mean distance",
    legend_title="Mode",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    ))

fig.show()

### Waytype shares by trip-distance category

In [None]:
# Display the `haversine_dist` column of the 2020 trips; the distance bins
# built further down are thresholds on this column.
# NOTE(review): presumably kilometres, given the `/1000` in the circuity ratios — confirm.
data_IMOB['2020']['haversine_dist']

In [None]:
# NOTE(review): same expression as the preceding cell, so it shows the same
# output; candidate for removal.
data_IMOB['2020']['haversine_dist']

In [None]:
# Half-open [lower, upper) bins applied to `haversine_dist`; the last bin is open-ended.
dist_thresholds = [
    [0, 1],
    [1, 2],
    [2, 8],
    [8, 12],
    [12, float('inf')],
]

# One boolean mask over the 2020 trips per bin, keyed '<lower>_<upper>' (e.g. '0_1', '12_inf').
masks_distances = {}
for dist in dist_thresholds:
    lower, upper = dist
    trip_length = data_IMOB['2020']['haversine_dist']
    masks_distances[f'{lower}_{upper}'] = trip_length.ge(lower) & trip_length.lt(upper)

In [None]:
# For every haversine-distance bin: report subset sizes per travel mode, build
# the mean waytype-share tables for that bin, and plot the cycling-trip table.

# Waytype codes shown in the per-bin charts, mapped to their display names
# (codes follow the "Waytypes" table at the top of the notebook). The chart
# labels are derived from this mapping so they always match the plotted rows.
waytype_names = {
    1: 'State Road',
    2: 'Road',
    3: 'Street',
    6: 'Cycleway',
    7: 'Footway',
}
route_modes = ['cycling', 'walking', 'driving']
# `vehicle` labels counted as driving.
driving_vehicles = [
    'passenger car - as passenger',
    'passenger car - as driver',
    'van/lorry/tractor/camper',
    'motorcycle and moped',
    'Táxi (como passageiro)',
]

for dist in dist_thresholds:
    bin_label = str(dist[0])+'_'+str(dist[1])
    data_IMOB_dist = data_IMOB['2020'].loc[masks_distances[bin_label]]
    print('Size of trips:',data_IMOB_dist.shape)

    # NOTE: this rebinds the notebook-level mask_IMOB_* / data_IMOB_* names;
    # after the loop they describe the last distance bin only.
    mask_IMOB_driving = data_IMOB_dist['vehicle'].isin(driving_vehicles)
    # NOTE(review): the walking mask matches the car-passenger label, which is
    # also part of the driving mask — confirm the intended walking label.
    mask_IMOB_walking = (data_IMOB_dist['vehicle'] == 'passenger car - as passenger')
    mask_IMOB_cycling = (data_IMOB_dist['vehicle'] == 'Cycling')

    print(bin_label)

    data_IMOB_driving = data_IMOB_dist.loc[mask_IMOB_driving]
    print('Size of driving data:', data_IMOB_driving.shape)
    data_IMOB_walking = data_IMOB_dist.loc[mask_IMOB_walking]
    print('Size of walking data:', data_IMOB_walking.shape)
    data_IMOB_cycling = data_IMOB_dist.loc[mask_IMOB_cycling]
    print('Size of cycling data:', data_IMOB_cycling.shape)

    df_distribution_amount_mean_cycling = pd.DataFrame(columns=route_modes)
    df_distribution_amount_mean_driving = pd.DataFrame(columns=route_modes)
    df_distribution_amount_mean_walking = pd.DataFrame(columns=route_modes)

    # The mode masks only cover the rows of this bin. Extend them to
    # route_data's index (rows outside the bin become False) so they can be
    # used as boolean row selectors. Assumes route_data shares its row index
    # with data_IMOB['2020'] — TODO confirm.
    describe_cycling = route_data.loc[
        mask_IMOB_cycling.reindex(route_data.index, fill_value=False)].describe()
    describe_walking = route_data.loc[
        mask_IMOB_walking.reindex(route_data.index, fill_value=False)].describe()
    describe_driving = route_data.loc[
        mask_IMOB_driving.reindex(route_data.index, fill_value=False)].describe()

    for waytype in waytype_names:
        code = str(waytype)
        # Each table receives the statistics of its own trip subset.
        df_distribution_amount_mean_cycling.loc[code] = {
            mode: describe_cycling[mode+'_waytypes_'+code+'_amount']['mean']
            for mode in route_modes
        }
        df_distribution_amount_mean_walking.loc[code] = {
            mode: describe_walking[mode+'_waytypes_'+code+'_amount']['mean']
            for mode in route_modes
        }
        df_distribution_amount_mean_driving.loc[code] = {
            mode: describe_driving[mode+'_waytypes_'+code+'_amount']['mean']
            for mode in route_modes
        }

    # One label per plotted row, in the same order as the table rows.
    index = list(waytype_names.values())

    fig = go.Figure(data=[
        go.Bar(name='cycling', x=index, y=df_distribution_amount_mean_cycling.cycling),
        go.Bar(name='walking', x=index, y=df_distribution_amount_mean_cycling.walking),
        go.Bar(name='driving', x=index, y=df_distribution_amount_mean_cycling.driving),
    ])

    fig.update_layout(
        barmode='group',
        title="Cycling Trips for "+bin_label,
        xaxis_title="Waytype",
        yaxis_title="%",
        legend_title="Mode",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="Black"
        ))

    fig.show()
    
    

In [None]:

# Subsets of route_data per threshold bin, keyed '<lower>_<upper>', for the
# cycleway (waytype 6) and footway (waytype 7) columns of each route profile.
# NOTE(review): dist_thresholds are the bins used for `haversine_dist`, but here
# they are compared against `*_amount` columns — confirm this is intended.
data_dist_cycleways_cycling = {}
data_dist_cycleways_walking = {}
data_dist_cycleways_driving = {}

data_dist_footways_cycling = {}
data_dist_footways_walking = {}
data_dist_footways_driving = {}

# (destination dict, route_data column) pairs filled for every bin.
bin_targets = (
    (data_dist_cycleways_cycling, 'cycling_waytypes_6_amount'),
    (data_dist_cycleways_walking, 'walking_waytypes_6_amount'),
    (data_dist_cycleways_driving, 'driving_waytypes_6_amount'),
    (data_dist_footways_cycling, 'cycling_waytypes_7_amount'),
    (data_dist_footways_walking, 'walking_waytypes_7_amount'),
    (data_dist_footways_driving, 'driving_waytypes_7_amount'),
)

for dist in dist_thresholds:
    lower, upper = dist
    bin_key = f'{lower}_{upper}'
    for target, column in bin_targets:
        values = route_data[column]
        # Half-open bin: lower <= value < upper.
        target[bin_key] = route_data[(values >= lower) & (values < upper)]
     