In [None]:
from glob import glob
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm
from sklearn.linear_model import LinearRegression

In [None]:
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [None]:
# Plot colour for each city cluster; indexed by the cluster number
# (cluster_colors[k] is used for cluster k in the box plots further down).
cluster_colors = ['darkgreen', 'chocolate', 'dodgerblue', 'mediumvioletred', 'midnightblue']

## Read data

In [None]:
# Load one CSV per city into city_data[<city>]['data'].
# The city name is whatever follows the last underscore of the path, minus '.csv'.
# NOTE(review): a multi-word file such as data_new_york.csv would be keyed as
# 'york' — confirm that no city file name contains an underscore.
city_data = {}
csv_paths = sorted(glob('simulated_routes-on_streets/data_*.csv'))
for csv_file in csv_paths:
    city_name = csv_file.split('_')[-1].replace('.csv', '')
    city_frame = pd.read_csv(csv_file)
    city_data[city_name] = {'data': city_frame}
    n_points = str(city_frame.shape[0])
    print('City:', city_name.ljust(15), 'Points:', n_points.ljust(10))

Total number of points

In [None]:
# Total number of origin-destination (OD) pairs over all cities.
# The previous loop iterated with `city` but indexed with the stale `city_name`
# left over from the loading cell, so it added the last city's size once per
# city. Summing over the dict values counts every city exactly once.
total_points = sum(entry['data'].shape[0] for entry in city_data.values())

# Three trips per OD pair — presumably one per mode (driving, cycling, walking); confirm.
print('Total OD Pairs:', total_points, '\tTotal Trips:', total_points*3)

Trips above 12 km

In [None]:
# Number of OD pairs whose straight-line (haversine) distance exceeds 12,
# summed over all cities. The previous loop indexed city_data with the stale
# `city_name` instead of its own loop variable, so only one city was counted
# (repeatedly).
big_trips = sum(
    (entry['data']['haversine_dist'] > 12).sum()
    for entry in city_data.values()
)

# The value in parentheses is the share of all OD pairs, in percent
# (relies on total_points from the previous cell).
print('Total # trips above 12 km:', big_trips, '({})'.format(big_trips/total_points*100))

## Compute circuity values

In [None]:
# Circuity = routed distance / straight-line distance, computed per mode and
# stored as new columns on each city's frame (the frame is modified in place).
# The extra division by 1000 presumably converts the routed distance from
# metres to the kilometres used by haversine_dist — confirm units.
circuity_sources = {
    'circuity_driving': 'driving-car_dist',
    'circuity_cycling': 'cycling-regular_dist',
    'circuity_walking': 'foot-walking_dist',
}
for city_name in city_data.keys():
    city = city_data[city_name]['data']
    for circuity_col, routed_col in circuity_sources.items():
        city[circuity_col] = city[routed_col] / city['haversine_dist'] / 1000

### Discard circuity errors (values below 1 or at/above 10)

In [None]:
# Keep only rows whose circuity lies in [1, 10) for all three modes; anything
# outside that range (or missing) is dropped.
circuity_cols = ['circuity_driving', 'circuity_cycling', 'circuity_walking']
for city_name in city_data.keys():
    frame = city_data[city_name]['data']
    within_range = (frame[circuity_cols] < 10) & (frame[circuity_cols] >= 1)
    city_data[city_name]['data'] = frame[within_range.all(axis=1)]

### Compute average results

In [None]:
# Flatten DataFrame.describe() for every city into a dict of the form
# {'<statistic>_<column>': value}. The 'count' row and the 'Unnamed: 0' column
# are left out.
city_data_avg = {}

pbar = tqdm(city_data.keys())
for city_name in pbar:
    pbar.set_description(city_name.ljust(20))

    city = city_data[city_name]['data']
    metrics = city.describe()

    flattened = {}
    for stat in metrics.index:
        if stat == 'count':
            continue
        for column in metrics.columns:
            if column == 'Unnamed: 0':
                continue
            flattened[stat + '_' + column] = metrics[column][stat]

    city_data_avg[city_name] = flattened

Print results

In [None]:
#print(json.dumps(city_data_avg, indent=2, ))

### Compute circuities for different trip distances

In [None]:
# Split every city's trips into straight-line distance bands [lower, upper).
# city_data_dist[<city>]['<lower>_<upper>'] holds the rows falling in that band.
city_data_dist = {}
dist_thresholds = [
    [0, 2],
    [2, 8],
    [8, 12],
    [12, float('inf')],
]

pbar = tqdm(city_data.keys())
for city_name in pbar:
    pbar.set_description(city_name.ljust(20))

    frame = city_data[city_name]['data']
    straight_line = frame['haversine_dist']

    bands = {}
    for lower, upper in dist_thresholds:
        band_code = str(lower) + '_' + str(upper)
        bands[band_code] = frame[(straight_line >= lower) & (straight_line < upper)]
    city_data_dist[city_name] = bands

# Plots

## Plot circuity distributions for each city

In [None]:
# For every city, draw the three circuity histograms (driving, cycling,
# walking) on one figure and write it to images/circuity_<city>.html.
group_labels = ['Driving', 'Cycling', 'Walking']
colors = ['#E3421B', '#445EFF', '#4FD76A']
circuity_columns = ['circuity_driving', 'circuity_cycling', 'circuity_walking']

pbar = tqdm(city_data.keys())
for city_name in pbar:
    pbar.set_description(city_name.ljust(20))

    city = city_data[city_name]['data']
    hist_data = [city[column] for column in circuity_columns]

    fig = ff.create_distplot(
        hist_data,
        group_labels,
        colors=colors,
        bin_size=[0.2, 0.2, 0.2],
        show_curve=False,
    )

    # The city name is the figure title; figures are saved rather than shown.
    fig.update(layout_title_text=city_name)
    fig.write_html('images/circuity_{}.html'.format(city_name))

### Individual circuity plot per mode

In [None]:
# Collect the mean circuity per mode for every city into one table.
# Iterate over city_data itself: the previous version looped over `pbar`, a
# progress bar that an earlier cell had already consumed, so this cell only
# worked as a side effect of that cell having been run.
cities = []
driving = []
cycling = []
walking = []

for city_name in city_data.keys():
    cities.append(city_name.capitalize())
    driving.append(city_data_avg[city_name]['mean_circuity_driving'])
    cycling.append(city_data_avg[city_name]['mean_circuity_cycling'])
    walking.append(city_data_avg[city_name]['mean_circuity_walking'])

city_data_avg_df = pd.DataFrame({'City': cities,
                                 'Driving': driving,
                                 'Cycling': cycling,
                                 'Walking': walking,
                                })

In [None]:
# Mean circuity per city for the three modes, with cities ordered by their
# cycling circuity. Cycling is drawn with a connecting line; the other two
# modes are markers only.
city_data_avg_df = city_data_avg_df.sort_values(by='Cycling')

# (column / legend name, plotly draw mode, marker colour)
mode_styles = [
    ('Driving', 'markers', 'indianred'),
    ('Cycling', 'lines+markers', 'cornflowerblue'),
    ('Walking', 'markers', 'lightseagreen'),
]

data = [
    go.Scatter(
        x=city_data_avg_df['City'],
        y=city_data_avg_df[mode_name],
        mode=draw_mode,
        name=mode_name,
        marker=dict(
            size=12,
            color=marker_color,
            line=dict(width=2, color='black'),
        ),
    )
    for mode_name, draw_mode, marker_color in mode_styles
]

layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=1200, height=600,
    template='plotly_white',
    margin=dict(l=20, r=20, t=20, b=20),
    font=dict(
        family="Times New Roman",
        size=16,
        color="Black"
    ),
    legend=dict(
        yanchor="top",
        y=0.95,
        xanchor="left",
        x=0.01
    ),
    xaxis=dict(
        title='Cities',
        showline=True, linewidth=2, linecolor='black',
        tickangle=-60,
    ),
    yaxis=dict(
        title='Circuity',
        showline=True, linewidth=2, linecolor='black',
        range=[1, 2],
    ),
)

# The stray colour-name string literals that used to sit here were no-op
# expressions left over from editing and have been removed.
fig = go.Figure(data=data, layout=layout)
fig.show()

Average Driving, Cycling and Walking circuity

In [None]:
# Mean over cities of the per-city mean circuity, one value per mode.
driving_mean = city_data_avg_df['Driving'].mean()
cycling_mean = city_data_avg_df['Cycling'].mean()
walking_mean = city_data_avg_df['Walking'].mean()
print('Average Circuity for:\n\t-Driving:', driving_mean,
      '\n\t-Cycling:', cycling_mean,
      '\n\t-Walking:', walking_mean)

Spot similar circuities across the modes

In [None]:
# Index the table by city (the 'City' column is kept) and add the absolute
# gap in mean circuity between driving/cycling and between cycling/walking.
city_data_avg_df.index = city_data_avg_df['City']
for left_mode, right_mode in (('Driving', 'Cycling'), ('Cycling', 'Walking')):
    gap_column = 'absdiff_{}_{}'.format(left_mode.lower(), right_mode.lower())
    gap = city_data_avg_df[left_mode] - city_data_avg_df[right_mode]
    city_data_avg_df[gap_column] = gap.abs()

In [None]:
# Cities (as columns) whose driving and cycling mean circuities differ by less than 0.05.
city_data_avg_df[['absdiff_driving_cycling']].lt(0.05).T

In [None]:
# Cities (as columns) whose cycling and walking mean circuities differ by less than 0.07.
city_data_avg_df[['absdiff_cycling_walking']].lt(0.07).T

Average circuity across all modes

In [None]:
# Per-city circuity averaged over the three modes, from lowest to highest.
mode_columns = ['Driving', 'Cycling', 'Walking']
city_data_avg_df[mode_columns].mean(axis=1).sort_values()

# Aggregate data metrics into a single dataframe

Add the mean circuity per mode and distance band to each city's metrics

In [None]:
# For every city and every distance band, store the mean circuity of each mode
# in city_data_avg under the key 'mean_circuity_<mode>:<lower>_<upper>'.
# A band with no trips yields NaN.
pbar = tqdm(city_data.keys())
for city_name in pbar:
    pbar.set_description(city_name.ljust(20))

    # A tqdm bar is closed once it has been iterated, so the inner bar is
    # created afresh for every city instead of re-iterating one shared instance.
    pbar1 = tqdm(dist_thresholds, leave=False)
    for dist in pbar1:
        dist_code = str(dist[0]) + '_' + str(dist[1])
        pbar1.set_description(dist_code.ljust(20))

        band_trips = city_data_dist[city_name][dist_code]
        for mode in ('driving', 'cycling', 'walking'):
            city_data_avg[city_name]['mean_circuity_' + mode + ':' + dist_code] = \
                band_trips['circuity_' + mode].mean()

Initialize df with previously computed data

In [None]:
# Build the metrics table from the nested dict: after the transpose there is
# one row per city and one column per metric key.
data_df = pd.DataFrame(city_data_avg).T

In [None]:
# Use capitalised city names as row labels (e.g. 'london' -> 'London').
data_df.index = [raw_name.capitalize() for raw_name in data_df.index]

In [None]:
# Preview the first rows of the per-city metrics table.
data_df.head()

In [None]:
# Persist the per-city metrics table (city names are written as the index column).
data_df.to_csv('simulated_routes-on_streets/metrics.csv')

# Circuity plots per transport mode per distance

In [None]:
# Mean driving circuity per city (alphabetical order), one marker series per
# trip-distance band.
data_df = data_df.sort_index()

# (column suffix, legend label, marker symbol)
distance_bands = [
    ('0_2', '0–2 km', 'diamond'),
    ('2_8', '2–8 km', 'star-triangle-up'),
    ('8_12', '8–12 km', 'hexagon'),
]

data = [
    go.Scatter(
        x=data_df.index,
        y=data_df['mean_circuity_driving:' + band],
        mode='markers',
        name=band_label,
        marker=dict(
            size=12,
            color='indianred',
            line=dict(width=2, color='black'),
            symbol=band_symbol,
        ),
    )
    for band, band_label, band_symbol in distance_bands
]

axis_line = dict(showline=True, linewidth=2, linecolor='black')
layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=1200, height=600,
    template='plotly_white',
    margin=dict(l=20, r=20, t=20, b=20),
    font=dict(family="Times New Roman", size=18, color="Black"),
    legend=dict(yanchor="top", y=0.95, xanchor="left", x=0.01),
    xaxis=dict(title='Cities', tickangle=-60, **axis_line),
    yaxis=dict(title='Circuity', range=[1, 2.5], **axis_line),
)

fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# Mean cycling circuity per city (alphabetical order), one marker series per
# trip-distance band.
data_df = data_df.sort_index()

# (column suffix, legend label, marker symbol)
distance_bands = [
    ('0_2', '0–2 km', 'diamond'),
    ('2_8', '2–8 km', 'star-triangle-up'),
    ('8_12', '8–12 km', 'hexagon'),
]

data = [
    go.Scatter(
        x=data_df.index,
        y=data_df['mean_circuity_cycling:' + band],
        mode='markers',
        name=band_label,
        marker=dict(
            size=12,
            color='cornflowerblue',
            line=dict(width=2, color='black'),
            symbol=band_symbol,
        ),
    )
    for band, band_label, band_symbol in distance_bands
]

axis_line = dict(showline=True, linewidth=2, linecolor='black')
layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=1200, height=600,
    template='plotly_white',
    margin=dict(l=20, r=20, t=20, b=20),
    font=dict(family="Times New Roman", size=18, color="Black"),
    legend=dict(yanchor="top", y=0.95, xanchor="left", x=0.01),
    xaxis=dict(title='Cities', tickangle=-60, **axis_line),
    yaxis=dict(title='Circuity', range=[1, 2.5], **axis_line),
)

fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# Mean walking circuity per city (alphabetical order), one marker series per
# trip-distance band.
data_df = data_df.sort_index()

# (column suffix, legend label, marker symbol)
distance_bands = [
    ('0_2', '0–2 km', 'diamond'),
    ('2_8', '2–8 km', 'star-triangle-up'),
    ('8_12', '8–12 km', 'hexagon'),
]

data = [
    go.Scatter(
        x=data_df.index,
        y=data_df['mean_circuity_walking:' + band],
        mode='markers',
        name=band_label,
        marker=dict(
            size=12,
            color='lightseagreen',
            line=dict(width=2, color='black'),
            symbol=band_symbol,
        ),
    )
    for band, band_label, band_symbol in distance_bands
]

axis_line = dict(showline=True, linewidth=2, linecolor='black')
layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=1200, height=600,
    template='plotly_white',
    margin=dict(l=20, r=20, t=20, b=20),
    font=dict(family="Times New Roman", size=18, color="Black"),
    legend=dict(yanchor="top", y=0.95, xanchor="left", x=0.01),
    xaxis=dict(title='Cities', tickangle=-60, **axis_line),
    yaxis=dict(title='Circuity', range=[1, 2.5], **axis_line),
)

fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# All nine series (three modes x three distance bands) on one figure, with
# cities ordered by their 2–8 km cycling circuity. The 2–8 km series of each
# mode is drawn with a connecting line; the other bands are markers only.
data_df = data_df.sort_values(by='mean_circuity_cycling:2_8')

# (column key, legend group title, marker colour)
mode_styles = [
    ('driving', 'Driving', 'indianred'),
    ('cycling', 'Cycling', 'cornflowerblue'),
    ('walking', 'Walking', 'lightseagreen'),
]
# (column suffix, legend label, marker symbol, plotly draw mode)
distance_bands = [
    ('0_2', '0–2 km', 'diamond', 'markers'),
    ('2_8', '2–8 km', 'star-triangle-up', 'lines+markers'),
    ('8_12', '8–12 km', 'hexagon', 'markers'),
]

data = []
for mode_key, mode_title, marker_color in mode_styles:
    for band_idx, (band, band_label, band_symbol, draw_mode) in enumerate(distance_bands):
        data.append(
            go.Scatter(
                x=data_df.index,
                y=data_df['mean_circuity_' + mode_key + ':' + band],
                mode=draw_mode,
                name=band_label,
                marker=dict(
                    size=12,
                    color=marker_color,
                    line=dict(width=2, color='black'),
                    symbol=band_symbol,
                ),
                # The three band labels repeat for every mode, so the legend was
                # ambiguous; group the entries per mode and title each group
                # (same scheme as the joint three-panel figure). The title is
                # set on the first trace of the group only.
                legendgroup=mode_key,
                legendgrouptitle_text=mode_title if band_idx == 0 else None,
            )
        )

layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=1200, height=600,
    template='plotly_white',
    margin=dict(l=20, r=20, t=20, b=20),
    font=dict(
        family="Times New Roman",
        size=16,
        color="Black"
    ),
    legend=dict(
        yanchor="top",
        y=0.95,
        xanchor="left",
        x=0.01
    ),
    xaxis=dict(
        title='Cities',
        showline=True, linewidth=2, linecolor='black',
        tickangle=-60,
    ),
    yaxis=dict(
        title='Circuity',
        showline=True, linewidth=2, linecolor='black',
        range=[1, 2.5],
    ),
)

fig = go.Figure(data=data, layout=layout)
fig.show()

Joint Figure

In [None]:
# Joint figure: one subplot row per mode (driving, cycling, walking), each
# showing the mean circuity per city for the three distance bands. Cities are
# in alphabetical order; only the bottom row shows the city tick labels.
fig = make_subplots(rows=3, cols=1,
                    specs=[[{}], [{}], [{}]], vertical_spacing=.03)

data_df = data_df.sort_index()

# (column key / legend group, legend group title, marker colour) — one per row
mode_rows = [
    ('driving', 'Driving', 'indianred'),
    ('cycling', 'Cycling', 'cornflowerblue'),
    ('walking', 'Walking', 'lightseagreen'),
]
# (column suffix, legend label, marker symbol)
distance_bands = [
    ('0_2', '0–2 km', 'diamond'),
    ('2_8', '2–8 km', 'star-triangle-up'),
    ('8_12', '8–12 km', 'hexagon'),
]

for row_number, (mode_key, mode_title, marker_color) in enumerate(mode_rows, start=1):
    for band_idx, (band, band_label, band_symbol) in enumerate(distance_bands):
        # add_trace replaces the deprecated append_trace.
        fig.add_trace(
            go.Scatter(
                x=data_df.index,
                y=data_df['mean_circuity_' + mode_key + ':' + band],
                mode='markers',
                name=band_label,
                marker=dict(
                    size=12,
                    color=marker_color,
                    line=dict(width=2, color='black'),
                    symbol=band_symbol,
                ),
                legendgroup=mode_key,
                # The group title is set on the first trace of each group only.
                legendgrouptitle_text=mode_title if band_idx == 0 else None,
            ),
            row=row_number, col=1)

layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=1200, height=1200,
    template='plotly_white',
    font=dict(
        family="Times New Roman",
        size=18,
        color="Black"
    ),
    showlegend=True,
    legend=dict(
        orientation='h',
        bordercolor='black',
        borderwidth=2,
        x=0.36,
        y=0.1,
        xref="container",
        yref="container",
    )
)

for row_number, (mode_key, mode_title, marker_color) in enumerate(mode_rows, start=1):
    # Plotly renders '\n' in text as a plain space; '<br>' is its line-break
    # markup, so the mode name and 'Circuity' now appear on separate lines.
    fig.update_yaxes(title=f'{mode_title}<br>Circuity',
                     showline=True, linewidth=2, linecolor='black',
                     range=[1, 2.5],
                     row=row_number, col=1)
    # NOTE(review): dtick=[] is an unusual value; it presumably forces a tick
    # for every city instead of automatic thinning — confirm (dtick=1 would be
    # the explicit form). Left unchanged.
    fig.update_xaxes(title='Cities',
                     dtick=[],
                     showline=True, linewidth=2, linecolor='black',
                     tickangle=-60,
                     row=row_number, col=1)

fig.update_layout(layout)
# Hide the city labels on the two upper rows; they share the bottom row's order.
fig.update_layout(xaxis_showticklabels=False,
                  xaxis2_showticklabels=False,
                 )

fig.show()

In [None]:
# Change in the across-city mean driving circuity from one distance band to the next.
driving_band_columns = ['mean_circuity_driving:' + band for band in ('0_2', '2_8', '8_12')]
data_df[driving_band_columns].mean().diff()

In [None]:
# Change in the across-city mean cycling circuity from one distance band to the next.
cycling_band_columns = ['mean_circuity_cycling:' + band for band in ('0_2', '2_8', '8_12')]
data_df[cycling_band_columns].mean().diff()

In [None]:
# Change in the across-city mean walking circuity from one distance band to the next.
walking_band_columns = ['mean_circuity_walking:' + band for band in ('0_2', '2_8', '8_12')]
data_df[walking_band_columns].mean().diff()

# Circuity plots for transport modes per distance

In [None]:
# One figure per distance band comparing the three modes. Within each figure
# the cities are ordered by their cycling circuity for that band, and cycling
# is the only series drawn with a connecting line.
# (column key, legend name, plotly draw mode, marker colour, marker symbol)
interval_mode_styles = [
    ('driving', 'Driving', 'markers', 'indianred', 'diamond'),
    ('cycling', 'Cycling', 'lines+markers', 'cornflowerblue', 'star-triangle-up'),
    ('walking', 'Walking', 'markers', 'lightseagreen', 'hexagon'),
]
axis_line = dict(showline=True, linewidth=2, linecolor='black')

for interval in ['0_2', '2_8', '8_12']:
    data_df = data_df.sort_values(by=f'mean_circuity_cycling:{interval}')

    data = [
        go.Scatter(
            x=data_df.index,
            y=data_df[f'mean_circuity_{mode_key}:{interval}'],
            mode=draw_mode,
            name=mode_name,
            marker=dict(
                size=12,
                color=marker_color,
                line=dict(width=2, color='black'),
                symbol=marker_symbol,
            ),
        )
        for mode_key, mode_name, draw_mode, marker_color, marker_symbol in interval_mode_styles
    ]

    layout = go.Layout(
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        width=1200, height=600,
        template='plotly_white',
        margin=dict(l=20, r=20, t=20, b=20),
        font=dict(family="Times New Roman", size=16, color="Black"),
        legend=dict(yanchor="top", y=0.95, xanchor="left", x=0.01),
        xaxis=dict(title='Cities', tickangle=-60, **axis_line),
        yaxis=dict(title='Circuity', range=[1, 2.5], **axis_line),
    )

    fig = go.Figure(data=data, layout=layout)
    fig.show()

# Law of Detour Ratios

In [None]:
# Fit, per city and per mode, the linear model
#     routed distance / 1000 = alpha + beta * haversine distance
# and keep the intercept (alpha), slope (beta) and R^2 of each fit.
# The result is a table indexed by capitalised city name with columns
# '<Mode>_alpha', '<Mode>_beta' and '<Mode>_r2'.
detour_modes = {
    'Driving': 'driving-car_dist',
    'Cycling': 'cycling-regular_dist',
    'Walking': 'foot-walking_dist',
}

detour_rows = []
for city_name in city_data.keys():
    city_frame = city_data[city_name]['data']
    X = city_frame[['haversine_dist']]

    city_ = {'City': city_name.capitalize()}
    for pretty_mode, mode in detour_modes.items():
        y = city_frame[mode] / 1000

        model = LinearRegression()
        model.fit(X, y)

        city_[f'{pretty_mode}_alpha'] = model.intercept_
        city_[f'{pretty_mode}_beta'] = model.coef_[0]
        city_[f'{pretty_mode}_r2'] = model.score(X, y)

    detour_rows.append(city_)

detour_law = pd.DataFrame(detour_rows).set_index('City', drop=True)

In [None]:
# Display the fitted coefficients rounded to two decimals (detour_law itself is not modified).
detour_law.round(2)

In [None]:
# Visual sanity check of the detour-law fit for one city: regress London's
# driving route distance on the straight-line (haversine) distance and draw
# the fitted line over the raw origin-destination pairs.
# pandas, LinearRegression and matplotlib.pyplot are imported in the
# notebook's first cell, so they are not imported again here.
london_trips = city_data['london']['data']

# Feature matrix (2-D) and target: route distance divided by 1000.
X = london_trips[['haversine_dist']]
y = london_trips['driving-car_dist'] / 1000

# Fit the linear model  y = intercept + coefficient * X.
model = LinearRegression()
model.fit(X, y)

# Scatter of the data with the regression line on top.
plt.scatter(X, y, alpha=0.6, label='Data points')
plt.plot(X, model.predict(X), 'r-', label='Linear Regression')
plt.title('London: driving route distance vs. straight-line distance')
plt.xlabel('Haversine distance [km]')
plt.ylabel('Driving route distance [km]')
plt.legend()
plt.show()

# Fitted parameters (alpha and beta of the detour law for this city/mode).
print(f'Intercept: {model.intercept_}')
print(f'Coefficient: {model.coef_[0]}')

In [None]:
# Load the per-city table that carries the cluster assignment; its 'City' and
# 'cluster' columns are used by the following cells.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# if it comes from a trusted source.
data_df = pd.read_pickle('./data_complete_clusters.pickle')

In [None]:
# Index the cluster table by city name so it can be joined on the index.
# Guarded so that re-running this cell is a no-op instead of a KeyError once
# 'City' has already been moved from the columns into the index.
if data_df.index.name != 'City':
    data_df = data_df.set_index('City', drop=True)

In [None]:
# Attach each city's cluster label to its detour-law coefficients.
# Inner join on the index of both tables: a city that is missing from either
# side is dropped from the result.
detour_clusters_df = pd.merge(
    left=detour_law,
    right=data_df[['cluster']],
    how='inner',
    left_index=True,
    right_index=True,
)

In [None]:
# Number of clusters, taken as the largest cluster label plus one.
# NOTE(review): assumes the labels are the integers 0..K-1 — confirm against
# the clustering step that produced the pickle.
K = detour_clusters_df.cluster.max() + 1

In [None]:
# Preview the cities that belong to a single cluster.
# The cluster to inspect is set explicitly: no earlier cell defines
# `cluster_k` (only the plotting loops below bind it), so reading it here
# without this assignment fails with NameError on Restart & Run All.
cluster_k = 0
detour_clusters_df[detour_clusters_df.cluster == cluster_k]

In [None]:
# Box plot of one detour-law coefficient by cluster: one box per cluster,
# with every city of that cluster drawn as an individual point.
metric = 'Driving_beta'

bars = []
for cluster_k in range(K):
    cluster_data = detour_clusters_df[detour_clusters_df.cluster == cluster_k]

    bars.append(
        go.Box(y=cluster_data[metric],
               boxpoints='all',   # show all points next to the box
               jitter=0.1,
               name='Cluster {}'.format(cluster_k+1),   # 1-based label for display
               marker_color=cluster_colors[cluster_k],
              ))


layout = go.Layout(
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        width=1200, height=600,
        template='plotly_white',
        margin=dict(l=20, r=20, t=20, b=20),
        font=dict(
            family="Times New Roman",
            size=18,
            color="Black"
        ),
        showlegend=False,
        xaxis=dict(
            # NOTE(review): the boxes are clusters, so 'Cities' may be a
            # leftover label from an earlier per-city chart — confirm.
            title='Cities',
            showline=True, linewidth=2, linecolor='black',
            tickangle=-60,
        ),
        yaxis=dict(
            # Derived from `metric` ('Driving_beta' -> 'beta') so the axis
            # title cannot drift from the coefficient actually plotted.
            title=metric.split('_')[-1],
            showline=True, linewidth=2, linecolor='black',
            range=[1, 2],
        ),
    )


fig = go.Figure(data=bars,
                layout=layout,
               )
fig.show()

In [None]:
# Box plot of the driving intercept (alpha) by cluster: one box per cluster,
# with every city of that cluster drawn as an individual point.
metric = 'Driving_alpha'

bars = []
for cluster_k in range(K):
    in_cluster = detour_clusters_df.cluster == cluster_k
    cluster_data = detour_clusters_df[in_cluster]

    box = go.Box(
        y=cluster_data[metric],
        boxpoints='all',
        jitter=0.1,
        name=f'Cluster {cluster_k + 1}',   # 1-based label for display
        marker_color=cluster_colors[cluster_k],
    )
    bars.append(box)

# Axis-line styling shared by both axes.
axis_line = dict(showline=True, linewidth=2, linecolor='black')

layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=1200,
    height=600,
    template='plotly_white',
    margin=dict(l=20, r=20, t=20, b=20),
    font=dict(family="Times New Roman", size=18, color="Black"),
    showlegend=False,
    xaxis=dict(title='Cities', tickangle=-60, **axis_line),
    yaxis=dict(title='alpha', range=[-2, 2], **axis_line),
)

fig = go.Figure(data=bars, layout=layout)
fig.show()

In [None]:
from scipy.stats import ttest_ind

In [None]:
# Metric columns to be tested: every column except the cluster label.
# Selected by name rather than by position so the result does not depend on
# 'cluster' being the last column.
detour_clusters_df.columns.drop('cluster')

In [None]:

def _significance_stars(pvalue):
    """Return the significance marker for a p-value.

    '***' for p < 0.01, '**' for p < 0.05, '*' for p < 0.10, otherwise ''.
    The comparison uses the unrounded p-value: rounding first would move
    values such as 0.0096 up to 0.01 and drop them one significance level.
    A NaN p-value compares False against every threshold and yields ''.
    """
    for threshold, stars in ((0.01, '***'), (0.05, '**'), (0.10, '*')):
        if pvalue < threshold:
            return stars
    return ''


# Pairwise Welch t-tests (equal_var=False) between clusters, one table per
# metric. Each cell reads "statistic (p-value)" followed by the stars.
# Rows are clusters 1..K-1 and columns clusters 2..K, so for K > 2 the table
# also contains self-comparisons and mirrored pairs.
for metric in detour_clusters_df.columns.drop('cluster'):
    print(metric)

    # Select each cluster's values once per metric instead of once per pair.
    samples = [
        detour_clusters_df.loc[detour_clusters_df.cluster == k, metric]
        for k in range(K)
    ]

    results = []
    for cluster_i in range(K-1):
        results_i = []
        for cluster_j in range(1, K):
            test = ttest_ind(samples[cluster_i], samples[cluster_j], equal_var=False)
            # Values are rounded for display only; stars use the raw p-value.
            string = '{} ({})'.format(round(test.statistic, 3), round(test.pvalue, 3))
            string += _significance_stars(test.pvalue)

            results_i.append(string)

        results.append(results_i)

    results = pd.DataFrame(results, index=[f'Cluster {i}' for i in range(1, K)], columns=[f'Cluster {i}' for i in range(2, K+1)])
    display(results)