# Packages

In [1]:
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from dask.distributed import Client
from folders import *
import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import osmnx as ox
import pandas as pd
from tqdm.auto import tqdm

# Directory

In [2]:
# Project root used as the working directory for the rest of the notebook.
# Set the COMMUTING_ZONES_DIR environment variable to run the notebook from
# another checkout without editing this cell; when it is unset, the original
# author's location is used, so existing setups keep working unchanged.
path = os.environ.get(
    "COMMUTING_ZONES_DIR",
    r"C:\Users\jigon\OneDrive\Documentos\Economía\Commuting-Zones-Costa-Rica",
)
# NOTE(review): presumably the file names exported by `folders` are relative
# to this directory - confirm.
os.chdir(path)

# Dask setup

In [3]:
# Number of single-threaded worker processes. Each worker should get at least
# 3 GB of RAM; with 32 GB available, 32 GB / 10 workers = 3.2 GB per worker.
N_THREADS = 10

# Register a global Dask progress bar.
# NOTE(review): dask.diagnostics.ProgressBar targets the local schedulers -
# confirm it reports anything once the distributed Client below is active.
ProgressBar().register()

# Start a local distributed cluster and make it the default scheduler.
client = Client(threads_per_worker=1, n_workers=N_THREADS)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 10
Total threads: 10,Total memory: 31.71 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:61271,Workers: 10
Dashboard: http://127.0.0.1:8787/status,Total threads: 10
Started: Just now,Total memory: 31.71 GiB

0,1
Comm: tcp://127.0.0.1:61353,Total threads: 1
Dashboard: http://127.0.0.1:61354/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61281,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-9wd6vpam,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-9wd6vpam

0,1
Comm: tcp://127.0.0.1:61365,Total threads: 1
Dashboard: http://127.0.0.1:61366/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61274,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-9lc1d0ha,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-9lc1d0ha

0,1
Comm: tcp://127.0.0.1:61359,Total threads: 1
Dashboard: http://127.0.0.1:61360/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61276,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-ktaj7z2f,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-ktaj7z2f

0,1
Comm: tcp://127.0.0.1:61383,Total threads: 1
Dashboard: http://127.0.0.1:61384/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61275,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-5hlu6dtz,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-5hlu6dtz

0,1
Comm: tcp://127.0.0.1:61317,Total threads: 1
Dashboard: http://127.0.0.1:61318/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61278,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-a6afviq2,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-a6afviq2

0,1
Comm: tcp://127.0.0.1:61341,Total threads: 1
Dashboard: http://127.0.0.1:61344/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61279,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-3w88cjj9,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-3w88cjj9

0,1
Comm: tcp://127.0.0.1:61356,Total threads: 1
Dashboard: http://127.0.0.1:61357/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61280,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-ni67x5ku,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-ni67x5ku

0,1
Comm: tcp://127.0.0.1:61362,Total threads: 1
Dashboard: http://127.0.0.1:61363/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61282,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-0sb0949g,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-0sb0949g

0,1
Comm: tcp://127.0.0.1:61333,Total threads: 1
Dashboard: http://127.0.0.1:61337/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61277,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-xxd_fc2r,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-xxd_fc2r

0,1
Comm: tcp://127.0.0.1:61346,Total threads: 1
Dashboard: http://127.0.0.1:61347/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:61283,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-l80eztap,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-l80eztap


# Read network dataset

In [4]:
# Municipality table, ordered by municipality name. Later cells read its
# "lon" and "lat" columns to locate each municipality on the road network.
municipalities_coordinates = (
    pd.read_excel(costa_rican_commuting_zones_file)
    .sort_values("municipality")
)

# Road network parsed from the OSM XML extract: topology is simplified and
# every component is retained at this stage.
G = ox.graph_from_xml(costa_rican_roads_file, retain_all=True, simplify=True)

# Filter graph to retain only certain edge types

In [5]:
# Road classes (values of the OSM `highway` tag) that stay in the network.
filtr = ['primary', 'secondary', 'tertiary', 'trunk', 'residential']


def _is_retained_edge(data, allowed=frozenset(filtr)):
    """Return True when an edge consists only of allowed road classes.

    Parameters
    ----------
    data : dict
        Attribute dictionary of one edge.
    allowed : collection of str
        Accepted `highway` values (defaults to `filtr`).

    Returns
    -------
    bool
        False when the edge has no `highway` attribute or when any of its
        `highway` values is not in `allowed`; True otherwise.
    """
    highway = data.get('highway')
    if highway is None:
        return False
    # The graph was built with simplify=True: an edge merged from several OSM
    # ways with different tags stores `highway` as a list. Comparing that list
    # directly against `filtr` never matches, which used to drop edges such as
    # ['primary', 'secondary'] even though both classes are allowed.
    tags = highway if isinstance(highway, (list, tuple, set)) else [highway]
    return len(tags) > 0 and all(tag in allowed for tag in tags)


# Collect the edges first and remove them afterwards, so the edge view is not
# modified while it is being iterated.
e = [(u, v, k) for u, v, k, d in G.edges(keys=True, data=True) if not _is_retained_edge(d)]
G.remove_edges_from(e)

# Remove any now-disconnected nodes or subcomponents by keeping only the largest connected component

In [6]:
# Keep only the largest connected component of the filtered graph, discarding
# the nodes and sub-networks that the edge filter left isolated.
# NOTE(review): presumably this uses the library default of *weak* connectivity
# - confirm; if so, some directed origin-destination pairs may have no route.
G = ox.utils_graph.get_largest_component(G)

# Add speed values


In [7]:
# Add the `speed_kph` attribute to the graph edges.
G = ox.add_edge_speeds(G)
# Export the graph as node and edge tables; the `edges` table is used in the
# following cells to fill gaps in `speed_kph` and write the result back.
nodes, edges = ox.graph_to_gdfs(G)

# Impute missing values

In [8]:
# Replace any missing edge speed with the mean of the observed speeds.
mean_speed_kph = edges["speed_kph"].mean()
edges["speed_kph"] = edges["speed_kph"].fillna(mean_speed_kph)

# Impute edge (driving) speeds and calculate edge traversal times

In [9]:
# Write the gap-filled speeds from the `edges` table back onto the graph.
# NOTE(review): this relies on the table's index identifying each edge the way
# networkx expects - confirm.
nx.set_edge_attributes(G, values=edges["speed_kph"], name="speed_kph")
# Add the `travel_time` edge attribute, later used as the routing weight.
# NOTE(review): presumably in seconds, since it is divided by 60 to report
# minutes - confirm.
G = ox.add_edge_travel_times(G)

# Find nearest node for each municipality office
Remember we're using their offices as a reference point for the whole municipality. 

In [10]:
# Snap every municipality's reference point to its closest graph node.
office_lon = municipalities_coordinates["lon"]
office_lat = municipalities_coordinates["lat"]
municipalities_nodes = ox.distance.nearest_nodes(G, office_lon, office_lat)

# Keep the node id next to the municipality it belongs to.
municipalities_coordinates["node"] = municipalities_nodes

# Calculate distance matrix based on travel time

## Create cartesian product of municipalities

In [11]:
# Pair every municipality with every municipality (itself included); columns
# from the left side get the `_origin` suffix, the right side `_destination`.
# Note: this cell overwrites its own input, so it must be run only once.
municipality_pairs = municipalities_coordinates.merge(
    municipalities_coordinates,
    how="cross",
    suffixes=('_origin', '_destination'),
)

# Split the pairs into one Dask partition per worker.
municipalities_coordinates = dd.from_pandas(municipality_pairs, npartitions=N_THREADS)

## Compute route distance and travel time through shortest path

- **Travel time:** minutes
- **Distance:** kilometers

## Functions

In [12]:
def shortest_path_dask(dds, col_1, col_2):
    """Route between the two nodes stored in one row, minimising travel time.

    Parameters
    ----------
    dds : row-like
        One row, as handed over by ``apply(..., axis=1)``; indexed by column name.
    col_1 : str
        Name of the column holding the origin node.
    col_2 : str
        Name of the column holding the destination node.

    Returns
    -------
    The result of ``ox.distance.shortest_path`` on the global graph ``G`` for
    that node pair, using the ``travel_time`` edge attribute as weight.
    """
    origin_node = dds[col_1]
    destination_node = dds[col_2]
    return ox.distance.shortest_path(G, origin_node, destination_node, weight="travel_time")
def travel_time(df):
    """Total travel time along a row's route, in minutes.

    Parameters
    ----------
    df : row-like
        One row whose ``'path'`` entry is the route (sequence of graph nodes)
        or ``None`` when no route was found.

    Returns
    -------
    float
        Sum of the ``travel_time`` attribute over the route's edges on the
        global graph ``G``, divided by 60. ``NaN`` when the row has no route.
    """
    route = df['path']
    if route is None:
        # No route exists for this origin-destination pair. Report a missing
        # value instead of failing while slicing None, so one unreachable pair
        # does not abort the whole matrix.
        return np.nan
    edge_times = ox.utils_graph.get_route_edge_attributes(G, route, "travel_time")
    # NOTE(review): edge travel times are presumably in seconds - confirm.
    return sum(edge_times) / 60
def distance(df):
    """Total length of a row's route, in kilometers.

    Parameters
    ----------
    df : row-like
        One row whose ``'path'`` entry is the route (sequence of graph nodes)
        or ``None`` when no route was found.

    Returns
    -------
    float
        Sum of the ``length`` attribute over the route's edges on the global
        graph ``G``, divided by 1,000. ``NaN`` when the row has no route.
    """
    route = df['path']
    if route is None:
        # No route exists for this origin-destination pair. Report a missing
        # value instead of failing while slicing None, so one unreachable pair
        # does not abort the whole matrix.
        return np.nan
    edge_lengths = ox.utils_graph.get_route_edge_attributes(G, route, "length")
    # NOTE(review): edge lengths are presumably in meters - confirm.
    return sum(edge_lengths) / 1_000

## Computation
- **Speed:** Kilometers per hour. 
- **Route distance:** Kilometers. 
- **Travel time:** Minutes. 

In [13]:
# Lazily declare the route for every origin-destination pair.
municipalities_coordinates["path"] = municipalities_coordinates.apply(
    shortest_path_dask,
    axis=1,
    args=("node_origin", "node_destination"),
    meta=("path", "object"),
)

# Run the routing on the cluster and bring the result back as a pandas frame.
with ProgressBar():
    municipalities_coordinates = municipalities_coordinates.compute()

# Summarise each route row by row, with one tqdm bar per measure.
tqdm.pandas(desc="Travel time")
municipalities_coordinates['travel_time'] = municipalities_coordinates.progress_apply(travel_time, axis=1)

tqdm.pandas(desc="Distance")
municipalities_coordinates['distance'] = municipalities_coordinates.progress_apply(distance, axis=1)

# Kilometers per minute times 60 gives kilometers per hour.
# NOTE(review): pairs with zero travel time (presumably origin == destination)
# will not produce a finite speed - confirm this is acceptable downstream.
route_km = municipalities_coordinates['distance']
route_minutes = municipalities_coordinates['travel_time']
municipalities_coordinates["avg_speed"] = 60 * route_km / route_minutes

Travel time:   0%|          | 0/6561 [00:00<?, ?it/s]

Distance:   0%|          | 0/6561 [00:00<?, ?it/s]

# Keep variables of interest

In [14]:
# Columns kept in the final distance matrix, in output order.
columns_of_interest = [
    'municipality_origin', 'CZ_origin', 'region_origin', 'province_origin',
    'municipality_destination', 'CZ_destination', 'region_destination', 'province_destination',
    'employment_origin', 'employment_destination',
    'travel_time', 'distance', 'avg_speed',
]

# Work on an independent copy so later edits never touch the wider frame.
municipalities_coordinates = municipalities_coordinates[columns_of_interest].copy()

# Save distance matrix

In [15]:
# Full distance matrix as an Excel workbook, without the index column.
municipalities_coordinates.to_excel(costa_rican_municipalities_distance_matrix, index=False)

# Stata export: only the pair identifiers plus time and distance, with the
# column names renamed for the Stata side.
stata_columns = ["municipality_origin", "municipality_destination", "travel_time", "distance"]
stata_names = {
    "municipality_origin": "s_canton",
    "municipality_destination": "b_canton",
    "travel_time": "time",
}
stata_matrix = municipalities_coordinates[stata_columns].rename(columns=stata_names)
stata_matrix.to_stata(costa_rican_municipalities_distance_matrix_stata, write_index=False)