# Packages

In [1]:
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from dask.distributed import Client
from folders import *
import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import osmnx as ox
import pandas as pd
from tqdm.auto import tqdm

# Directory

In [2]:
# Project root used as the working directory for the rest of the notebook.
# Defaults to the original author's checkout; set the COMMUTING_ZONES_DIR
# environment variable to run the notebook on another machine without
# editing this cell.
path = os.environ.get(
    "COMMUTING_ZONES_DIR",
    r"C:\Users\jigon\OneDrive\Documentos\Economía\Commuting-Zones-Costa-Rica",
)
# NOTE(review): the file names imported from `folders` are presumably
# relative to this directory — confirm.
os.chdir(path)

# Dask setup

In [3]:
# Register a global Dask progress bar for subsequent computations.
# NOTE(review): dask.diagnostics.ProgressBar is documented for the local
# (single-machine) schedulers; with the distributed Client created below it
# may not render — confirm, and consider dask.distributed.progress instead.
ProgressBar().register()
# Number of single-threaded workers started below. Keep roughly 3 GB of RAM
# per worker: 32 GB / 10 workers = 3.2 GB each.
N_THREADS = 10
# Start a local distributed cluster; creating the Client makes it the
# default scheduler for later Dask computations.
client = Client(n_workers=N_THREADS, threads_per_worker=1)
# Display the cluster summary (dashboard link, workers, memory).
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 10
Total threads: 10,Total memory: 31.71 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:64230,Workers: 10
Dashboard: http://127.0.0.1:8787/status,Total threads: 10
Started: Just now,Total memory: 31.71 GiB

0,1
Comm: tcp://127.0.0.1:64294,Total threads: 1
Dashboard: http://127.0.0.1:64298/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64238,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-g91n1yz1,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-g91n1yz1

0,1
Comm: tcp://127.0.0.1:64286,Total threads: 1
Dashboard: http://127.0.0.1:64287/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64233,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-4r2eryun,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-4r2eryun

0,1
Comm: tcp://127.0.0.1:64322,Total threads: 1
Dashboard: http://127.0.0.1:64323/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64234,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-j83r4nzq,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-j83r4nzq

0,1
Comm: tcp://127.0.0.1:64315,Total threads: 1
Dashboard: http://127.0.0.1:64316/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64235,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-l2k5lql6,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-l2k5lql6

0,1
Comm: tcp://127.0.0.1:64300,Total threads: 1
Dashboard: http://127.0.0.1:64301/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64237,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-kc1w4t3z,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-kc1w4t3z

0,1
Comm: tcp://127.0.0.1:64342,Total threads: 1
Dashboard: http://127.0.0.1:64343/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64239,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-67n5h1v0,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-67n5h1v0

0,1
Comm: tcp://127.0.0.1:64333,Total threads: 1
Dashboard: http://127.0.0.1:64334/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64240,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-cnl5vv2a,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-cnl5vv2a

0,1
Comm: tcp://127.0.0.1:64336,Total threads: 1
Dashboard: http://127.0.0.1:64337/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64241,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-4ic3dri2,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-4ic3dri2

0,1
Comm: tcp://127.0.0.1:64311,Total threads: 1
Dashboard: http://127.0.0.1:64312/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64236,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-yc34xmzc,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-yc34xmzc

0,1
Comm: tcp://127.0.0.1:64339,Total threads: 1
Dashboard: http://127.0.0.1:64340/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:64242,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-_edpbrl6,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-_edpbrl6


# Read network dataset

In [4]:
# Road network: build the graph from the OSM XML extract, simplifying the
# topology on load and keeping every component for now.
G = ox.graph_from_xml(
    costa_rican_roads_file,
    simplify=True,
    retain_all=True,
)

# Municipality table, ordered by municipality name. Later cells read its
# "lon" and "lat" columns.
municipalities_coordinates = pd.read_excel(costa_rican_commuting_zones_file)
municipalities_coordinates = municipalities_coordinates.sort_values("municipality")

# Filter graph to retain only certain edge types

In [5]:
# Road classes kept in the routing network.
filtr = ['primary', 'secondary', 'tertiary', 'trunk', 'residential']


def _highway_allowed(data, allowed):
    """Return True when every `highway` tag on an edge is in `allowed`.

    Edges without a `highway` attribute are rejected. On a simplified graph a
    merged edge can carry a *list* of highway tags (one per original OSM way);
    a plain `tags not in allowed` test is always true for a list, which would
    drop such edges even when all of their tags are allowed.
    """
    tags = data.get('highway')
    if tags is None:
        return False
    if not isinstance(tags, (list, tuple, set)):
        tags = [tags]
    # Require a non-empty tag collection whose members are all allowed.
    return bool(tags) and all(tag in allowed for tag in tags)


# Collect first, then remove: the graph must not be mutated while iterating.
e = [(u, v, k) for u, v, k, d in G.edges(keys=True, data=True) if not _highway_allowed(d, filtr)]
G.remove_edges_from(e)

# Remove any now-disconnected nodes or subcomponents by keeping only the largest connected component (the topology was already simplified when the graph was loaded)

In [6]:
# Keep only the largest connected component of the filtered graph, discarding
# the nodes and small sub-networks left isolated by the edge filter.
# NOTE(review): with default arguments this is presumably the *weakly*
# connected component, so some directed origin/destination pairs may still be
# unreachable — confirm.
G = ox.utils_graph.get_largest_component(G)

# Add speed values


In [7]:
# Attach a speed attribute to the graph edges; the next cells read it as
# "speed_kph".
G = ox.add_edge_speeds(G)
# Split the graph into node and edge GeoDataFrames so the speeds can be
# inspected and edited with pandas.
nodes, edges = ox.graph_to_gdfs(G)

# Impute missing speed values

In [8]:
# Fill any remaining missing edge speeds with the mean speed over all edges.
# NOTE(review): ox.add_edge_speeds is documented to impute missing speeds
# itself, so this may be a no-op — confirm.
edges["speed_kph"] = edges["speed_kph"].fillna(edges["speed_kph"].mean())

# Impute edge (driving) speeds and calculate edge traversal times

In [9]:
# Write the (imputed) speeds from the edges GeoDataFrame back onto the graph
# edges under the "speed_kph" attribute.
nx.set_edge_attributes(G, values=edges["speed_kph"], name="speed_kph")
# Add a "travel_time" attribute to every edge — presumably in seconds, derived
# from edge length and speed_kph; confirm against the OSMnx documentation.
G = ox.add_edge_travel_times(G)

# Find nearest node for each municipality office
Each municipality's office is used as the reference point for the whole municipality.

In [10]:
# Snap every municipality office to its nearest graph node.
# X takes the longitudes and Y the latitudes.
municipalities_nodes = ox.distance.nearest_nodes(
    G,
    X=municipalities_coordinates["lon"],
    Y=municipalities_coordinates["lat"],
)
# Store the matched node id next to each municipality.
municipalities_coordinates["node"] = municipalities_nodes

# Calculate distance matrix based on travel time

## Create cartesian product of municipalities

In [11]:
# Pair every municipality with every municipality (itself included); the
# left-hand columns get the "_origin" suffix and the right-hand ones
# "_destination". The result is split into one Dask partition per worker.
# Note: this rebinds `municipalities_coordinates`, so re-running the cell
# without re-running the earlier ones would cross the already-crossed table.
municipalities_coordinates = dd.from_pandas(
    municipalities_coordinates.merge(
        municipalities_coordinates,
        how="cross",
        suffixes=('_origin', '_destination'),
    ),
    npartitions=N_THREADS,
)

## Compute route distance and travel time through shortest path

- **Travel time:** minutes
- **Distance:** kilometers

## Functions

In [12]:
def shortest_path_dask(dds, col_1, col_2):
    """Route between the two graph nodes stored in one row.

    Parameters
    ----------
    dds : row-like object indexable by column name
    col_1 : name of the column holding the origin node
    col_2 : name of the column holding the destination node

    Returns
    -------
    Whatever `ox.distance.shortest_path` returns for that node pair on the
    global graph `G`, using the edge attribute "travel_time" as the weight.
    """
    origin_node = dds[col_1]
    destination_node = dds[col_2]
    return ox.distance.shortest_path(
        G,
        origin_node,
        destination_node,
        weight="travel_time",
    )
def travel_time(df):
    """Total travel time, in minutes, along the route stored in `df['path']`.

    Parameters
    ----------
    df : row-like object with a 'path' entry holding the route (a sequence of
        node ids of the global graph `G`), or None when no route was found.

    Returns
    -------
    float
        Sum of the "travel_time" edge attribute along the route divided by 60,
        or NaN when the row has no route.
    """
    route = df['path']
    # An unreachable origin/destination pair has no route; report a missing
    # value instead of crashing while slicing None.
    if route is None:
        return float("nan")
    # Between parallel edges, pick the one with the lowest travel time so the
    # total matches the weight the route was optimized for (the default picks
    # the shortest edge by length).
    edge_times = ox.utils_graph.get_route_edge_attributes(
        G, route, "travel_time", minimize_key="travel_time"
    )
    return sum(edge_times) / 60
def distance(df):
    """Total length, in kilometers, of the route stored in `df['path']`.

    Parameters
    ----------
    df : row-like object with a 'path' entry holding the route (a sequence of
        node ids of the global graph `G`), or None when no route was found.

    Returns
    -------
    float
        Sum of the "length" edge attribute along the route divided by 1,000,
        or NaN when the row has no route.
    """
    route = df['path']
    # An unreachable origin/destination pair has no route; report a missing
    # value instead of crashing while slicing None.
    if route is None:
        return float("nan")
    # Between parallel edges, measure the one with the lowest travel time:
    # that is the edge a travel-time-weighted route actually uses.
    edge_lengths = ox.utils_graph.get_route_edge_attributes(
        G, route, "length", minimize_key="travel_time"
    )
    return sum(edge_lengths) / 1_000

## Computation
- **Speed:** Kilometers per hour. 
- **Route distance:** Kilometers. 
- **Travel time:** Minutes. 

In [13]:
# Stage 1 (Dask): one route per origin/destination row, evaluated lazily and
# then materialized into a pandas DataFrame.
municipalities_coordinates["path"] = municipalities_coordinates.apply(
    shortest_path_dask,
    axis=1,
    args=("node_origin", "node_destination"),
    meta=("path", "object"),
)
# NOTE(review): this progress bar may stay silent under the distributed
# scheduler — confirm.
with ProgressBar():
    municipalities_coordinates = municipalities_coordinates.compute()

# Stage 2 (pandas): per-route totals, each with its own tqdm progress bar.
for _label, _column, _row_func in (
    ("Travel time", "travel_time", travel_time),
    ("Distance", "distance", distance),
):
    tqdm.pandas(desc=_label)
    municipalities_coordinates[_column] = municipalities_coordinates.progress_apply(_row_func, axis=1)

# Average speed: 60 * distance / travel_time.
municipalities_coordinates["avg_speed"] = (
    municipalities_coordinates['distance'].mul(60).div(municipalities_coordinates['travel_time'])
)

Travel time:   0%|          | 0/6561 [00:00<?, ?it/s]

Distance:   0%|          | 0/6561 [00:00<?, ?it/s]

# Keep variables of interest

In [15]:
# Columns written to the final distance matrix, in output order.
columns_of_interest = [
    'municipality_origin',
    'CZ_origin',
    'region_origin',
    'province_origin',
    'municipality_destination',
    'CZ_destination',
    'region_destination',
    'province_destination',
    'employment_origin',
    'employment_destination',
    'travel_time',
    'distance',
    'avg_speed',
]
# Take an independent copy so the route lists and helper columns can be
# dropped without touching a view of the larger frame.
municipalities_coordinates = municipalities_coordinates[columns_of_interest].copy()

# Save distance matrix

In [16]:
# Write the origin/destination matrix to the Excel file named by
# `costa_rican_municipalities_distance_matrix`, without the row index.
municipalities_coordinates.to_excel(costa_rican_municipalities_distance_matrix, index=False)