# Packages

In [1]:
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from dask.distributed import Client
from folders import *
import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import osmnx as ox
import pandas as pd
from tqdm.auto import tqdm

# Directory

In [2]:
# Project root used as the working directory for every relative path below.
# Set the COMMUTING_ZONES_DIR environment variable to run the notebook on
# another machine; when it is unset, the path the notebook was originally run
# with is used, so existing setups keep working unchanged.
path = os.environ.get(
    "COMMUTING_ZONES_DIR",
    r"C:\Users\jigon\OneDrive\Documentos\Economía\Commuting-Zones-Costa-Rica",
)
os.chdir(path)

# Dask setup

In [3]:
# Size of the local cluster. Each worker should keep at least ~3 GB of RAM:
# 32 GB / 10 workers = 3.2 GB, which is enough.
# Despite its name, N_THREADS is the number of single-threaded workers.
N_THREADS = 10

# NOTE(review): dask.diagnostics.ProgressBar is documented for the local
# schedulers; confirm it still renders once the distributed Client created
# below becomes the default scheduler.
ProgressBar().register()

# Start a local distributed cluster with explicit sizing instead of the defaults.
client = Client(threads_per_worker=1, n_workers=N_THREADS)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 10
Total threads: 10,Total memory: 31.71 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:57080,Workers: 10
Dashboard: http://127.0.0.1:8787/status,Total threads: 10
Started: Just now,Total memory: 31.71 GiB

0,1
Comm: tcp://127.0.0.1:57175,Total threads: 1
Dashboard: http://127.0.0.1:57176/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57090,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-x1pa1e17,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-x1pa1e17

0,1
Comm: tcp://127.0.0.1:57178,Total threads: 1
Dashboard: http://127.0.0.1:57179/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57086,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-7izc46f0,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-7izc46f0

0,1
Comm: tcp://127.0.0.1:57170,Total threads: 1
Dashboard: http://127.0.0.1:57173/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57088,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-9l__krh3,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-9l__krh3

0,1
Comm: tcp://127.0.0.1:57161,Total threads: 1
Dashboard: http://127.0.0.1:57164/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57092,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-zl50vugn,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-zl50vugn

0,1
Comm: tcp://127.0.0.1:57181,Total threads: 1
Dashboard: http://127.0.0.1:57182/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57087,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-9om8i13n,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-9om8i13n

0,1
Comm: tcp://127.0.0.1:57154,Total threads: 1
Dashboard: http://127.0.0.1:57155/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57083,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-2xo46yia,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-2xo46yia

0,1
Comm: tcp://127.0.0.1:57166,Total threads: 1
Dashboard: http://127.0.0.1:57167/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57089,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-_zlbl1o3,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-_zlbl1o3

0,1
Comm: tcp://127.0.0.1:57160,Total threads: 1
Dashboard: http://127.0.0.1:57162/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57084,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-sehzd6ca,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-sehzd6ca

0,1
Comm: tcp://127.0.0.1:57169,Total threads: 1
Dashboard: http://127.0.0.1:57171/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57091,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-ztjw8a48,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-ztjw8a48

0,1
Comm: tcp://127.0.0.1:57157,Total threads: 1
Dashboard: http://127.0.0.1:57158/status,Memory: 3.17 GiB
Nanny: tcp://127.0.0.1:57085,
Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-8b1kriqq,Local directory: C:\Users\jigon\AppData\Local\Temp\dask-worker-space\worker-8b1kriqq


# Read network dataset

In [4]:
# Road network built from the OSM XML file (file names are presumably provided
# by the `folders` star import — confirm).
G = ox.graph_from_xml(costa_rican_roads_file)

# Municipality table, sorted by the `municipality` column.
municipalities_coordinates = (
    pd.read_excel(costa_rican_commuting_zones_file)
    .sort_values(by="municipality")
)

# Add speed values


In [5]:
# Attach a speed attribute to the graph edges, then export the graph as
# node and edge GeoDataFrames so the speeds can be edited in tabular form.
G = ox.add_edge_speeds(G)
nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)

# Impute missing values

In [6]:
# Replace every missing edge speed with the mean of the observed speeds.
edges["speed_kph"] = edges["speed_kph"].fillna(value=edges["speed_kph"].mean())

# Impute edge (driving) speeds and calculate edge traversal times

In [7]:
# Write the gap-filled speeds from the edge table back onto the graph ...
nx.set_edge_attributes(G, edges["speed_kph"], "speed_kph")
# ... and derive a `travel_time` attribute for every edge from them.
G = ox.add_edge_travel_times(G)



# Find nearest node for each municipality office
Remember that each municipality's office is used as the reference point for the whole municipality.

In [8]:
# Snap every municipality's (lon, lat) reference point to its closest graph node.
municipalities_nodes = ox.distance.nearest_nodes(
    G,
    X=municipalities_coordinates["lon"],
    Y=municipalities_coordinates["lat"],
)
# Keep the node id next to the municipality it belongs to.
municipalities_coordinates["node"] = municipalities_nodes

# Calculate distance matrix based on travel time

## Create cartesian product of municipalities

In [9]:
# Pair every municipality with every municipality (itself included). Columns of
# the left copy get the `_origin` suffix, columns of the right copy `_destination`.
# NOTE: this cell overwrites its own input, so it is not idempotent — re-run the
# cells above before running it a second time.
municipalities_coordinates = pd.merge(
    municipalities_coordinates,
    municipalities_coordinates,
    how="cross",
    suffixes=("_origin", "_destination"),
)

# Hand the pair table to dask, one partition per worker.
municipalities_coordinates = dd.from_pandas(
    municipalities_coordinates,
    npartitions=N_THREADS,
)

## Compute route distance and travel time through shortest path

- **Travel time:** minutes
- **Distance:** kilometers

## Functions

In [10]:
def shortest_path_dask(dds, col_1, col_2):
    """Find the fastest route between the two nodes stored in one row.

    Meant to be used with a row-wise ``apply``.

    Parameters
    ----------
    dds : row-like
        Row holding the origin and destination node ids.
    col_1 : str
        Name of the column with the origin node.
    col_2 : str
        Name of the column with the destination node.

    Returns
    -------
    The result of ``ox.distance.shortest_path`` on the notebook-level graph
    ``G``, minimising the ``travel_time`` edge attribute.
    """
    origin_node = dds[col_1]
    destination_node = dds[col_2]
    return ox.distance.shortest_path(
        G, origin_node, destination_node, weight="travel_time"
    )
def travel_time(df):
    """Return the travel time along a row's route, in minutes.

    Parameters
    ----------
    df : row-like
        Row exposing the route (a sequence of node ids) under the ``"path"`` key.

    Returns
    -------
    float
        Sum of the ``travel_time`` attribute over the route's edges in the
        notebook-level graph ``G``, divided by 60. NaN when the row has no
        route (``path`` is ``None``).
    """
    route = df['path']
    if route is None:
        # No route is available for this pair: report a missing value instead
        # of failing with a TypeError when the route is sliced into edges.
        return float("nan")
    return sum(ox.utils_graph.get_route_edge_attributes(G, route, "travel_time")) / 60
def distance(df):
    """Return the length of a row's route, in kilometers.

    Parameters
    ----------
    df : row-like
        Row exposing the route (a sequence of node ids) under the ``"path"`` key.

    Returns
    -------
    float
        Sum of the ``length`` attribute over the route's edges in the
        notebook-level graph ``G``, divided by 1,000. NaN when the row has no
        route (``path`` is ``None``).
    """
    route = df['path']
    if route is None:
        # No route is available for this pair: report a missing value instead
        # of failing with a TypeError when the route is sliced into edges.
        return float("nan")
    return sum(ox.utils_graph.get_route_edge_attributes(G, route, "length")) / 1_000

## Computation
- **Speed:** Kilometers per hour. 
- **Route distance:** Kilometers. 
- **Travel time:** Minutes. 

In [11]:
# Schedule (lazily) one shortest-path search per origin/destination pair.
municipalities_coordinates["path"] = municipalities_coordinates.apply(
    shortest_path_dask,
    axis=1,
    args=("node_origin", "node_destination"),
    meta=("path", "object"),
)

# Run the searches on the cluster and bring the result back as a pandas frame.
# NOTE(review): dask.diagnostics.ProgressBar may show nothing under the
# distributed scheduler — confirm.
with ProgressBar():
    municipalities_coordinates = municipalities_coordinates.compute()

# Route travel time, one row at a time, with a tqdm progress bar.
tqdm.pandas(desc="Travel time")
municipalities_coordinates['travel_time'] = municipalities_coordinates.progress_apply(
    travel_time, axis=1
)

# Route length, one row at a time, with a tqdm progress bar.
tqdm.pandas(desc="Distance")
municipalities_coordinates['distance'] = municipalities_coordinates.progress_apply(
    distance, axis=1
)

# Average speed: 60 * distance / travel_time (the markdown above gives the
# units as kilometers, minutes and kilometers per hour).
municipalities_coordinates["avg_speed"] = (
    municipalities_coordinates['distance']
    .mul(60)
    .div(municipalities_coordinates['travel_time'])
)

Travel time:   0%|          | 0/6561 [00:00<?, ?it/s]

Distance:   0%|          | 0/6561 [00:00<?, ?it/s]

# Keep variables of interest

In [12]:
# Keep only the identifying columns of each origin/destination pair and the
# computed route metrics, as an independent copy of the selection.
municipalities_coordinates = municipalities_coordinates.loc[
    :,
    [
        'municipality_origin',
        'CZ_origin',
        'region_origin',
        'province_origin',
        'municipality_destination',
        'CZ_destination',
        'region_destination',
        'province_destination',
        'employment_origin',
        'employment_destination',
        'travel_time',
        'distance',
        'avg_speed',
    ],
].copy()

# Save distance matrix

In [13]:
# Write the distance matrix to Excel without the row index.
municipalities_coordinates.to_excel(
    excel_writer=costa_rican_municipalities_distance_matrix,
    index=False,
)