In [11]:
import os
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
tqdm.pandas()

Argentina Network Preprocessing

In [None]:
# Define paths 
arg_country_folder = "/Volumes/IT/git_projects/traction/casestudies/argentina"
arg_incoming_data = os.path.join(arg_country_folder, "incoming_data", "networks")
arg_output_data = os.path.join(arg_country_folder, "data","networks")

In [None]:
# Read shapefiles
arg_airport_nodes_shp = gpd.read_file(os.path.join(arg_incoming_data,"air_nodes.shp"))
arg_airport_edges_shp = gpd.read_file(os.path.join(arg_incoming_data,"air_edges.shp"))

arg_rail_nodes_shp = gpd.read_file(os.path.join(arg_incoming_data,"rail_nodes.shp"))
arg_rail_edges_shp = gpd.read_file(os.path.join(arg_incoming_data,"rail_edges.shp"))

arg_port_nodes_shp = gpd.read_file(os.path.join(arg_incoming_data,"port_nodes.shp"))
arg_port_edges_shp = gpd.read_file(os.path.join(arg_incoming_data,"port_edges.shp"))

arg_road_nodes_shp = gpd.read_file(os.path.join(arg_incoming_data,"road_nodes.shp"))
arg_road_edges_shp = gpd.read_file(os.path.join(arg_incoming_data,"road_edges.shp"))

In [None]:
# Read csv files
arg_airport_nodes_csv = gpd.read_file(os.path.join(arg_incoming_data,"air_nodes.csv"), ignore_geometry = True)
arg_airport_edges_csv = gpd.read_file(os.path.join(arg_incoming_data,"air_edges.csv"), ignore_geometry = True)

arg_rail_nodes_csv = gpd.read_file(os.path.join(arg_incoming_data,"rail_nodes.csv"), ignore_geometry = True)
arg_rail_edges_csv = gpd.read_file(os.path.join(arg_incoming_data,"rail_edges.csv"), ignore_geometry = True)

arg_port_nodes_csv = gpd.read_file(os.path.join(arg_incoming_data,"port_nodes.csv"), ignore_geometry = True)
arg_port_edges_csv = gpd.read_file(os.path.join(arg_incoming_data,"port_edges.csv"), ignore_geometry = True)

arg_road_nodes_csv = gpd.read_file(os.path.join(arg_incoming_data,"road_nodes.csv"), ignore_geometry = True)
arg_road_edges_csv = gpd.read_file(os.path.join(arg_incoming_data,"road_edges.csv"), ignore_geometry = True)

In [None]:
# Add geometry from shapefile to csv file using merge
arg_airport_nodes = arg_airport_nodes_csv.merge(arg_airport_nodes_shp[["node_id","geometry"]],how = "left", on = "node_id")
arg_airport_edges = arg_airport_edges_csv.merge(arg_airport_edges_shp[["edge_id","geometry"]],how = "left", on = "edge_id")

arg_rail_nodes = arg_rail_nodes_csv.merge(arg_rail_nodes_shp[["node_id","geometry"]],how = "left", on = "node_id")
arg_rail_edges = arg_rail_edges_csv.merge(arg_rail_edges_shp[["edge_id","geometry"]],how = "left", on = "edge_id")

arg_port_nodes = arg_port_nodes_csv.merge(arg_port_nodes_shp[["node_id","geometry"]],how = "left", on = "node_id")
arg_port_edges = arg_port_edges_csv.merge(arg_port_edges_shp[["edge_id","geometry"]],how = "left", on = "edge_id")

arg_road_nodes = arg_road_nodes_csv.merge(arg_road_nodes_shp[["node_id","geometry"]],how = "left", on = "node_id")
arg_road_edges = arg_road_edges_csv.merge(arg_road_edges_shp[["edge_id","geometry"]],how = "left", on = "edge_id")

In [None]:
# Convert to geopandas
arg_airport_nodes = gpd.GeoDataFrame(arg_airport_nodes, geometry='geometry')
arg_airport_edges = gpd.GeoDataFrame(arg_airport_edges, geometry='geometry')

arg_rail_nodes = gpd.GeoDataFrame(arg_rail_nodes, geometry='geometry')
arg_rail_edges = gpd.GeoDataFrame(arg_rail_edges, geometry='geometry')

arg_port_nodes = gpd.GeoDataFrame(arg_port_nodes, geometry='geometry')
arg_port_edges = gpd.GeoDataFrame(arg_port_edges, geometry='geometry')

arg_road_nodes = gpd.GeoDataFrame(arg_road_nodes, geometry='geometry')
arg_road_edges = gpd.GeoDataFrame(arg_road_edges, geometry='geometry')

In [None]:
# Export geopandas to geopackages
arg_airport_nodes.to_file(os.path.join(arg_output_data,"airport.gpkg"), layer='nodes', driver="GPKG")
#arg_airport_edges.to_file(os.path.join(arg_output_data,"airport.gpkg"), layer='edges', driver="GPKG")

arg_rail_nodes.to_file(os.path.join(arg_output_data,"rail.gpkg"), layer='nodes', driver="GPKG")
arg_rail_edges.to_file(os.path.join(arg_output_data,"rail.gpkg"), layer='edges', driver="GPKG")

arg_port_nodes.to_file(os.path.join(arg_output_data,"port.gpkg"), layer='nodes', driver="GPKG")
#arg_port_edges.to_file(os.path.join(arg_output_data,"port.gpkg"), layer='edges', driver="GPKG")

arg_road_nodes.to_file(os.path.join(arg_output_data,"road.gpkg"), layer='nodes', driver="GPKG")
arg_road_edges.to_file(os.path.join(arg_output_data,"road.gpkg"), layer='edges', driver="GPKG")

Vietnam Network Preprocessing

In [None]:
# Define paths 
vnm_country_folder = "/Volumes/IT/git_projects/traction/casestudies/vietnam"
vnm_incoming_data = os.path.join(vnm_country_folder, "incoming_data", "networks")
vnm_output_data = os.path.join(vnm_country_folder, "data","networks")

In [None]:
# Read shapefiles
vnm_airport_nodes_shp = gpd.read_file(os.path.join(vnm_incoming_data,"air_nodes.shp"))
vnm_airport_edges_shp = gpd.read_file(os.path.join(vnm_incoming_data,"air_edges.shp"))

vnm_rail_nodes_shp = gpd.read_file(os.path.join(vnm_incoming_data,"rail_nodes.shp"))
vnm_rail_edges_shp = gpd.read_file(os.path.join(vnm_incoming_data,"rail_edges.shp"))

vnm_port_nodes_shp = gpd.read_file(os.path.join(vnm_incoming_data,"inland_nodes.shp"))
vnm_port_edges_shp = gpd.read_file(os.path.join(vnm_incoming_data,"inland_edges.shp"))

vnm_road_nodes_shp = gpd.read_file(os.path.join(vnm_incoming_data,"road_nodes.shp"))
vnm_road_edges_shp = gpd.read_file(os.path.join(vnm_incoming_data,"road_edges.shp"))

In [None]:
# Read csv files
vnm_airport_nodes_csv = pd.read_excel(os.path.join(vnm_incoming_data,"national_nodes.xlsx"), sheet_name = "air")
vnm_airport_edges_csv = pd.read_excel(os.path.join(vnm_incoming_data,"national_edges.xlsx"), sheet_name = "air")

vnm_rail_nodes_csv = pd.read_excel(os.path.join(vnm_incoming_data,"national_nodes.xlsx"), sheet_name = "rail")
vnm_rail_edges_csv = pd.read_excel(os.path.join(vnm_incoming_data,"national_edges.xlsx"), sheet_name = "rail")

vnm_port_nodes_csv = pd.read_excel(os.path.join(vnm_incoming_data,"national_nodes.xlsx"), sheet_name = "inland")
vnm_port_edges_csv = pd.read_excel(os.path.join(vnm_incoming_data,"national_edges.xlsx"), sheet_name = "inland")

vnm_road_nodes_csv = pd.read_excel(os.path.join(vnm_incoming_data,"national_nodes.xlsx"), sheet_name = "road")
vnm_road_edges_csv = pd.read_excel(os.path.join(vnm_incoming_data,"national_edges.xlsx"), sheet_name = "road")

In [None]:
# Add geometry from shapefile to csv file using merge
vnm_airport_nodes = vnm_airport_nodes_csv.merge(vnm_airport_nodes_shp[["node_id","geometry"]],how = "left", on = "node_id")
vnm_airport_edges = vnm_airport_edges_csv.merge(vnm_airport_edges_shp[["edge_id","geometry"]],how = "left", on = "edge_id")

vnm_rail_nodes = vnm_rail_nodes_csv.merge(vnm_rail_nodes_shp[["node_id","geometry"]],how = "left", on = "node_id")
vnm_rail_edges = vnm_rail_edges_csv.merge(vnm_rail_edges_shp[["edge_id","geometry"]],how = "left", on = "edge_id")

vnm_port_nodes = vnm_port_nodes_csv.merge(vnm_port_nodes_shp[["node_id","geometry"]],how = "left", on = "node_id")
vnm_port_edges = vnm_port_edges_csv.merge(vnm_port_edges_shp[["edge_id","geometry"]],how = "left", on = "edge_id")

vnm_road_nodes = vnm_road_nodes_csv.merge(vnm_road_nodes_shp[["node_id","geometry"]],how = "left", on = "node_id")
vnm_road_edges = vnm_road_edges_csv.merge(vnm_road_edges_shp[["edge_id","geometry"]],how = "left", on = "edge_id")

In [None]:
# Convert to geopandas
vnm_airport_nodes = gpd.GeoDataFrame(vnm_airport_nodes, geometry='geometry')
vnm_airport_edges = gpd.GeoDataFrame(vnm_airport_edges, geometry='geometry')

vnm_rail_nodes = gpd.GeoDataFrame(vnm_rail_nodes, geometry='geometry')
vnm_rail_edges = gpd.GeoDataFrame(vnm_rail_edges, geometry='geometry')

vnm_port_nodes = gpd.GeoDataFrame(vnm_port_nodes, geometry='geometry')
vnm_port_edges = gpd.GeoDataFrame(vnm_port_edges, geometry='geometry')

vnm_road_nodes = gpd.GeoDataFrame(vnm_road_nodes, geometry='geometry')
vnm_road_edges = gpd.GeoDataFrame(vnm_road_edges, geometry='geometry')

In [None]:
# Export shapefiles to geopackages
vnm_airport_nodes.to_file(os.path.join(vnm_output_data,"airport.gpkg"), layer='nodes', driver="GPKG")
#vnm_airport_edges.to_file(os.path.join(vnm_output_data,"airport.gpkg"), layer='edges', driver="GPKG")

vnm_rail_nodes.to_file(os.path.join(vnm_output_data,"rail.gpkg"), layer='nodes', driver="GPKG")
vnm_rail_edges.to_file(os.path.join(vnm_output_data,"rail.gpkg"), layer='edges', driver="GPKG")

vnm_port_nodes.to_file(os.path.join(vnm_output_data,"port.gpkg"), layer='nodes', driver="GPKG")
#vnm_port_edges.to_file(os.path.join(vnm_output_data,"port.gpkg"), layer='edges', driver="GPKG")

vnm_road_nodes.to_file(os.path.join(vnm_output_data,"road.gpkg"), layer='nodes', driver="GPKG")
vnm_road_edges.to_file(os.path.join(vnm_output_data,"road.gpkg"), layer='edges', driver="GPKG")

Tanzania Network Preprocessing

In [79]:
# Define paths
tza_country_folder = "/Volumes/IT/git_projects/traction/casestudies/tanzania"
tza_incoming_data = os.path.join(tza_country_folder, "incoming_data", "networks")
tza_output_data = os.path.join(tza_country_folder, "data","networks")

In [80]:
# Read shapefiles
tza_airport_nodes_shp = gpd.read_file(os.path.join(tza_incoming_data,"airports","tz_airport_node_flows.shp"))

tza_rail_nodes_shp = gpd.read_file(os.path.join(tza_incoming_data,"railways","tanzania-rail-nodes-processed.shp"))
tza_rail_edges_shp = gpd.read_file(os.path.join(tza_incoming_data,"railways","tanzania-rail-ways-processed.shp"))

tza_port_nodes_shp = gpd.read_file(os.path.join(tza_incoming_data,"ports","tz_port_nodes.shp"))
tza_port_edges_shp = gpd.read_file(os.path.join(tza_incoming_data,"ports","tz_port_edges.shp")) 

tza_future_port_nodes_shp = gpd.read_file(os.path.join(tza_incoming_data,"ports","tz_port_nodes_future.shp"))
tza_future_port_edges_shp = gpd.read_file(os.path.join(tza_incoming_data,"ports","tz_port_edges_future.shp")) 

tza_road_nodes_shp = gpd.read_file(os.path.join(tza_incoming_data,"roads","tanroads_nodes_main_all_2017_adj.shp"))
tza_road_edges_shp = gpd.read_file(os.path.join(tza_incoming_data,"roads","tanroads_main_all_2017_adj.shp"))

tza_future_road_edges_shp = gpd.read_file(os.path.join(tza_incoming_data,"roads","tanroads_main_all_2030_adj.shp"))

In [81]:
# Read csv files [commented lines do not have csv files]
tza_airport_nodes_csv = gpd.read_file(os.path.join(tza_incoming_data,"airports","tanzania_airports.csv"), ignore_geometry = True)
#tza_airport_edges_csv = gpd.read_file(os.path.join(tza_incoming_data,"airports",""), ignore_geometry = True)

#tza_rail_nodes_csv = gpd.read_file(os.path.join(tza_incoming_data,"railways",""), ignore_geometry = True)
tza_rail_edges_csv = gpd.read_file(os.path.join(tza_incoming_data,"railways","tanzania-rail-ways-processed.csv"), ignore_geometry = True)

tza_port_nodes_csv = gpd.read_file(os.path.join(tza_incoming_data,"ports","TZ_ports.csv"), ignore_geometry = True)
tza_port_edges_csv = gpd.read_file(os.path.join(tza_incoming_data,"ports","tz_port_edges.csv"), ignore_geometry = True)

#tza_road_nodes_csv = gpd.read_file(os.path.join(tza_incoming_data,"roads",""), ignore_geometry = True)
tza_road_edges_csv = gpd.read_file(os.path.join(tza_incoming_data,"roads","tanroads_main_all_2017_adj.csv"), ignore_geometry = True)

#tza_road_edges_shp.link = tza_road_edges_shp.link.astype(object)
tza_road_edges_csv.link = tza_road_edges_csv.link.astype(int)

In [82]:
# Add geometry from shapefile to csv file using merge
tza_airport_nodes = tza_airport_nodes_csv.merge(tza_airport_nodes_shp[["ident","geometry"]],how = "left", on = "ident")
#tza_airport_edges = tza_airport_edges_csv.merge(tza_airport_edges_shp[["","geometry"]],how = "left", on = "")

#tza_rail_nodes = tza_rail_nodes_csv.merge(tza_rail_nodes_shp[["","geometry"]],how = "left", on = "")
tza_rail_edges = tza_rail_edges_csv.merge(tza_rail_edges_shp[["id","geometry"]],how = "left", on = "id")

tza_port_nodes = tza_port_nodes_csv.merge(tza_port_nodes_shp[["id","geometry"]],how = "left", on = "id")
tza_port_edges = tza_port_edges_csv.merge(tza_port_edges_shp[["edgeid","geometry"]],how = "left", on = "edgeid")

#tza_road_nodes = tza_road_nodes_csv.merge(tza_road_nodes_shp[["","geometry"]],how = "left", on = "")
tza_road_edges = tza_road_edges_csv.merge(tza_road_edges_shp[["link","geometry"]],how = "left", on = "link")

In [83]:
# Convert to geopandas
tza_airport_nodes = gpd.GeoDataFrame(tza_airport_nodes, geometry='geometry')
#tza_airport_edges = gpd.GeoDataFrame(tza_airport_edges_shp, geometry='geometry') #shp used

tza_rail_nodes = gpd.GeoDataFrame(tza_rail_nodes_shp, geometry='geometry') #shp used
tza_rail_edges = gpd.GeoDataFrame(tza_rail_edges, geometry='geometry')

tza_port_nodes = gpd.GeoDataFrame(tza_port_nodes, geometry='geometry')
tza_port_edges = gpd.GeoDataFrame(tza_port_edges, geometry='geometry')

tza_road_nodes = gpd.GeoDataFrame(tza_road_nodes_shp, geometry='geometry') #shp used
tza_road_edges = gpd.GeoDataFrame(tza_road_edges, geometry='geometry')

In [84]:
# Create new railway column 

tza_rail_edges["railway"] = tza_rail_edges.progress_apply(lambda x:f"rail",axis=1)

100%|██████████████████████████████████████| 485/485 [00:00<00:00, 37901.99it/s]


In [85]:
# Create new column for node_id and edge_id

#tza_airport_nodes["node_id"] = tza_airport_nodes.index
#tza_airport_nodes["node_id"]=tza_airport_nodes.progress_apply(lambda x:f"airport_node_{x.node_id}",axis=1)

#tza_airport_edges["edge_id"] = tza_airport_edges.index
#tza_airport_edges["edge_id"]=tza_airport_edges.progress_apply(lambda x:f"airport_edge_{x.edge_id}",axis=1)

#tza_rail_nodes["node_id"] = tza_rail_nodes.index
#tza_rail_nodes["node_id"]=tza_rail_nodes.progress_apply(lambda x:f"rail_node_{x.node_id}",axis=1)

#tza_rail_edges["edge_id"] = tza_rail_edges.index
#tza_rail_edges["edge_id"]=tza_rail_edges.progress_apply(lambda x:f"rail_edge_{x.edge_id}",axis=1)

#tza_port_nodes["node_id"] = tza_port_nodes.index
#tza_port_nodes["node_id"]=tza_port_nodes.progress_apply(lambda x:f"port_node_{x.node_id}",axis=1)

#tza_port_edges["edge_id"] = tza_port_edges.index
#tza_port_edges["edge_id"]=tza_port_edges.progress_apply(lambda x:f"port_edge_{x.edge_id}",axis=1)

tza_road_nodes["node_id"] = tza_road_nodes.index
tza_road_nodes["node_id"]=tza_road_nodes.progress_apply(lambda x:f"road_node_{x.node_id}",axis=1)

tza_road_edges["edge_id"] = tza_road_edges.index
tza_road_edges["edge_id"]=tza_road_edges.progress_apply(lambda x:f"rail_edge_{x.edge_id}",axis=1)

100%|████████████████████████████████████| 1399/1399 [00:00<00:00, 44695.03it/s]
100%|████████████████████████████████████| 1691/1691 [00:00<00:00, 51899.75it/s]


In [86]:
# Export geopandas to geopackages
tza_airport_nodes.to_file(os.path.join(tza_output_data,"airport.gpkg"), layer='nodes', driver="GPKG")
#tza_airport_edges.to_file(os.path.join(tza_output_data,"airport.gpkg"), layer='edges', driver="GPKG")

tza_rail_nodes.to_file(os.path.join(tza_output_data,"rail.gpkg"), layer='nodes', driver="GPKG")
tza_rail_edges.to_file(os.path.join(tza_output_data,"rail.gpkg"), layer='edges', driver="GPKG")

tza_port_nodes.to_file(os.path.join(tza_output_data,"port.gpkg"), layer='nodes', driver="GPKG")
#tza_port_edges.to_file(os.path.join(tza_output_data,"port.gpkg"), layer='edges', driver="GPKG")

tza_road_nodes.to_file(os.path.join(tza_output_data,"road.gpkg"), layer='nodes', driver="GPKG")
tza_road_edges.to_file(os.path.join(tza_output_data,"road.gpkg"), layer='edges', driver="GPKG")

East Africa Network Preprocessing

In [None]:
# Define paths
afr_country_folder = "/Volumes/IT/git_projects/traction/casestudies/africa"
afr_incoming_data = os.path.join(afr_country_folder, "incoming_data", "networks")
afr_output_data = os.path.join(afr_country_folder, "data","networks")

In [None]:
afr_road_nodes = gpd.read_file(os.path.join(afr_output_data,"africa_roads_modified.gpkg"), layer="nodes")
afr_road_edges = gpd.read_file(os.path.join(afr_output_data,"africa_roads_modified.gpkg"), layer="edges")

In [None]:
afr_road_nodes.columns
afr_road_nodes.rename(columns ={'CONTINENT':'continent'}, inplace=True)
afr_road_nodes.drop('old_node_id', inplace=True, axis=1)

In [None]:
print(afr_road_nodes.columns)
print(afr_road_edges.columns)