This notebook aims to produce transport layers that are fit for the model.

Transport modes are among: roads, railways, waterways, maritime, and multimodal. Roads are required. Note that the multimodal is required if there are more than 2 transport modes.

### Input
- For each mode, a `raw_<mode>_edges.geojson` and and a `raw_<mode>_nodes.geoson`, except for multimodal for which there is only a edge layer
- Required attributes :
    - `raw_roads_edges.geojson`: "class" ("primary", "seconday", etc.), "surface" ("paved, "unpaved")
    - `raw_multimodal_edges.geojson`: "multimodes" ("roads-railways", "roads-maritime", etc.)
    - For the countri
- Optional attributes :
    - `raw_<mode>_edges.geojson`: "capacity" (float, max daily tonnage on the edge)
- Other requirements :
    - the location of countries should be placed in the nodes layers
    - concretely, there should be a "name" attribute in `raw_<mode>_nodes.geoson`, with the trigram of the country in the selected node
       
### Output
- For each mode, a `<mode>_edges.geojson` and a `<mode>_nodes.geoson`
- New attributes :
    - `<mode>_edges.geojson` and `<mode>_nodes.geoson`: 'id' (integer)
    - `<mode>_edges.geojson`: 'end1' and 'end2' (integer, id of nodes located at the end of the edges), 'km' (float, length of edge)
    - Note that the end1 and end2 of edges are found with looking at the node of `<mode>_nodes.geoson` which is the closed to the ends of the dege
- Modification on the geometry
    - (optional) `<mode>_nodes.geoson`: if the end1 or end2 nodes are not located at the ends of the edges, they are moved to the ends of the edges
    - (optional) `<mode>_nodes.geoson`: if there are several points very close to each others (distance epsilon), merge them

### Folder path
- input files : in the input_folder, there should be one subfolder per mode, named `<Mode>` with the files
- output : defined by output_folder

### Points to improve
- we could think of not using a `raw_<mode>_nodes.geoson` and generate the nodes directly from the ends of edges
- but we need to say where the countries are

In [1]:
region = "ECA"

In [2]:
import os
import time
import math
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
import shapely.wkt
from tqdm import tqdm

if region == "Italia":
    input_folder = os.path.join('..', '..', '..', '..', 'Research', 'Elisa', "disruptsc-ita", "input", "Italy", "Transport")
else:
    input_folder = os.path.join('..', '..', '..', region, 'Data', 'Structured', "Transport")

output_folder = os.path.join('..', 'input', region, 'Transport')

In [3]:
projected_crs = {
    'Cambodia': 3857,
    'Ecuador': 31986,
    'ECA': 3857,
    'Italia': 32633
}
projected_crs = projected_crs[region]

In [4]:
def loadShp(filename):
    gpdf = gpd.read_file(filename)
    gpdf = gpdf[~gpdf['geometry'].isnull()]
    gpdf = gpdf.to_crs(epsg=4326)
    return gpdf

In [5]:
def createNodes(df_links):
    all_coords = df_links['geometry'].apply(getEndCoordsFromLine).to_list()
    all_coords = list(set([item for sublist in all_coords for item in sublist]))
    return gpd.GeoDataFrame({"geometry": [Point(coords) for coords in all_coords], "id": range(len(all_coords))}, crs=4326)

def getEndCoordsFromLine(linestring_obj):
    end1Coord = linestring_obj.coords[0]
    end2Coord = linestring_obj.coords[-1]
    return [end1Coord, end2Coord]

def getEndPointsFromLine(linestring_obj):
    end1Coord = linestring_obj.coords[0]
    end2Coord = linestring_obj.coords[-1]
    return Point(*end1Coord), Point(*end2Coord)

def getIndexClosestPoint(point, df_with_points):
    distList = [point.distance(item) for item in df_with_points['geometry'].tolist()]
    return int(df_with_points.index[distList.index(min(distList))])

def updateLineString(linestring_obj, newEnd1, newEnd2):
    return LineString([newEnd1.coords[0]] + linestring_obj.coords[1:-1]+[newEnd2.coords[0]])

def assignEndpointsAndUpdate(df_links, id_link, df_nodes, update=False):
    p1, p2 = getEndPointsFromLine(df_links.loc[id_link, 'geometry'])
    id_closest_point1 = getIndexClosestPoint(p1, df_nodes)
    id_closest_point2 = getIndexClosestPoint(p2, df_nodes)
    df_links.loc[id_link, 'end1'] = id_closest_point1
    df_links.loc[id_link, 'end2'] = id_closest_point2
    if update:
        df_links.loc[id_link, 'geometry'] = updateLineString(df_links.loc[id_link, 'geometry'], df_nodes.loc[id_closest_point1, 'geometry'], df_nodes.loc[id_closest_point2, 'geometry'])
    return df_links

def assignEndpointsAndUpdateFullDf(df_links, df_nodes, update=False):
    print('Assigning end nodes to linestring')
    if update:
        print('Stag liens to endnodes')
    res = df_links.copy()
    for i in tqdm(res.index):
        res = assignEndpointsAndUpdate(res, i, df_nodes, update=update)
    res['end1'] = res['end1'].astype(int)
    res['end2'] = res['end2'].astype(int)
    return res

def getAllEndpoints(df_links):
    all_endpoints = [getEndPointsFromLine(item) for item in df_links['geometry']]
    return [item for sublist in all_endpoints for item in sublist]

def mergePoints(df_nodes, df_links, epsilon):
    print("Nb of original nodes:", df_nodes.shape[0])
    
    all_endpoints = getAllEndpoints(df_links)
    print("Nb of endpoints:", len(all_endpoints))
    
    all_points_gpd = gpd.GeoDataFrame({"geometry": df_nodes['geometry'].tolist()+getAllEndpoints(df_links)}, crs={'epsg':'4326'})
    buffered_polygons = gpd.GeoDataFrame({"geometry": all_points_gpd.buffer(distance=epsilon)}, crs={'epsg':'4326'})
    multipolygon = buffered_polygons.unary_union
    centroids_each_polygon = [polygon.centroid for polygon in multipolygon]
    print("Nb of grouped points:", len(centroids_each_polygon))
    
    return gpd.GeoDataFrame({"id":range(len(multipolygon)), "geometry":centroids_each_polygon}, crs={'epsg':'4326'})

def assignEndpointsOneEdge(row, df_nodes):
    p1, p2 = getEndPointsFromLine(row['geometry'])
    id_closest_point1 = getIndexClosestPoint(p1, df_nodes)
    id_closest_point2 = getIndexClosestPoint(p2, df_nodes)
    row['end1'] = id_closest_point1
    row['end2'] = id_closest_point2
    return row

def assignEndpoints(df_links, df_nodes):
    return df_links.apply(lambda row: assignEndpointsOneEdge(row, df_nodes), axis=1)

In [6]:
def loadAndFormatGeojson(transport_mode, nodeedge, subfolder, suffix=""):
    if nodeedge == "nodes":
        nodes = gpd.read_file(os.path.join(subfolder, "raw_"+transport_mode+"_nodes"+suffix+".geojson"))
        if 'index' in nodes.columns:
            nodes = nodes.drop('index', axis=1)
        nodes['id']=range(nodes.shape[0])
        nodes.index = nodes['id']
        nodes.index.name = "index"
        print("There are", nodes.shape[0], "nodes")
        print(nodes.crs)
        return nodes
    
    if nodeedge == "edges":
        edges = gpd.read_file(os.path.join(subfolder, "raw_"+transport_mode+"_edges"+suffix+".geojson"))
        edges = edges[~edges['geometry'].isnull()]
        if 'index' in edges.columns:
            edges = edges.drop('index', axis=1)
        edges['id']=range(edges.shape[0])
        edges['end1'] = None
        edges['end2'] = None
        if "capacity" not in edges.columns:
            edges['capacity'] = None
        edges.index = edges['id']
        edges.index.name = "index"
        print("There are", edges.shape[0], "edges")
        print(edges.crs)
        return edges
    
    
def addKm(edges, crs):
    # Project the layer. Watch out, the CRS should be adapted to the country
    edges['km'] = edges.to_crs({'init': 'epsg:'+str(crs)}).length/1000
    return edges


def create_nodes_from_edges(gpd_edges):
    endpoints = gpd.GeoDataFrame(
        {"geometry": gpd_edges.geometry.apply(lambda line: [Point(line.coords[0]), Point(line.coords[-1])]).explode()}
    ).reset_index(drop=True)
    tolerance = 6

    # Round the x and y coordinates and create a new column for duplicate detection
    endpoints['rounded_geometry'] = endpoints.geometry.apply(lambda geom: (round(geom.x, tolerance), round(geom.y, tolerance)))
    
    # Drop duplicates based on the rounded coordinates
    unique_endpoints = endpoints.drop_duplicates(subset='rounded_geometry')['geometry']

    nodes = gpd.GeoDataFrame(geometry=unique_endpoints.values, crs=gpd_edges.crs)
    nodes['id'] = range(nodes.shape[0])
    return nodes

In [52]:
from shapely.ops import linemerge, unary_union


def export(nodes, edges, input_folder, output_folder, transport_mode, special_suffix):
    subfolder = os.path.join(input_folder, transport_mode.capitalize())
    filename = os.path.join(output_folder, transport_mode+"_nodes"+special_suffix+".geojson")
    nodes.to_file(filename, driver="GeoJSON", index=False)
    print(filename + ' exported')
    
    filename = os.path.join(subfolder, "treated_"+transport_mode+"_nodes"+special_suffix+".geojson")
    nodes.to_file(filename, driver="GeoJSON", index=False)
    print(filename + ' exported')
    
    filename = os.path.join(output_folder, transport_mode+"_edges"+special_suffix+".geojson")
    edges.to_file(filename, driver="GeoJSON", index=False)
    print(filename + ' exported')
    
    filename = os.path.join(subfolder, "treated_"+transport_mode+"_edges"+special_suffix+".geojson")
    edges.to_file(filename, driver="GeoJSON", index=False)
    print(filename + ' exported')


def create_nodes_and_update_edges(edges):
    # create nodes from endpoints
    endpoints = gpd.GeoDataFrame({"end1": edges.geometry.apply(lambda line: Point(line.coords[0])), "end2": edges.geometry.apply(lambda line: Point(line.coords[-1]))})
    all_endpoints = gpd.GeoDataFrame(pd.concat([endpoints['end1'], endpoints['end2']]), columns=["geometry"], crs=edges.crs)
    all_endpoints['geometry_wkt'] = all_endpoints['geometry'].to_wkt()
    nodes = all_endpoints.drop_duplicates('geometry_wkt').copy()
    nodes['id'] = range(nodes.shape[0])
    
    # add nodes_id into end1 and end2 columns of edges
    edges['end1'] = endpoints['end1'].to_wkt().map(nodes.set_index('geometry_wkt')['id'])
    edges['end2'] = endpoints['end2'].to_wkt().map(nodes.set_index('geometry_wkt')['id'])

    return nodes, edges


def remove_degree_2_nodes(gdf):
    # Assume gdf is your GeoDataFrame of road network LineStrings.
    # First, perform a unary union of all geometries to combine touching segments.
    merged_union = unary_union(gdf.geometry)  # Dissolve boundaries between touching geometries
    
    # Then, merge contiguous line segments. This will merge lines connected by a degree‑2 node.
    merged_lines = linemerge(merged_union)  # Returns a LineString or MultiLineString
    
    # If the result is a MultiLineString, convert it to a list of LineStrings.
    if merged_lines.geom_type == 'LineString':
        merged_geoms = [merged_lines]
    elif merged_lines.geom_type == 'MultiLineString':
        merged_geoms = list(merged_lines.geoms)  # Use the .geoms attribute to get the individual LineStrings
    
    # Create a new GeoDataFrame with the merged geometries.
    return gpd.GeoDataFrame(geometry=merged_geoms, crs=gdf.crs)

# Roads

In [58]:
unary_union(edges.geometry).geom_type

'MultiLineString'

In [55]:
remove_degree_2_nodes(edges)

Unnamed: 0,geometry
0,"LINESTRING (32.72381 39.75849, 33.26917 39.637..."
1,"LINESTRING (40.00735 43.41756, 40.00833 43.421..."
2,"LINESTRING (40.01074 43.39364, 40.24745 43.316..."
3,"LINESTRING (40.31954 43.23057, 40.29732 43.208..."
4,"LINESTRING (40.31954 43.23057, 40.32382 43.230..."
...,...
15849,"LINESTRING (85.61382 47.50246, 85.61577 47.500..."
15850,"LINESTRING (85.68279 47.49446, 85.6743 47.4975..."
15851,"LINESTRING (85.68279 47.49446, 85.68715 47.492..."
15852,"LINESTRING (85.84199 47.47055, 85.84147 47.471..."


In [54]:
edges

Unnamed: 0_level_0,osmids,special,geometry,id,end1,end2,capacity,surface,class,disruption,name,km
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,"(24745, 24748, 24750, 24754, 16821, 16822, 247...",,"LINESTRING (44.86745 40.6866, 44.85309 40.6969...",0,0,2,,paved,,,,10.039223
1,"(86074, 86071, 85944, 86073, 86072)",,"LINESTRING (44.86745 40.6866, 44.88625 40.6697...",1,0,8500,,paved,,,,5.010167
2,"(78727, 78728, 78616, 78632, 78638, 78643, 297...",,"LINESTRING (44.95259 40.61724, 44.95157 40.618...",2,1,0,,paved,,,,14.378978
3,"(11552, 11554, 25476, 74342, 25511, 74348, 194...",,"LINESTRING (44.86559 40.73822, 44.86844 40.745...",3,2,4,,paved,,,,7.279171
4,"(40964, 40967, 40498, 40503, 24126, 12871, 128...",,"LINESTRING (44.50912 40.79478, 44.51165 40.793...",4,3,2,,paved,,,,44.547590
...,...,...,...,...,...,...,...,...,...,...,...,...
15288,,,"LINESTRING (71.7305 41.50634, 71.73183 41.5053...",15288,8496,5058,,paved,,,,3.633177
15290,,,"LINESTRING (71.53592 41.29487, 71.53476 41.297...",15290,8497,9869,,paved,,,,4.711639
15291,{ },custom,"LINESTRING (71.53831 41.29142, 71.53632 41.294...",15291,8498,8497,,paved,,,,0.577699
15292,,,"LINESTRING (69.39987 41.52651, 69.39886 41.525...",15292,8323,2582,,paved,,,,0.780956


In [49]:
transport_mode = 'roads'
special_suffix = "_osmsimp"  # _ximena leave empty "" otherwise
subfolder = os.path.join(input_folder, transport_mode.capitalize())
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder, special_suffix)

nodes, edges = create_nodes_and_update_edges(edges)

# remove selfloop
cond = edges['end1'] == edges['end2']
if cond.sum() > 0:
    print(f"Removing {cond.sum()} self-loops")
    edges = edges[~cond]

# if several edges have the same start and end points, keep only one
edges['end_set'] = edges.apply(lambda row: frozenset([row['end1'], row['end2']]), axis=1)  # Create a set representation of each row's end1 and end2
cond = edges['end_set'].duplicated()
if cond.sum() > 0:
    print(f"Removing {cond.sum()} edges that have the same endpoints")
    edges = edges[~cond]
edges = edges.drop(columns=['end_set'])  # Drop the temporary column

# add columns
edges['surface'] = 'paved'
if "surface" not in edges.columns:
    edges['surface'] = 'paved'
if "class" not in edges.columns:
    edges['class'] = None
if "disruption" not in edges.columns:
    edges['disruption'] = None
if "name" not in edges.columns:
    edges['name'] = None
if "special" not in edges.columns:
    edges['special'] = None
edges = addKm(edges, projected_crs)

# Exports
#export(nodes, edges, input_folder, output_folder, transport_mode, special_suffix)

print(nodes.head())
print(edges.head())

There are 15294 edges
EPSG:4326
Removing 37 self-loops
Removing 232 edges that have the same endpoints
                        geometry                 geometry_wkt  id
index                                                            
0       POINT (44.86745 40.6866)  POINT (44.867447 40.686598)   0
2      POINT (44.95259 40.61724)  POINT (44.952591 40.617243)   1
3      POINT (44.86559 40.73822)  POINT (44.865588 40.738215)   2
4      POINT (44.50912 40.79478)  POINT (44.509122 40.794779)   3
6       POINT (44.9159 40.76316)  POINT (44.915904 40.763158)   4
                                                  osmids special  \
index                                                              
0      (24745, 24748, 24750, 24754, 16821, 16822, 247...    None   
1                    (86074, 86071, 85944, 86073, 86072)    None   
2      (78727, 78728, 78616, 78632, 78638, 78643, 297...    None   
3      (11552, 11554, 25476, 74342, 25511, 74348, 194...    None   
4      (40964, 40967, 40498

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [12]:
transport_mode = 'roads'
special_suffix = ""  # _ximena leave empty "" otherwise
subfolder = os.path.join(input_folder, transport_mode.capitalize())

#nodes['special'] = nodes['name'] #should do it in QGIS but for obscure reasons it does not work
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder, special_suffix)
nodes = create_nodes_from_edges(edges)

edges['surface'] = 'paved'
if "surface" not in edges.columns:
    edges['surface'] = 'paved'
if "class" not in edges.columns:
    edges['class'] = None
if "disruption" not in edges.columns:
    edges['disruption'] = None
if "name" not in edges.columns:
    edges['name'] = None
if "special" not in edges.columns:
    edges['special'] = None
    
edges = addKm(edges, projected_crs)
print(edges['km'].sum())
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

print(nodes.head())
print(edges.head())

nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes"+special_suffix+".geojson"), driver="GeoJSON", index=False)
edges.to_file(os.path.join(output_folder, transport_mode+"_edges"+special_suffix+".geojson"), driver="GeoJSON", index=False)

if (edges['end1'] == edges['end2']).any():
    print('ATT')
    edges[edges['end1'] == edges['end2']]

  return ogr_read(
  in_crs_string = _prepare_from_proj_string(in_crs_string)


There are 508 edges
EPSG:4326
77515.21279862858
Assigning end nodes to linestring


100%|████████████████████████████████████████████████████████████████████████████████| 508/508 [00:07<00:00, 70.90it/s]


                    geometry  id
0  POINT (25.22646 42.19327)   0
1  POINT (16.38534 48.35147)   1
2    POINT (80.4087 44.1841)   2
3  POINT (81.59412 44.28747)   3
4  POINT (75.02899 40.65067)   4
       id  end1  end2           km surface class disruption special capacity  \
index                                                                          
0       0     0     1  1385.760126   paved  None       None    None     None   
1       1     2     3   132.934330   paved  None       None    None     None   
2       2     4     5     9.908933   paved  None       None    None     None   
3       3     6     7     0.914980   paved  None       None  custom     None   
4       4     8     9    59.918420   paved  None       None    None     None   

       name                                           geometry  
index                                                           
0      None  LINESTRING (25.22646 42.19327, 16.38534 48.35147)  
1      None    LINESTRING (80.4087 44.1841, 81

# Maritime

In [43]:
transport_mode = 'maritime'
special_suffix = "_mc"  # _ximena leave empty "" otherwise
subfolder = os.path.join(input_folder, transport_mode.capitalize())
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder, special_suffix)

# create nodes from endpoints
endpoints = gpd.GeoDataFrame({"end1": edges.geometry.apply(lambda line: Point(line.coords[0])), "end2": edges.geometry.apply(lambda line: Point(line.coords[-1]))})
unique_endpoints = list(set(endpoints['end1'].to_list()) | set(endpoints['end2'].to_list()))
nodes = gpd.GeoDataFrame(geometry=unique_endpoints, crs=edges.crs)
nodes['id'] = range(nodes.shape[0])

# add nodes_id into end1 and end2 columns of edges
edges['end1'] = endpoints['end1'].map(nodes.set_index('geometry')['id'])
edges['end2'] = endpoints['end2'].map(nodes.set_index('geometry')['id'])

edges = addKm(edges, crs=3975) #for maritime we use 3975, which is projection for the whole world

print(nodes.head())
print(edges.head())

export(nodes, edges, input_folder, output_folder, transport_mode, special_suffix)

if (edges['end1'] == edges['end2']).any():
    print('ATT')

There are 112 edges
EPSG:4326
                    geometry  id
0        POINT (51 13.00001)   0
1          POINT (19.8 38.6)   1
2  POINT (33.75001 27.89998)   2
3          POINT (32.6 29.7)   3
4      POINT (41.2 16.29999)   4
      from_infra  to_infra    distance    length  id            km capacity  \
index                                                                         
0           None      None         NaN       NaN   0   7223.033494     None   
1           None      None         NaN       NaN   1   6337.468767     None   
2           None      None         NaN       NaN   2  24857.328586     None   
3           None      None         NaN       NaN   3   6546.422559     None   
4       maritime  maritime  532.647058  5.009238   4    585.160733     None   

       end1  end2 special  name  \
index                             
0        42    77    None  None   
1        77    62    None  None   
2        61    77    None  None   
3        61    82    None  None   
4       

  in_crs_string = _prepare_from_proj_string(in_crs_string)


# Airways

In [19]:
transport_mode = 'airways'
subfolder = os.path.join(input_folder, transport_mode.capitalize())

nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)

edges = addKm(edges, projected_crs) #for maritime we use 3975, which is projection for the whole world
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

print(nodes.head())
print(edges.head())

nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON", index=False)
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON", index=False)

if (edges['end1'] == edges['end2']).any():
    print('ATT')

There are 3 nodes
epsg:4326
There are 3 edges
epsg:4326


  return _prepare_from_string(" ".join(pjargs))


Assigning end nodes to linestring
                                         name special  \
index                                                   
0      Aeropuerto Ecologico Galapagos Seymour    None   
1                        Aeropuerto Guayaquil    None   
2             Aeropuerto Quito Mariscal Sucre    None   

                         geometry  id  
index                                  
0      POINT (-90.26504 -0.45506)   0  
1      POINT (-79.88713 -2.15896)   1  
2      POINT (-78.35621 -0.12718)   2  
      special capacity                                           geometry  id  \
index                                                                           
0        None     None  LINESTRING (-90.26504 -0.45506, -78.35621 -0.1...   0   
1        None     None  LINESTRING (-78.35621 -0.12718, -79.88713 -2.1...   1   
2        None     None  LINESTRING (-79.88713 -2.15896, -90.26504 -0.4...   2   

       end1  end2           km  
index                           
0       

# Waterways

In [11]:
transport_mode = 'waterways'
subfolder = os.path.join(input_folder, transport_mode.capitalize())

nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)
edges = addKm(edges, projected_crs)
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

print(nodes.head())
print(edges.head())

nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON", index=False)
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON", index=False)

if (edges['end1'] == edges['end2']).any():
    print('ATT')

There are 22 nodes
epsg:4326
There are 20 edges
epsg:4326
Assigning end nodes to linestring


  return _prepare_from_string(" ".join(pjargs))
100%|████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 1250.87it/s]


       id                      name                    geometry
index                                                          
0       0          Chong Kneas Port  POINT (103.82202 13.26983)
1       1      Kampong Chlnang Port  POINT (104.68121 12.26825)
2       2  Kampong Chlnang Junction  POINT (104.69201 12.27107)
3       3         Kratie River Port  POINT (106.01621 12.48460)
4       4           Kratie Junction  POINT (106.01220 12.48335)
       end1  end2 special  capacity  id  \
index                                     
0         2     0    None  200000.0   0   
1        10     6    None   20000.0   1   
2         6     5    None   20000.0   2   
3         6     4    None   20000.0   3   
4         4     3    None       NaN   4   

                                                geometry          km  
index                                                                 
0      LINESTRING (104.69187 12.27092, 104.67544 12.3...  156.544324  
1      LINESTRING (104.95104 11.55619

# Multimodality

In [81]:
def find_anchor_points(nodes, mode):
    if mode == 'roads':
        return nodes
    elif mode == 'maritime':
        return nodes[nodes['port']]
    elif mode == 'railways':
        return nodes[nodes['station']]
    elif mode == 'airways':
        return nodes[nodes['airport']]
    elif mode == 'waterways':
        return nodes[nodes['port']]
    else:
        raise ValueError("Wrong mode choosen")


def build_multimodal_links(from_nodes, from_mode, to_nodes, to_mode):
    anchor_from_nodes = find_anchor_points(from_nodes, from_mode)
    anchor_to_nodes = find_anchor_points(to_nodes, to_mode)

    # Find the closest road for each port
    links = []
    projected_anchor_from_nodes = anchor_from_nodes.to_crs(epsg=3857)
    projected_anchor_to_nodes = anchor_to_nodes.to_crs(epsg=3857)
    for _, target_node in projected_anchor_to_nodes.iterrows():
        # Find the closest road point to the current port
        closest_from_nodes = projected_anchor_from_nodes.distance(target_node.geometry).idxmin()
        closest_point = projected_anchor_from_nodes.loc[closest_from_nodes].geometry
    
        # Create a LineString from port to closest road point
        link = LineString([target_node.geometry, closest_point])
        links.append({"geometry": link})
    
    # Create a new GeoDataFrame for the links
    links_gdf = gpd.GeoDataFrame(links, crs=projected_anchor_nodes.crs)
    links_gdf = links_gdf.to_crs(epsg=4326)
    links_gdf['multimodes'] = from_mode + '-' + to_mode
    return links_gdf

In [83]:
# automaed creationg of multimodal split
import geopandas as gpd
from shapely.geometry import LineString

multimode = ["roads", "maritime"]
suffix0 = "_osmsimp"
suffix1 = "_mc"

roads_nodes = gpd.read_file(os.path.join(input_folder, "Roads", "treated_roads_nodes_osmsimp.geojson"))
maritime_nodes = gpd.read_file(os.path.join(input_folder, "Maritime", "treated_maritime_nodes_mc.geojson"))
maritime_nodes['port'] = maritime_nodes['port'].map(lambda x: bool(x) if pd.notna(x) else False)
railways_nodes = gpd.read_file(os.path.join(input_folder, "Railways", "treated_railways_nodes.geojson"))
railways_nodes['station'] = railways_nodes['station'].map(lambda x: bool(x) if pd.notna(x) else False)

multimodal_edges = pd.concat([
    build_multimodal_links(roads_nodes, "roads", maritime_nodes, "maritime"),
    build_multimodal_links(roads_nodes, "roads", railways_nodes, "railways"),
    build_multimodal_links(railways_nodes, "railways", maritime_nodes, "maritime")
])
multimodal_edges['km'] = 0.1  # no impact
multimodal_edges['id'] = range(multimodal_edges.shape[0])
multimodal_edges['capacity'] = None
multimodal_edges.to_file(os.path.join(output_folder, "multimodal_edges_osmsimp.geojson"), driver="GeoJSON", index=False)

print(multimodal_edges.head())
print(multimodal_edges.shape)

                                            geometry      multimodes   km  id  \
0   LINESTRING (41.66864 41.6486, 41.66877 41.64588)  roads-maritime  0.1   0   
1  LINESTRING (51.22857 43.60237, 51.22302 43.60393)  roads-maritime  0.1   1   
2  LINESTRING (41.65711 42.15927, 41.66985 42.16787)  roads-maritime  0.1   2   
3  LINESTRING (49.73252 40.28461, 49.71417 40.28648)  roads-maritime  0.1   3   
0  LINESTRING (71.39464 42.94935, 71.37812 42.94671)  roads-railways  0.1   4   

  capacity  
0     None  
1     None  
2     None  
3     None  
0     None  
(38, 5)


# Railways

In [72]:
transport_mode = 'railways'

subfolder = os.path.join(input_folder, transport_mode.capitalize())
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)
nodes, edges = create_nodes_and_update_edges(edges)

edges = addKm(edges, projected_crs)
edges['disruption'] = None

print(nodes.head())
print(edges.head())

if (edges['end1'] == edges['end2']).any():
    print('ATT')

export(nodes, edges, input_folder, output_folder, transport_mode, special_suffix="")

There are 31 edges
EPSG:4326
                    geometry  id
0  POINT (71.39464 42.94935)   0
1   POINT (45.2105 41.37973)   1
2  POINT (44.87981 41.65591)   2
3  POINT (82.60064 45.07896)   3
4  POINT (43.64281 40.74461)   4
      OBJECTID NAME1 NAME2 NAME3 ISO_CC RR_FEATURE Shape_Length special  \
index                                                                     
0         None  None  None  None   None       None         None    None   
1         None  None  None  None   None       None         None    None   
2         None  None  None  None   None       None         None    None   
3         None  None  None  None   None       None         None    None   
4         None  None  None  None   None       None         None    None   

                                                geometry  id  end1  end2  \
index                                                                      
0      LINESTRING (51.22432 43.59837, 51.22589 43.606...   0    33    27   
1      LINESTRING (

  in_crs_string = _prepare_from_proj_string(in_crs_string)


# Other stuff, may be useful

### Transition from shp to geojson

In [47]:
for edge_node in ["node", "edge"]:    
    transport_mode = 'roads'
    version = "v8"
    subfolder = os.path.join(folder, 'Data', "Structured", transport_mode.capitalize(), version)

    filename = "raw_"+transport_mode+"_"+edge_node+"s.shp"
    df = gpd.read_file(os.path.join(subfolder, filename))

    version = "current_version"
    subfolder = os.path.join(folder, 'Data', "Structured", transport_mode.capitalize(), version)
    df.to_file(os.path.join(subfolder, "raw_"+transport_mode+"_"+edge_node+"s.geojson"), driver="GeoJSON")

### Change to CRS 4326

In [76]:
for edge_node in ["node", "edge"]:    
    transport_mode = 'multimodal'
    version = "current_version"
    subfolder = os.path.join(folder, 'Data', "Structured", transport_mode.capitalize(), version)
    
    filename = "raw_"+transport_mode+"_"+edge_node+"s.shp"
    df = gpd.read_file(os.path.join(subfolder, "raw_"+transport_mode+"_"+edge_node+"s.geojson"))
    
    df = df.to_crs(4326)
    
    df.to_file(os.path.join(subfolder, "raw_"+transport_mode+"_"+edge_node+"s.geojson"), driver="GeoJSON")