This notebook aims to produce transport layers that are fit for the model.

Transport modes are among: roads, railways, waterways, maritime, and multimodal. Roads are required. Note that the multimodal is required if there are more than 2 transport modes.

### Input
- For each mode, a `raw_<mode>_edges.geojson` and and a `raw_<mode>_nodes.geoson`, except for multimodal for which there is only a edge layer
- Required attributes :
    - `raw_roads_edges.geojson`: "class" ("primary", "seconday", etc.), "surface" ("paved, "unpaved")
    - `raw_multimodal_edges.geojson`: "multimodes" ("roads-railways", "roads-maritime", etc.)
    - For the countri
- Optional attributes :
    - `raw_<mode>_edges.geojson`: "capacity" (float, max daily tonnage on the edge)
- Other requirements :
    - the location of countries should be placed in the nodes layers
    - concretely, there should be a "name" attribute in `raw_<mode>_nodes.geoson`, with the trigram of the country in the selected node
       
### Output
- For each mode, a `<mode>_edges.geojson` and a `<mode>_nodes.geoson`
- New attributes :
    - `<mode>_edges.geojson` and `<mode>_nodes.geoson`: 'id' (integer)
    - `<mode>_edges.geojson`: 'end1' and 'end2' (integer, id of nodes located at the end of the edges), 'km' (float, length of edge)
    - Note that the end1 and end2 of edges are found with looking at the node of `<mode>_nodes.geoson` which is the closed to the ends of the dege
- Modification on the geometry
    - (optional) `<mode>_nodes.geoson`: if the end1 or end2 nodes are not located at the ends of the edges, they are moved to the ends of the edges
    - (optional) `<mode>_nodes.geoson`: if there are several points very close to each others (distance epsilon), merge them

### Folder path
- input files : in the input_folder, there should be one subfolder per mode, named `<Mode>` with the files
- output : defined by output_folder

### Points to improve
- we could think of not using a `raw_<mode>_nodes.geoson` and generate the nodes directly from the ends of edges
- but we need to say where the countries are

In [1]:
region = "Cambodia"

In [2]:
import os
import time
import math
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
import shapely.wkt
from tqdm import tqdm

input_folder = os.path.join('..', '..', '..', region, 'Data', 'Structured', "Transport")
output_folder = os.path.join('..', 'input', region, 'Transport')

In [3]:
projected_crs = {
    'Cambodia': 3857,
    'Ecuador': 31986,
    'ECA': 3857
}
projected_crs = projected_crs[region]

In [4]:
def loadShp(filename):
    gpdf = gpd.read_file(filename)
    gpdf = gpdf[~gpdf['geometry'].isnull()]
    gpdf = gpdf.to_crs(epsg=4326)
    return gpdf

In [5]:
def createNodes(df_links):
    all_coords = df_links['geometry'].apply(getEndCoordsFromLine).to_list()
    all_coords = list(set([item for sublist in all_coords for item in sublist]))
    return gpd.GeoDataFrame({"geometry": [Point(coords) for coords in all_coords], "id": range(len(all_coords))}, crs=4326)

def getEndCoordsFromLine(linestring_obj):
    end1Coord = linestring_obj.coords[0]
    end2Coord = linestring_obj.coords[-1]
    return [end1Coord, end2Coord]

def getEndPointsFromLine(linestring_obj):
    end1Coord = linestring_obj.coords[0]
    end2Coord = linestring_obj.coords[-1]
    return Point(*end1Coord), Point(*end2Coord)

def getIndexClosestPoint(point, df_with_points):
    distList = [point.distance(item) for item in df_with_points['geometry'].tolist()]
    return int(df_with_points.index[distList.index(min(distList))])

def updateLineString(linestring_obj, newEnd1, newEnd2):
    return LineString([newEnd1.coords[0]] + linestring_obj.coords[1:-1]+[newEnd2.coords[0]])

def assignEndpointsAndUpdate(df_links, id_link, df_nodes, update=False):
    p1, p2 = getEndPointsFromLine(df_links.loc[id_link, 'geometry'])
    id_closest_point1 = getIndexClosestPoint(p1, df_nodes)
    id_closest_point2 = getIndexClosestPoint(p2, df_nodes)
    df_links.loc[id_link, 'end1'] = id_closest_point1
    df_links.loc[id_link, 'end2'] = id_closest_point2
    if update:
        df_links.loc[id_link, 'geometry'] = updateLineString(df_links.loc[id_link, 'geometry'], df_nodes.loc[id_closest_point1, 'geometry'], df_nodes.loc[id_closest_point2, 'geometry'])
    return df_links

def assignEndpointsAndUpdateFullDf(df_links, df_nodes, update=False):
    print('Assigning end nodes to linestring')
    if update:
        print('Stag liens to endnodes')
    res = df_links.copy()
    for i in tqdm(res.index):
        res = assignEndpointsAndUpdate(res, i, df_nodes, update=update)
    res['end1'] = res['end1'].astype(int)
    res['end2'] = res['end2'].astype(int)
    return res

def getAllEndpoints(df_links):
    all_endpoints = [getEndPointsFromLine(item) for item in df_links['geometry']]
    return [item for sublist in all_endpoints for item in sublist]

def mergePoints(df_nodes, df_links, epsilon):
    print("Nb of original nodes:", df_nodes.shape[0])
    
    all_endpoints = getAllEndpoints(df_links)
    print("Nb of endpoints:", len(all_endpoints))
    
    all_points_gpd = gpd.GeoDataFrame({"geometry": df_nodes['geometry'].tolist()+getAllEndpoints(df_links)}, crs={'epsg':'4326'})
    buffered_polygons = gpd.GeoDataFrame({"geometry": all_points_gpd.buffer(distance=epsilon)}, crs={'epsg':'4326'})
    multipolygon = buffered_polygons.unary_union
    centroids_each_polygon = [polygon.centroid for polygon in multipolygon]
    print("Nb of grouped points:", len(centroids_each_polygon))
    
    return gpd.GeoDataFrame({"id":range(len(multipolygon)), "geometry":centroids_each_polygon}, crs={'epsg':'4326'})

def assignEndpointsOneEdge(row, df_nodes):
    p1, p2 = getEndPointsFromLine(row['geometry'])
    id_closest_point1 = getIndexClosestPoint(p1, df_nodes)
    id_closest_point2 = getIndexClosestPoint(p2, df_nodes)
    row['end1'] = id_closest_point1
    row['end2'] = id_closest_point2
    return row

def assignEndpoints(df_links, df_nodes):
    return df_links.apply(lambda row: assignEndpointsOneEdge(row, df_nodes), axis=1)

In [6]:
def loadAndFormatGeojson(transport_mode, nodeedge, subfolder):
    if nodeedge == "nodes":
        nodes = gpd.read_file(os.path.join(subfolder, "raw_"+transport_mode+"_nodes.geojson"))
        if 'index' in nodes.columns:
            nodes = nodes.drop('index', axis=1)
        nodes['id']=range(nodes.shape[0])
        nodes.index = nodes['id']
        nodes.index.name = "index"
        print("There are", nodes.shape[0], "nodes")
        print(nodes.crs)
        return nodes
    
    if nodeedge == "edges":
        edges = gpd.read_file(os.path.join(subfolder, "raw_"+transport_mode+"_edges.geojson"))
        edges = edges[~edges['geometry'].isnull()]
        if 'index' in edges.columns:
            edges = edges.drop('index', axis=1)
        edges['id']=range(edges.shape[0])
        edges['end1'] = None
        edges['end2'] = None
        if "capacity" not in edges.columns:
            edges['capacity'] = None
        edges.index = edges['id']
        edges.index.name = "index"
        print("There are", edges.shape[0], "edges")
        print(edges.crs)
        return edges
    
    
def addKm(edges, crs):
    # Project the layer. Watch out, the CRS should be adapted to the country
    edges['km'] = edges.to_crs({'init': 'epsg:'+str(crs)}).length/1000
    return edges

# Roads

In [9]:
transport_mode = 'roads'
special_suffix = ""  # _ximena leave empty "" otherwise
subfolder = os.path.join(input_folder, transport_mode.capitalize()+special_suffix)

#nodes['special'] = nodes['name'] #should do it in QGIS but for obscure reasons it does not work
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)
create_nodes = True
if create_nodes:
    nodes = createNodes(edges)
else:
    nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
if "surface" not in edges.columns:
    edges['surface'] = 'paved'
if "class" not in edges.columns:
    edges['class'] = None
if "disruption" not in edges.columns:
    edges['disruption'] = None
if "name" not in edges.columns:
    edges['name'] = None
if "special" not in edges.columns:
    edges['special'] = None
    
edges = addKm(edges, projected_crs)
print(edges['km'].sum())
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

print(nodes.head())
print(edges.head())

nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes"+special_suffix+".geojson"), driver="GeoJSON", index=False)
edges.to_file(os.path.join(output_folder, transport_mode+"_edges"+special_suffix+".geojson"), driver="GeoJSON", index=False)

if (edges['end1'] == edges['end2']).any():
    print('ATT')
    edges[edges['end1'] == edges['end2']]

There are 8632 edges
epsg:4326


  return _prepare_from_string(" ".join(pjargs))


528379.745174192
Assigning end nodes to linestring


100%|██████████████████████████████████████████████████████████████████████████████| 8632/8632 [10:50<00:00, 13.27it/s]


                     geometry  id
0  POINT (121.12024 27.88813)   0
1   POINT (37.88515 50.74885)   1
2   POINT (44.35610 33.30235)   2
3   POINT (69.36676 55.29093)   3
4  POINT (109.30862 25.00105)   4
       OBJECTID ISO_CC Name1 Name2 Name3 Name4 Name5 Shield        TYPE  RANK  \
index                                                                           
0       26259.0     CN   315                            315  Major road   2.0   
1           NaN   None  None  None  None  None  None   None        None   NaN   
2           1.0     AF                                       Major road   1.0   
3           2.0     AF                                          Highway   1.0   
4           3.0     AF                                       Local road   2.0   

       ... id  end1  end2  capacity  surface  class disruption  name special  \
index  ...                                                                     
0      ...  0  7110  2926      None    paved   None       None  None

# Maritime

In [7]:
transport_mode = 'maritime'
subfolder = os.path.join(input_folder, transport_mode.capitalize())

nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)

edges = addKm(edges, crs=3975) #for maritime we use 3975, which is projection for the whole world
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

print(nodes.head())
print(edges.head())

nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON", index=False)
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON", index=False)

if (edges['end1'] == edges['end2']).any():
    print('ATT')

  in_crs_string = _prepare_from_proj_string(in_crs_string)


There are 21 nodes
EPSG:4326
There are 17 edges
EPSG:4326
Assigning end nodes to linestring


100%|█████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 568.19it/s]

                                   name special                     geometry  \
index                                                                          
0      Sihanoukville international port    port   POINT (103.51631 10.65094)   
1                                  None     ASI   POINT (113.25973 19.97663)   
2                          Cai Mep Port    port   POINT (107.02588 10.53712)   
3                                  None     OCE  POINT (138.57498 -11.10568)   
4                                  None     AFR        POINT (39.461 -6.961)   

       id  
index      
0       0  
1       1  
2       2  
3       3  
4       4  
       FID special                                           geometry  id  \
index                                                                       
0      1.0    None  LINESTRING (103.51549 10.65113, 103.43294 10.6...   0   
1      2.0    None  LINESTRING (103.51549 10.65113, 103.43294 10.6...   1   
2      NaN    None  LINESTRING (6.00739 43.1696




# Airways

In [19]:
transport_mode = 'airways'
subfolder = os.path.join(input_folder, transport_mode.capitalize())

nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)

edges = addKm(edges, projected_crs) #for maritime we use 3975, which is projection for the whole world
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

print(nodes.head())
print(edges.head())

nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON", index=False)
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON", index=False)

if (edges['end1'] == edges['end2']).any():
    print('ATT')

There are 3 nodes
epsg:4326
There are 3 edges
epsg:4326


  return _prepare_from_string(" ".join(pjargs))


Assigning end nodes to linestring
                                         name special  \
index                                                   
0      Aeropuerto Ecologico Galapagos Seymour    None   
1                        Aeropuerto Guayaquil    None   
2             Aeropuerto Quito Mariscal Sucre    None   

                         geometry  id  
index                                  
0      POINT (-90.26504 -0.45506)   0  
1      POINT (-79.88713 -2.15896)   1  
2      POINT (-78.35621 -0.12718)   2  
      special capacity                                           geometry  id  \
index                                                                           
0        None     None  LINESTRING (-90.26504 -0.45506, -78.35621 -0.1...   0   
1        None     None  LINESTRING (-78.35621 -0.12718, -79.88713 -2.1...   1   
2        None     None  LINESTRING (-79.88713 -2.15896, -90.26504 -0.4...   2   

       end1  end2           km  
index                           
0       

# Waterways

In [11]:
transport_mode = 'waterways'
subfolder = os.path.join(input_folder, transport_mode.capitalize())

nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)
edges = addKm(edges, projected_crs)
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

print(nodes.head())
print(edges.head())

nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON", index=False)
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON", index=False)

if (edges['end1'] == edges['end2']).any():
    print('ATT')

There are 22 nodes
epsg:4326
There are 20 edges
epsg:4326
Assigning end nodes to linestring


  return _prepare_from_string(" ".join(pjargs))
100%|████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 1250.87it/s]


       id                      name                    geometry
index                                                          
0       0          Chong Kneas Port  POINT (103.82202 13.26983)
1       1      Kampong Chlnang Port  POINT (104.68121 12.26825)
2       2  Kampong Chlnang Junction  POINT (104.69201 12.27107)
3       3         Kratie River Port  POINT (106.01621 12.48460)
4       4           Kratie Junction  POINT (106.01220 12.48335)
       end1  end2 special  capacity  id  \
index                                     
0         2     0    None  200000.0   0   
1        10     6    None   20000.0   1   
2         6     5    None   20000.0   2   
3         6     4    None   20000.0   3   
4         4     3    None       NaN   4   

                                                geometry          km  
index                                                                 
0      LINESTRING (104.69187 12.27092, 104.67544 12.3...  156.544324  
1      LINESTRING (104.95104 11.55619

# Multimodality

In [8]:
transport_mode = 'multimodal'
special_suffix = ""  # leave empty "" otherwise
subfolder = os.path.join(input_folder, transport_mode.capitalize()+special_suffix)

edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)
edges['km'] = 0.1 #no impact

print(edges.head())

edges.to_file(os.path.join(output_folder, transport_mode+"_edges"+special_suffix+".geojson"), driver="GeoJSON", index=False)

There are 14 edges
EPSG:4326
               multimodes special   capacity  \
index                                          
0          roads-railways    None  1000000.0   
1       railways-maritime                NaN   
2      roads-maritime-shv              334.0   
3         roads-waterways    None        NaN   
4         roads-waterways    None        NaN   

                                                geometry  id  end1  end2   km  
index                                                                          
0      LINESTRING (104.91654 11.57265, 104.91811 11.5...   0  None  None  0.1  
1      LINESTRING (103.50217 10.64288, 103.51631 10.6...   1  None  None  0.1  
2      LINESTRING (103.51222 10.64248, 103.51631 10.6...   2  None  None  0.1  
3      LINESTRING (104.92095 11.58301, 104.92194 11.5...   3  None  None  0.1  
4      LINESTRING (105.46532 11.98568, 105.46508 11.9...   4  None  None  0.1  


# Railways

In [37]:
transport_mode = 'railways'
create_nodes = True

subfolder = os.path.join(input_folder, transport_mode.capitalize())
#edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)

if create_nodes:
    nodes = createNodes(edges)
else:
    nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
edges = addKm(edges, projected_crs)
edges = assignEndpointsAndUpdateFullDf(edges, nodes)
edges['disruption'] = None

print(nodes.head())
print(edges.head())

nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON")
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON")

if (edges['end1'] == edges['end2']).any():
    print('ATT')

  return _prepare_from_string(" ".join(pjargs))


Assigning end nodes to linestring


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 177/177 [00:00<00:00, 276.61it/s]


                     geometry  id
0  POINT (119.06567 37.00389)   0
1   POINT (39.60605 40.81612)   1
2  POINT (118.55905 24.87561)   2
3  POINT (120.70911 27.98787)   3
4   POINT (38.24080 47.16110)   4
       id      multimodes special capacity        km  end1  end2  name  \
index                                                                    
0       0  roads-maritime    None     None  2.284587    81    55  None   
1       1  roads-railways    None     None  2.244781   223   243  None   
2       2  roads-railways    None     None  3.613893    59   295  None   
3       3  roads-maritime    None     None  5.692818    83   225  None   
4       4  roads-railways    None     None  1.716334   218   244  None   

                                                geometry disruption  
index                                                                
0      LINESTRING (48.02159 46.38468, 48.01296 46.37183)       None  
1      LINESTRING (48.05829 46.36522, 48.03825 46.36367)       None

# Other stuff, may be useful

### Transition from shp to geojson

In [47]:
for edge_node in ["node", "edge"]:    
    transport_mode = 'roads'
    version = "v8"
    subfolder = os.path.join(folder, 'Data', "Structured", transport_mode.capitalize(), version)

    filename = "raw_"+transport_mode+"_"+edge_node+"s.shp"
    df = gpd.read_file(os.path.join(subfolder, filename))

    version = "current_version"
    subfolder = os.path.join(folder, 'Data', "Structured", transport_mode.capitalize(), version)
    df.to_file(os.path.join(subfolder, "raw_"+transport_mode+"_"+edge_node+"s.geojson"), driver="GeoJSON")

### Change to CRS 4326

In [76]:
for edge_node in ["node", "edge"]:    
    transport_mode = 'multimodal'
    version = "current_version"
    subfolder = os.path.join(folder, 'Data', "Structured", transport_mode.capitalize(), version)
    
    filename = "raw_"+transport_mode+"_"+edge_node+"s.shp"
    df = gpd.read_file(os.path.join(subfolder, "raw_"+transport_mode+"_"+edge_node+"s.geojson"))
    
    df = df.to_crs(4326)
    
    df.to_file(os.path.join(subfolder, "raw_"+transport_mode+"_"+edge_node+"s.geojson"), driver="GeoJSON")