This notebook aims to produce transport layers that are fit for the model.

Transport modes are among: roads, railways, waterways, maritime, and multimodal. Roads are required. Note that the multimodal layer is required if there are more than 2 transport modes.

### Input
- For each mode, a `raw_<mode>_edges.geojson` and a `raw_<mode>_nodes.geojson`, except for multimodal, for which there is only an edge layer
- Required attributes :
    - `raw_roads_edges.geojson`: "class" ("primary", "secondary", etc.), "surface" ("paved", "unpaved")
    - `raw_multimodal_edges.geojson`: "multimodes" ("roads-railways", "roads-maritime", etc.)
    - For the countri
- Optional attributes :
    - `raw_<mode>_edges.geojson`: "capacity" (float, max annual tonnage on the edge)
- Other requirements :
    - the location of countries should be placed in the nodes layers
    - concretely, there should be a "name" attribute in `raw_<mode>_nodes.geojson`, with the trigram of the country in the selected node
       
### Output
- For each mode, a `<mode>_edges.geojson` and a `<mode>_nodes.geojson`
- New attributes :
    - `<mode>_edges.geojson` and `<mode>_nodes.geojson`: 'id' (integer)
    - `<mode>_edges.geojson`: 'end1' and 'end2' (integer, id of the nodes located at the ends of the edge), 'km' (float, length of the edge)
    - Note that the end1 and end2 of an edge are found by looking for the node of `<mode>_nodes.geojson` which is the closest to each end of the edge
- Modification on the geometry
    - (optional) `<mode>_nodes.geojson`: if the end1 or end2 nodes are not located at the ends of the edges, they are moved to the ends of the edges
    - (optional) `<mode>_nodes.geojson`: if there are several points very close to each other (within a distance epsilon), merge them

### Folder path
- input files : in the input_folder, there should be one subfolder per mode, named `<Mode>` with the files
- output : defined by output_folder

### Points to improve
- we could think of not using a `raw_<mode>_nodes.geojson` and generating the nodes directly from the ends of the edges
- but we need to say where the countries are

In [1]:
import os
import time
import math
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
import shapely.wkt


# Folder holding the raw layers (one subfolder per transport mode).
input_folder = os.path.join(
    '..', '..', '..', 'Ecuador',
    'Data', 'Structured', "Transport",
)
# Folder receiving the formatted layers written by this notebook.
output_folder = os.path.join(
    '..', 'input',
    'Ecuador', 'Transport',
)

In [2]:
# EPSG code, per country, of the CRS used to measure edge lengths
# (presumably a metric projected CRS, since lengths are divided by 1000 to get km).
projected_crs = dict(Cambodia=3857, Ecuador=31986)
# Keep only the code of the country being processed; from here on
# `projected_crs` is a plain integer EPSG code.
projected_crs = projected_crs['Ecuador']

In [3]:
def loadShp(filename):
    """Read a vector file and return it in EPSG:4326 without empty geometries.

    Parameters
    ----------
    filename : path of the file, passed as-is to `gpd.read_file`.

    Returns
    -------
    GeoDataFrame restricted to the rows whose geometry is not null,
    reprojected to EPSG:4326.
    """
    layer = gpd.read_file(filename)
    has_geometry = layer['geometry'].notnull()
    return layer[has_geometry].to_crs(epsg=4326)

In [4]:
def getEndPointsFromLine(linestring_obj):
    """Return the first and last vertices of a line as a pair of Points.

    Parameters
    ----------
    linestring_obj : geometry exposing a `coords` sequence.

    Returns
    -------
    tuple (Point, Point): start vertex, end vertex.
    """
    vertices = linestring_obj.coords
    first_vertex, last_vertex = vertices[0], vertices[-1]
    return Point(*first_vertex), Point(*last_vertex)

def getIndexClosestPoint(point, df_with_points):
    """Return the index label of the row whose geometry is closest to `point`.

    Parameters
    ----------
    point : object exposing `distance(other)`.
    df_with_points : table with a 'geometry' column; its index labels must be
        convertible to int.

    Returns
    -------
    int: index label of the closest row (the first one in case of ties).

    Raises
    ------
    ValueError if `df_with_points` has no rows.
    """
    candidates = df_with_points['geometry'].tolist()
    # min() keeps the first position reaching the smallest distance.
    closest_position = min(
        range(len(candidates)),
        key=lambda position: point.distance(candidates[position]),
    )
    return int(df_with_points.index[closest_position])

def updateLineString(linestring_obj, newEnd1, newEnd2):
    """Return a copy of the line whose two end vertices are replaced.

    Parameters
    ----------
    linestring_obj : line whose interior vertices are kept.
    newEnd1, newEnd2 : point geometries giving the new first and last vertices.

    Returns
    -------
    LineString: newEnd1, the original interior vertices, then newEnd2.
    """
    interior_vertices = linestring_obj.coords[1:-1]
    start_vertex = newEnd1.coords[0]
    end_vertex = newEnd2.coords[0]
    return LineString([start_vertex, *interior_vertices, end_vertex])

def assignEndpointsAndUpdate(df_links, id_link, df_nodes, update=False):
    """Fill 'end1'/'end2' of one edge with the ids of its closest nodes.

    The edge table is modified in place and also returned.

    Parameters
    ----------
    df_links : edge table with 'geometry', 'end1' and 'end2' columns.
    id_link : index label of the edge to process.
    df_nodes : node table with a 'geometry' column.
    update : if True, the edge geometry is also rewritten so that it starts
        and ends exactly on the two selected nodes.

    Returns
    -------
    df_links, the same object that was passed in.
    """
    line = df_links.loc[id_link, 'geometry']
    # One node id per end of the line, in (start, end) order.
    end_node_ids = [
        getIndexClosestPoint(endpoint, df_nodes)
        for endpoint in getEndPointsFromLine(line)
    ]
    for column, node_id in zip(('end1', 'end2'), end_node_ids):
        df_links.loc[id_link, column] = node_id

    if not update:
        return df_links

    start_node, end_node = (df_nodes.loc[node_id, 'geometry'] for node_id in end_node_ids)
    df_links.loc[id_link, 'geometry'] = updateLineString(line, start_node, end_node)
    return df_links

def assignEndpointsAndUpdateFullDf(df_links, df_nodes, update=False):
    """Assign end nodes to every edge, working on a copy of the edge table.

    Parameters
    ----------
    df_links : edge table with 'geometry', 'end1' and 'end2' columns.
    df_nodes : node table with a 'geometry' column.
    update : forwarded to `assignEndpointsAndUpdate`; if True the edge
        geometries are rewritten to end on the selected nodes.

    Returns
    -------
    A copy of `df_links` whose 'end1' and 'end2' columns hold integer node ids.
    """
    progress_messages = ['Assigning end nodes to linestring']
    if update:
        progress_messages.append('Stag liens to endnodes')
    for message in progress_messages:
        print(message)

    # Work on a copy so the caller's table is left untouched.
    linked = df_links.copy()
    for link_id in linked.index:
        linked = assignEndpointsAndUpdate(linked, link_id, df_nodes, update=update)

    # The columns are cast to a proper integer dtype once every row is filled.
    for column in ('end1', 'end2'):
        linked[column] = linked[column].astype(int)
    return linked

def getAllEndpoints(df_links):
    """Return a flat list with the start and end Point of every edge.

    The list follows the row order of `df_links`, each edge contributing its
    start point followed by its end point.
    """
    endpoints = []
    for line in df_links['geometry']:
        endpoints.extend(getEndPointsFromLine(line))
    return endpoints

def mergePoints(df_nodes, df_links, epsilon):
    """Merge nodes and edge endpoints lying close to each other into single nodes.

    Every node and every edge endpoint is buffered by `epsilon`; overlapping
    buffers are dissolved and one node is created at the centroid of each
    resulting polygon.

    Parameters
    ----------
    df_nodes : node table with a 'geometry' column.
    df_links : edge table with a 'geometry' column.
    epsilon : buffer radius, expressed in the units of the layer coordinates
        (NOTE(review): degrees if the layers really are in EPSG:4326 - confirm).

    Returns
    -------
    GeoDataFrame with an 'id' column (0..n-1) and one centroid per group of
    points, tagged with CRS EPSG:4326.
    """
    wgs84 = "EPSG:4326"
    print("Nb of original nodes:", df_nodes.shape[0])

    all_endpoints = getAllEndpoints(df_links)
    print("Nb of endpoints:", len(all_endpoints))

    all_points_gpd = gpd.GeoDataFrame(
        {"geometry": df_nodes['geometry'].tolist() + all_endpoints},
        crs=wgs84,
    )
    dissolved = all_points_gpd.buffer(distance=epsilon).unary_union

    # The union is a single Polygon when every buffer overlaps; otherwise it is
    # a multi-part geometry, whose parts must be read through `.geoms`
    # (multi-part geometries are no longer iterable in Shapely 2).
    if dissolved.geom_type == 'Polygon':
        polygons = [dissolved]
    else:
        polygons = list(dissolved.geoms)

    centroids_each_polygon = [polygon.centroid for polygon in polygons]
    print("Nb of grouped points:", len(centroids_each_polygon))

    return gpd.GeoDataFrame(
        {"id": range(len(centroids_each_polygon)), "geometry": centroids_each_polygon},
        crs=wgs84,
    )

def assignEndpointsOneEdge(row, df_nodes):
    """Set 'end1' and 'end2' of one edge row to the ids of its closest nodes.

    Parameters
    ----------
    row : edge record with a 'geometry' entry; it is modified and returned.
    df_nodes : node table with a 'geometry' column.
    """
    endpoints = getEndPointsFromLine(row['geometry'])
    for column, endpoint in zip(('end1', 'end2'), endpoints):
        row[column] = getIndexClosestPoint(endpoint, df_nodes)
    return row

def assignEndpoints(df_links, df_nodes):
    """Apply `assignEndpointsOneEdge` to every row of the edge table.

    Returns the result of the row-wise `apply`, i.e. the edges with their
    'end1' and 'end2' entries filled from `df_nodes`.
    """
    def _with_end_nodes(edge_row):
        return assignEndpointsOneEdge(edge_row, df_nodes)

    return df_links.apply(_with_end_nodes, axis=1)

In [5]:
def loadAndFormatGeojson(transport_mode, nodeedge, subfolder):
    """Load a raw node or edge layer and give it the standard id/index layout.

    Reads `raw_<transport_mode>_<nodeedge>.geojson` from `subfolder`, drops any
    pre-existing 'index' column, numbers the rows in an 'id' column and uses
    that column as the index (named "index"). For edges, rows without geometry
    are removed first, empty 'end1' and 'end2' columns are added, and a
    'capacity' column is created when the file has none.

    Parameters
    ----------
    transport_mode : mode name used in the file name (e.g. 'roads').
    nodeedge : either "nodes" or "edges".
    subfolder : folder containing the raw file.

    Returns
    -------
    GeoDataFrame with the formatted layer.

    Raises
    ------
    ValueError if `nodeedge` is neither "nodes" nor "edges" (the function
    previously returned None silently in that case).
    """
    if nodeedge not in ("nodes", "edges"):
        raise ValueError(
            'nodeedge should be "nodes" or "edges", got ' + repr(nodeedge)
        )
    is_edge_layer = nodeedge == "edges"

    filename = "raw_" + transport_mode + "_" + nodeedge + ".geojson"
    layer = gpd.read_file(os.path.join(subfolder, filename))

    if is_edge_layer:
        # Copy the filtered rows so the column assignments below do not
        # operate on a slice of the table that was read.
        layer = layer[~layer['geometry'].isnull()].copy()

    if 'index' in layer.columns:
        layer = layer.drop('index', axis=1)
    layer['id'] = range(layer.shape[0])

    if is_edge_layer:
        # Placeholders, filled later once edges are matched to nodes.
        layer['end1'] = None
        layer['end2'] = None
        if "capacity" not in layer.columns:
            layer['capacity'] = None

    layer.index = layer['id']
    layer.index.name = "index"
    print("There are", layer.shape[0], nodeedge)
    print(layer.crs)
    return layer
    
    
def addKm(edges, crs):
    """Add a 'km' column holding the length of each edge.

    The layer is reprojected to the CRS identified by the EPSG code `crs`,
    and the resulting lengths are divided by 1000. The CRS should therefore be
    a metric one, adapted to the country being processed.

    Parameters
    ----------
    edges : edge GeoDataFrame; the 'km' column is added in place.
    crs : EPSG code (int, or a string holding an integer) of the CRS used to
        measure lengths.

    Returns
    -------
    The same `edges` object, with the 'km' column added.
    """
    # `epsg=` replaces the deprecated {'init': 'epsg:<code>'} dictionary syntax.
    edges['km'] = edges.to_crs(epsg=int(crs)).length / 1000
    return edges

# Maritime

In [16]:
# Format the maritime layers: raw files are read from "<input_folder>/Maritime".
transport_mode = 'maritime'
subfolder = os.path.join(input_folder, transport_mode.capitalize())

# Load both raw layers and give them the standard id/index layout.
nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)

# Lengths are measured with EPSG:3975 rather than the country-specific CRS.
edges = addKm(edges, crs=3975) #for maritime we use 3975, which is projection for the whole world
# Fill end1/end2 with the id of the node closest to each end of every edge
# (update defaults to False, so edge geometries are left untouched).
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

# Quick visual check of the formatted layers.
print(nodes.head())
print(edges.head())

# Write the formatted layers to the output folder.
nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON", index=False)
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON", index=False)

# Warn if some edge has both of its ends matched to the same node.
if (edges['end1'] == edges['end2']).any():
    print('ATT')

There are 11 nodes
epsg:4326
There are 10 edges
epsg:4326
Assigning end nodes to linestring
       name special                     geometry  id
index                                               
0      None     ASI   POINT (113.73721 22.37291)   0
1      None     OCE   POINT (120.12959 -7.86443)   1
2      None     AFR      POINT (6.65334 4.34548)   2
3      None     EUR     POINT (4.56087 52.44929)   3
4      None     NAM  POINT (-118.14754 33.67455)   4


  return _prepare_from_string(" ".join(pjargs))


        FID special                                           geometry  id  \
index                                                                        
0      None    None  LINESTRING (-79.90491 -2.28268, -79.90574 -2.2...   0   
1      None    None  LINESTRING (-80.59454 -2.93901, -81.10867 -3.3...   1   
2      None    None  LINESTRING (-80.59454 -2.93901, -80.93897 -3.0...   2   
3      None    None  LINESTRING (-79.50790 8.96201, -75.76302 14.19...   3   
4      None    None  LINESTRING (-79.50790 8.96201, -73.96901 13.86...   4   

       end1  end2 capacity           km  
index                                    
0         8     9     None   114.960147  
1         9     6     None  3802.214988  
2         9     7     None  1642.520974  
3         7     3     None  9810.880141  
4         7     2     None  9734.354238  


# Waterways

In [11]:
# Format the waterways layers: raw files are read from
# "<input_folder>/Waterways/current_version".
transport_mode = 'waterways'
version = "current_version"
subfolder = os.path.join(input_folder, transport_mode.capitalize(), version)

# Load both raw layers and give them the standard id/index layout.
nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)
# Lengths are measured in the CRS selected for the country (projected_crs).
edges = addKm(edges, projected_crs)
# Fill end1/end2 with the id of the node closest to each end of every edge.
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

# Quick visual check of the formatted layers.
print(nodes.head())
print(edges.head())

# Write the formatted layers to the output folder.
nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON")
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON")

# Warn if some edge has both of its ends matched to the same node.
if (edges['end1'] == edges['end2']).any():
    print('ATT')

There are 20 nodes
{'init': 'epsg:4326'}
There are 19 edges
{'init': 'epsg:4326'}


  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))


Assigning end nodes to linestring
       id                      name  \
index                                 
0       0          Chong Kneas Port   
1       1      Kampong Chlnang Port   
2       2  Kampong Chlnang Junction   
3       3         Kratie River Port   
4       4           Kratie Junction   

                                          geometry  
index                                               
0      POINT (103.8220192917276 13.26983357773719)  
1      POINT (104.6812144473571 12.26824772755564)  
2        POINT (104.6920055505009 12.271072307162)  
3      POINT (106.0162086075059 12.48460089813051)  
4      POINT (106.0122032737445 12.48335342660554)  
       end1  end2 special  capacity  id  \
index                                     
0         2     0    None  200000.0   0   
1        10     6    None   20000.0   1   
2         6     5    None   20000.0   2   
3         6     4    None   20000.0   3   
4         4     3    None       NaN   4   

                   

  with fiona.drivers():


# Multimodality

In [13]:
# Format the multimodal layer: only an edge file is loaded, from
# "<input_folder>/Multimodal".
transport_mode = 'multimodal'
subfolder = os.path.join(input_folder, transport_mode.capitalize())

# end1/end2 stay None here: no node layer is loaded, so no matching is done.
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)
# Every multimodal edge gets the same fixed length instead of a measured one.
edges['km'] = 0.1 #no impact

# Quick visual check of the formatted layer.
print(edges.head())

# Write the formatted layer to the output folder.
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON", index=False)

There are 1 edges
epsg:4326
           multimodes special capacity  \
index                                    
0      roads-maritime    None     None   

                                                geometry  id  end1  end2   km  
index                                                                          
0      LINESTRING (-79.90643 -2.27615, -79.90491 -2.2...   0  None  None  0.1  


# Railways

In [17]:
# Format the railways layers: raw files are read from
# "<input_folder>/Railways/current_version".
transport_mode = 'railways'
version = "current_version"
# Built from input_folder, like the waterways cell; the previous path relied on
# a `folder` variable that is not defined anywhere in this notebook.
subfolder = os.path.join(input_folder, transport_mode.capitalize(), version)

# Load both raw layers and give them the standard id/index layout.
nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)
# Lengths are measured in the CRS selected for the country (projected_crs).
edges = addKm(edges, projected_crs)
# Fill end1/end2 with the id of the node closest to each end of every edge.
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

# Quick visual check of the formatted layers.
print(nodes.head())
print(edges.head())

# Write the formatted layers to the output folder.
nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON")
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON")

# Warn if some edge has both of its ends matched to the same node.
if (edges['end1'] == edges['end2']).any():
    print('ATT')

There are 10 nodes
{'init': 'epsg:4326'}
There are 10 edges
{'init': 'epsg:4326'}


  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))


Assigning end nodes to linestring
          name                                     geometry  id
index                                                          
0      station  POINT (104.9165417310334 11.57264782123487)   0
1         None  POINT (102.9310145520781 13.58146756607205)   1
2         None  POINT (104.8218771869167 11.56651961957141)   2
3         None  POINT (104.6969901718489 11.69402940521593)   3
4         None  POINT (102.5439458929634 13.66549038485579)   4
            From_                         To special  capacity  \
index                                                            
0      Phnom Penh              Svay Sisaphon  unused       NaN   
1      Phnom Penh             Preah Sihanouk    used  700000.0   
2           Snoul                Laos Border  unused       NaN   
3       Bat Doeng  Loch Ninh ,Viet Nam Borde  unused       NaN   
4       Bat Doeng  Loch Ninh ,Viet Nam Borde  unused       NaN   

                                                geometr

  with fiona.drivers():


# Roads

In [17]:
# Format the roads layers: raw files are read from "<input_folder>/Roads".
transport_mode = 'roads'
subfolder = os.path.join(input_folder, transport_mode.capitalize())

nodes = loadAndFormatGeojson(transport_mode, "nodes", subfolder)
# Copy the 'name' attribute of the nodes into a 'special' column.
nodes['special'] = nodes['name'] #should do it in QGIS but for obscure reasons it does not work
edges = loadAndFormatGeojson(transport_mode, "edges", subfolder)
# Overwrite the road attributes with constants: every edge is marked as paved,
# and 'class' and 'name' are emptied.
edges['surface'] = 'paved'
edges['class'] = None
edges['name'] = None
# Lengths are measured in the CRS selected for the country (projected_crs).
edges = addKm(edges, projected_crs)
# Fill end1/end2 with the id of the node closest to each end of every edge.
edges = assignEndpointsAndUpdateFullDf(edges, nodes)

# Quick visual check of the formatted layers.
print(nodes.head())
print(edges.head())

# Write the formatted layers to the output folder.
nodes.to_file(os.path.join(output_folder, transport_mode+"_nodes.geojson"), driver="GeoJSON", index=False)
edges.to_file(os.path.join(output_folder, transport_mode+"_edges.geojson"), driver="GeoJSON", index=False)

# Warn if some edge has both of its ends matched to the same node.
if (edges['end1'] == edges['end2']).any():
    print('ATT')

There are 1517 nodes
epsg:4326
There are 2166 edges
epsg:4326


  return _prepare_from_string(" ".join(pjargs))


Assigning end nodes to linestring
       name                    geometry  id special
index                                              
0      None  POINT (-79.96508 -4.39275)   0    None
1      None  POINT (-77.89086 -1.55776)   1    None
2      None   POINT (-77.80381 0.81533)   2    None
3      None  POINT (-79.84806 -4.45338)   3    None
4      None   POINT (-77.94515 0.40815)   4    None
      infra_type        length        Type special  id  \
index                                                    
0           None           NaN        None  border   0   
1      Carretera  10499.463640  legitimate    None   1   
2      Carretera  11450.214973  legitimate    None   2   
3      Carretera   9729.416225  legitimate    None   3   
4      Carretera  10492.111143  legitimate    None   4   

                                                geometry  end1  end2 capacity  \
index                                                                           
0      LINESTRING (-79.96462 -4.3

# Other stuff, may be useful

### Transition from shp to geojson

In [47]:
# Convert the raw roads shapefiles (nodes, then edges) of version "v8" into
# GeoJSON files stored in the "current_version" folder.
# NOTE(review): `folder` is not defined in any visible cell of this notebook;
# it must be set before running this snippet.
for edge_node in ["node", "edge"]:    
    transport_mode = 'roads'
    version = "v8"
    subfolder = os.path.join(folder, 'Data', "Structured", transport_mode.capitalize(), version)

    # Read "raw_roads_<node|edge>s.shp" from the v8 folder.
    filename = "raw_"+transport_mode+"_"+edge_node+"s.shp"
    df = gpd.read_file(os.path.join(subfolder, filename))

    # Write the same layer as GeoJSON into the current_version folder.
    version = "current_version"
    subfolder = os.path.join(folder, 'Data', "Structured", transport_mode.capitalize(), version)
    df.to_file(os.path.join(subfolder, "raw_"+transport_mode+"_"+edge_node+"s.geojson"), driver="GeoJSON")

### Change to CRS 4326

In [76]:
# Reproject the raw multimodal GeoJSON layers to EPSG:4326, overwriting each
# file in place (it is read from and written to the same path).
# NOTE(review): `folder` is not defined in any visible cell of this notebook;
# it must be set before running this snippet.
for edge_node in ["node", "edge"]:    
    transport_mode = 'multimodal'
    version = "current_version"
    subfolder = os.path.join(folder, 'Data', "Structured", transport_mode.capitalize(), version)
    
    # NOTE(review): `filename` is assigned but not used below; the GeoJSON file
    # is the one that is read.
    filename = "raw_"+transport_mode+"_"+edge_node+"s.shp"
    df = gpd.read_file(os.path.join(subfolder, "raw_"+transport_mode+"_"+edge_node+"s.geojson"))
    
    df = df.to_crs(4326)
    
    df.to_file(os.path.join(subfolder, "raw_"+transport_mode+"_"+edge_node+"s.geojson"), driver="GeoJSON")