# Read data from NVDB and convert to RDF for comparison with Overture Maps

Libraries and parameters

In [1]:
import sys, datetime
from rdflib import Graph, Namespace
import nvdbapiv3
from nvdb2rdf import *
import geopandas as gpd
from shapely import Point, wkt
from shapely.geometry import shape, box
from shapely.wkb import loads
import pandas as pd
import pyproj
import overturemaps
from lonboard import Map, PolygonLayer, PathLayer, ScatterplotLayer
from lonboard.colormap import apply_continuous_cmap
from lonboard import viz

# Parameters
# Root folder of the local OM2ANY checkout. All local file paths in this notebook are
# derived from it, so changing this one value relocates every input and output file.
rootPath = "C:\\Data\\GitHub\\jetgeo\\OM2ANY"

# NVDB ontology file and IRIs
nvdbOtlFilePath = rootPath + "\\OWL\\nvdb\\nvdb-owl.ttl"
nvdbVoIRI = "https://ontologi.atlas.vegvesen.no/nvdb/core/nvdb-owl/vegobjekt#"
nvdbVnIRI = "https://ontologi.atlas.vegvesen.no/nvdb/core/vegnett#"
nvdbOtlIRI = "https://ontologi.atlas.vegvesen.no/nvdb/core/nvdb-owl#"

# INSPIRE ontology file and IRI
itnrOtlFilePath = rootPath + "\\OWL\\inspire\\itnr-owl.ttl"
itnrOtlIRI = "http://inspire.ec.europa.eu/ont"

# Data selection
featuretypeid = 105  # first argument passed to nvdb2graph; also part of the Turtle file name
knr = 3403  # value of the 'kommune' filter on the NVDB road network; also passed to nvdb2graph


Føyer C:\DATA\GitHub\vegvesen\NVDB-Datakatalogen\owl til søkestien


Read ontologies

In [None]:
# ---------------------------------------------------------------------------------------------
startTime = datetime.datetime.now()
# ---------------------------------------------------------------------------------------------
# Load the NVDB ontology (object type library) into its own graph
print(str(datetime.datetime.now()) + ' Leser inn NVDB-OTL fra ', nvdbOtlFilePath)
otl_nvdb = Graph()
otl_nvdb.parse(nvdbOtlFilePath, format="turtle")
# Load the INSPIRE ontology into a separate graph
print(str(datetime.datetime.now()) + ' Leser inn INSPIRE-OTL fra ', itnrOtlFilePath)
otl_itnr = Graph()
otl_itnr.parse(itnrOtlFilePath, format="turtle")
# Merge both ontologies into one graph. The result of `+` is assigned directly, so no
# empty Graph() has to be created for `otl` beforehand.
print(str(datetime.datetime.now()) + ' Slår sammen ontologiene')
otl = otl_nvdb + otl_itnr
# Create the graph for NVDB data and register the namespace prefixes (each prefix once)
print(str(datetime.datetime.now()) + ' Setter opp graf og namespace-forkortelser for data')
g_nvdb = Graph()
g_nvdb.bind("nvdb_vo", Namespace(nvdbVoIRI))    # IRI for NVDB objects
g_nvdb.bind("nvdb_vn", Namespace(nvdbVnIRI))    # IRI for the NVDB road network
g_nvdb.bind("nvdb_otl", Namespace(nvdbOtlIRI))  # IRI for the NVDB ontology
g_nvdb.bind("gsp", 'http://www.opengis.net/ont/geosparql#')


# ---------------------------------------------------------------------------------------------

Read data from NVDB and convert to RDF

In [None]:
# Build the RDF graph for the selected feature type and municipality, using the NVDB ontology.
# nvdb2graph is not defined in this notebook — presumably it comes from `from nvdb2rdf import *`.
# NOTE(review): this rebinds g_nvdb, so the Graph created in the ontology cell (with its prefix
# bindings) is replaced by whatever nvdb2graph returns — confirm the prefixes are set there too.
g_nvdb = nvdb2graph(featuretypeid, knr, otl_nvdb)

Print to Turtle

In [None]:
# Output file: <rootPath>\data\nvdb_<featuretypeid>_<knr>.ttl
# (the extension previously ended in a stray dot: ".ttl.")
fileName = rootPath + "\\data\\nvdb_" + str(featuretypeid) + "_" + str(knr) + ".ttl"
print(str(datetime.datetime.now()) + ' Skriver til NVDB-Turtle-fil: ' + fileName)
# Serialize the NVDB data graph as Turtle
g_nvdb.serialize(destination=fileName, format="turtle")

Read road network from NVDB and create GeoDataFrames with links and nodes

In [3]:
# Query the NVDB road network, restricted to the municipality given by knr
v = nvdbapiv3.nvdbVegnett()
v.filter({'kommune': knr})
print(f"Filter: {v.filterdata}")

# Links: load the records into a DataFrame and parse the WKT text in 'geometri'
sDf = pd.DataFrame(v.to_records())
sDf['geometry'] = sDf['geometri'].apply(wkt.loads)
sGDF = gpd.GeoDataFrame(sDf, geometry='geometry', crs=5973)
sGDF.to_file(rootPath + "\\data\\nvdb\\nvdb_Segments.geojson", driver="GeoJSON")

# Nodes: the first and last vertex of every link geometry become point geometries
startpoints = [Point(line.coords[0]) for line in sGDF['geometry']]
endpoints = [Point(line.coords[-1]) for line in sGDF['geometry']]

# Start points are paired with 'startnode' ids, end points with 'sluttnode' ids
node_columns = {
    'geometry': startpoints + endpoints,
    'nodeid': sGDF['startnode'].tolist() + sGDF['sluttnode'].tolist(),
}
nGDF = gpd.GeoDataFrame(node_columns, crs=sGDF.crs)

# Keep one row per node id (the first occurrence), then export
nGDF = nGDF.drop_duplicates(subset='nodeid')
nGDF.to_file(rootPath + "\\data\\nvdb\\nvdb_Nodes.geojson", driver="GeoJSON")

Filter: {'kommune': 3403}


Convert the NVDB data to WGS 84 (EPSG:4326)

In [4]:
# Reproject links and nodes to WGS 84. to_crs() returns a new frame that already carries
# EPSG:4326 as its CRS, so the CRS is not assigned again afterwards (overriding an
# existing CRS through the .crs attribute is discouraged by GeoPandas).
sGDF4326 = sGDF.to_crs(epsg=4326)
nGDF4326 = nGDF.to_crs(epsg=4326)
#nGDF4326.to_file(rootPath + "\\data\\nvdb\\nvdb_Nodes4326.geojson", driver="GeoJSON")

Find extent of the NVDB Data and create a bounding box in EPSG:4326


In [None]:
# Extent of the NVDB links in EPSG:4326, as (minlon, minlat, maxlon, maxlat).
#
# The bounds are read from the already reprojected links (sGDF4326) instead of transforming
# only the lower-left and upper-right corners of the projected extent: a rectangle in the
# projected CRS is not axis-aligned in longitude/latitude, so two transformed corners can
# leave out links near the other two corners.
minlon, minlat, maxlon, maxlat = (float(value) for value in sGDF4326.total_bounds)

# Bounding box tuple used for the Overture Maps query
bbox = minlon, minlat, maxlon, maxlat
print(bbox)


Read Overture segments and connectors (nodes) within the bounding box and store them in GeoDataFrames

In [None]:
def rbr2GDF(rbr):
    """Build a GeoDataFrame (CRS 4326) from a table with a binary 'geometry' column.

    Parameters
    ----------
    rbr : table-like object that supports ``rbr['geometry']`` (iterable of values with
        ``as_py()``) and ``to_pandas()``. The callers in this notebook pass the result of
        ``read_all()``, presumably a pyarrow Table; verify.

    Returns
    -------
    geopandas.GeoDataFrame
        All columns of ``rbr``, with 'geometry' replaced by the decoded geometries.
    """
    frame = rbr.to_pandas()
    # Decode every geometry value with shapely.wkb.loads and overwrite the raw column
    frame['geometry'] = [loads(raw.as_py()) for raw in rbr['geometry']]
    return gpd.GeoDataFrame(frame, geometry='geometry', crs=4326)

# Overture segments inside bbox
ft = "segment"
segReader = overturemaps.record_batch_reader(ft, bbox)
# combine_chunks(): temporarily required as of Lonboard 0.8 to avoid a Lonboard bug
segTable = segReader.read_all().combine_chunks()
segGDF = rbr2GDF(segTable)
# print(segGDF.head(10))


# Overture connectors inside bbox
ft = "connector"
conReader = overturemaps.record_batch_reader(ft, bbox)
conTable = conReader.read_all()
conGDF = rbr2GDF(conTable)
# print(conGDF.head(10))


Export to GeoJSON, for use in QGIS

In [None]:
# Export only the id and geometry columns of each Overture layer
exportColumns = ['id', 'geometry']

segExport = segGDF[exportColumns]
segExport.to_file(rootPath + "\\data\\om\\om_segments.geojson", driver="GeoJSON")

conExport = conGDF[exportColumns]
conExport.to_file(rootPath + "\\data\\om\\om_connectors.geojson", driver="GeoJSON")

Show in lonboard

In [None]:
# Alternative layers built directly from the Overture tables (currently disabled):
# segLayer = PathLayer(table=segTable, get_color=[0, 0, 139], width_min_pixels=1)
# conLayer = ScatterplotLayer(table=conTable, get_fill_color=[255, 0, 0])

# NVDB links as paths and NVDB nodes as points, both from the EPSG:4326 frames
sLayer = PathLayer.from_geopandas(sGDF4326, get_color=[0, 0, 139], width_min_pixels=1)
nodeLayer = ScatterplotLayer.from_geopandas(nGDF4326, get_fill_color=[155, 75, 0])

# Initial camera position of the map
view_state = dict(longitude=11.08, latitude=60.795, zoom=13, pitch=0, bearing=0)

m = Map([sLayer, nodeLayer], view_state=view_state, _height=1000)
# m = Map([segLayer, conLayer, nodeLayer], view_state=view_state, _height=1000)

# Note from the original author: lonboard does not present this map correctly.
m

Compare Nodes

In [None]:
# Compare NVDB nodes with Overture connectors.
# A point counts as "missing" when the other dataset has no point within max_deviation of it.

# Matching tolerance in units of the metric CRS below (metres).
# Placeholder value — tune to the positional accuracy expected between the two sources.
max_deviation = 5.0

# Distances must be measured in a projected CRS, not in degrees.
# NOTE(review): EPSG:25833 (ETRS89 / UTM zone 33N) is assumed to be the horizontal part of
# the EPSG:5973 CRS used for the NVDB data — confirm.
metricEpsg = 25833
gdf1 = nGDF[['nodeid', 'geometry']].to_crs(epsg=metricEpsg)  # NVDB nodes
gdf2 = conGDF[['id', 'geometry']].to_crs(epsg=metricEpsg)    # Overture connectors


def _points_without_match(points, others, max_dist):
    """Return the rows of `points` that have no geometry of `others` within `max_dist`.

    Both frames must share the same projected CRS; `max_dist` is in units of that CRS.
    """
    nearest = gpd.sjoin_nearest(
        points,
        others[['geometry']],
        how="left",
        max_distance=max_dist,
        distance_col="distance",
    )
    # With how="left", rows without a neighbour inside max_distance are kept, and their
    # distance is NaN — those are the unmatched points.
    unmatched = nearest.index[nearest["distance"].isna()]
    return points.loc[unmatched]


# NVDB nodes with no Overture connector within max_deviation
missing_in_gdf1 = _points_without_match(gdf1, gdf2, max_deviation)
# Overture connectors with no NVDB node within max_deviation
missing_in_gdf2 = _points_without_match(gdf2, gdf1, max_deviation)

# Unmatched NVDB nodes as a list of (x, y) tuples in the metric CRS
missing_points_list = [(point.x, point.y) for point in missing_in_gdf1.geometry]

print(f"NVDB nodes without Overture connector within {max_deviation} m: {len(missing_in_gdf1)} of {len(gdf1)}")
print(f"Overture connectors without NVDB node within {max_deviation} m: {len(missing_in_gdf2)} of {len(gdf2)}")
