In [1]:
import geopandas as gpd
import yaml
import pickle
import pandas as pd
import os

In [2]:
# Load the run settings from the shared config file.
# NOTE(review): yaml.FullLoader is used here; for a plain settings file
# yaml.safe_load is the usual, more restrictive choice — confirm before switching.
with open(r"../config.yml") as config_file:
    parsed_yaml_file = yaml.load(config_file, Loader=yaml.FullLoader)

# Pull out the settings this notebook works with.
crs = parsed_yaml_file["CRS"]
study_area = parsed_yaml_file["study_area"]

print("Settings loaded!")

# Folder prefixes, all relative to the notebook location and keyed by study area.
# They end with "/" because file names are appended to them directly.
osm_data_fp = f"../data/osm/{study_area}/processed/"
osm_results_fp = f"../results/osm/{study_area}/data/"

geodk_data_fp = f"../data/reference/{study_area}/processed/"
geodk_results_fp = f"../results/reference/{study_area}/data/"

compare_results_fp = f"../results/compare/{study_area}/data/"

Settings loaded!


In [3]:
# Read all inputs for this notebook from parquet / pickle files.

# Simplified edges for both datasets.
osm_simplified_edges = gpd.read_parquet(f"{osm_data_fp}osm_edges_simplified.parquet")
geodk_simplified_edges = gpd.read_parquet(
    f"{geodk_data_fp}ref_edges_simplified.parquet"
)

# Edges carrying a component id (joined onto the simplified edges further down).
osm_component_edges = gpd.read_parquet(
    f"{osm_results_fp}osm_edges_component_id.parquet"
)
geodk_component_edges = gpd.read_parquet(
    f"{geodk_results_fp}ref_edges_component_id.parquet"
)

# Largest connected component of each dataset.
osm_largest_cc = gpd.read_parquet(
    f"{osm_results_fp}largest_connected_component.parquet"
)
geodk_largest_cc = gpd.read_parquet(
    f"{geodk_results_fp}largest_connected_component.parquet"
)

# These three values are embedded in the file names of the matching results,
# so they must agree with the values used when those files were written.
buffer_dist = 15
hausdorff_dist = 17
angle = 30
match_params = f"{buffer_dist}_{hausdorff_dist}_{angle}"

osm_matched = gpd.read_parquet(
    f"{compare_results_fp}osm_matched_segments_{match_params}.parquet"
)
geodk_matched = gpd.read_parquet(
    f"{compare_results_fp}ref_matched_segments_{match_params}.parquet"
)

osm_unmatched = gpd.read_parquet(
    f"{compare_results_fp}osm_unmatched_segments_{match_params}.parquet"
)
geodk_unmatched = gpd.read_parquet(
    f"{compare_results_fp}ref_unmatched_segments_{match_params}.parquet"
)

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles produced by this project.
with open(f"{compare_results_fp}grid_results_extrinsic.pickle", "rb") as grid_file:
    extrinsic_grid = pickle.load(grid_file)

with open(f"{osm_results_fp}grid_results_intrinsic.pickle", "rb") as grid_file:
    osm_intrinsic_grid = pickle.load(grid_file)


# Join component ids onto the simplified edges and flag the largest component.

# Columns kept for the reference ("geodk") edges after the join.
ref_cols = [
    "edge_id",
    "length",
    "infrastructure_length",
    "protected",
    "from",
    "to",
    "component",
    "geometry",
]

# Columns kept for the OSM edges after the join.
osm_cols = [
    "edge_id",
    "length",
    "infrastructure_length",
    "protected",
    "bicycle_infrastructure",
    "bicycle_bidirectional",
    "bicycle_geometries",
    "component",
    "geometry",
]

# NOTE(review): these component ids are hard-coded; the length assertions below
# are the only check that they still point at the largest connected component.
# Confirm them whenever the input data or the study area changes.
OSM_LARGEST_COMPONENT_ID = 0
GEODK_LARGEST_COMPONENT_ID = 25


def _join_component_ids(edges, component_edges, keep_cols, largest_component_id):
    """Return ``edges`` with a component id and a boolean ``largest_cc`` column.

    ``component_edges`` supplies ``component`` per ``edge_id``. The result is
    restricted to ``keep_cols`` plus ``largest_cc``, which is True where
    ``component`` equals ``largest_component_id``.

    Raises AssertionError if the merge changes the number of rows.
    """
    joined = edges.merge(component_edges[["edge_id", "component"]], on="edge_id")

    # The merge must neither drop nor duplicate edges.
    assert len(joined) == len(edges)

    joined = joined[keep_cols]
    return joined.assign(largest_cc=joined["component"] == largest_component_id)


osm_joined_edges = _join_component_ids(
    osm_simplified_edges, osm_component_edges, osm_cols, OSM_LARGEST_COMPONENT_ID
)
# The flagged edges must be exactly as many as the stored largest component.
assert osm_joined_edges["largest_cc"].sum() == len(osm_largest_cc)

geodk_joined_edges = _join_component_ids(
    geodk_simplified_edges,
    geodk_component_edges,
    ref_cols,
    GEODK_LARGEST_COMPONENT_ID,
)
assert geodk_joined_edges["largest_cc"].sum() == len(geodk_largest_cc)


# Cast the component-id column of the intrinsic grid to strings (in place, so
# the object referenced from `data` below carries the converted column).
osm_intrinsic_grid["component_ids_osm"] = osm_intrinsic_grid[
    "component_ids_osm"
].astype(str)

# Table name / dataset pairs; `table_names` and `data` are parallel lists
# derived from them, in this order.
named_tables = [
    ("osm_edges", osm_joined_edges),
    ("geodk_edges", geodk_joined_edges),
    ("osm_matched", osm_matched),
    ("osm_unmatched", osm_unmatched),
    ("geodk_matched", geodk_matched),
    ("geodk_unmatched", geodk_unmatched),
    ("extrinsic_grid", extrinsic_grid),
    ("osm_intrinsic_grid", osm_intrinsic_grid),
]
data = [table for _, table in named_tables]
table_names = [label for label, _ in named_tables]

print("Data ready!")

Data ready!


In [4]:
# Prepare the layers that are exported to GeoJSON.

# Infrastructure layers: tag every edge with its dataset of origin and keep
# only the columns that go into the export.
osm_joined_edges["source"] = "osm"
geodk_joined_edges["source"] = "geodk"

osm_infra = osm_joined_edges[
    ["protected", "bicycle_bidirectional", "geometry", "source"]
]
geodk_infra = geodk_joined_edges[["protected", "geometry", "source"]]

# Matching layers: add the `matched` flag and the `source` tag to the loaded
# frames in place. Only the columns in `match_cols` are kept for the export
# below, so `source` does not end up in the matched/unmatched layers.
for segments, is_matched, origin in (
    (osm_matched, True, "osm"),
    (osm_unmatched, False, "osm"),
    (geodk_matched, True, "geodk"),
    (geodk_unmatched, False, "geodk"),
):
    segments["matched"] = is_matched
    segments["source"] = origin

match_cols = ["matched", "geometry"]

osm_matched = osm_matched[match_cols]
osm_unmatched = osm_unmatched[match_cols]
geodk_matched = geodk_matched[match_cols]
geodk_unmatched = geodk_unmatched[match_cols]

# Component layers: geometry plus component id, one layer per dataset.
comp_cols = ["geometry", "component"]
osm_components = osm_component_edges[comp_cols]
geodk_components = geodk_component_edges[comp_cols]

# Largest connected components of both datasets, stacked into a single layer
# and distinguished by `source`.
largest_cc_cols = ["source", "geometry"]
largest_cc = pd.concat(
    [
        osm_joined_edges.loc[osm_joined_edges["largest_cc"], largest_cc_cols],
        geodk_joined_edges.loc[geodk_joined_edges["largest_cc"], largest_cc_cols],
    ]
)
# Stacking must keep every edge of both largest components.
assert len(largest_cc) == len(osm_largest_cc) + len(geodk_largest_cc)


# Layers to export; `to_json_names` and `to_json_data` are parallel lists, and
# each name becomes the stem of the GeoJSON file written for that layer.
to_json_names = [
    "osm_infra",
    "geodk_infra",
    "osm_matched",
    "osm_unmatched",
    "geodk_matched",
    "geodk_unmatched",
    "osm_components",
    "geodk_components",
    "largest_cc",
]
to_json_data = [
    osm_infra,
    geodk_infra,
    osm_matched,
    osm_unmatched,
    geodk_matched,
    geodk_unmatched,
    osm_components,
    geodk_components,
    largest_cc,
]
# zip() stops at the shorter list, so a mismatch would silently skip layers.
assert len(to_json_names) == len(to_json_data)

print("Data ready for conversion!")

# Create the output folder up front so the export also works on a fresh
# checkout where it does not exist yet.
geojson_dir = "../data/geojson"
os.makedirs(geojson_dir, exist_ok=True)

for name, dataset in zip(to_json_names, to_json_data):
    # Reproject to EPSG:4326 before writing; the driver is passed explicitly
    # instead of relying on detection from the file extension.
    dataset.to_crs("EPSG:4326").to_file(
        f"{geojson_dir}/{name}.geojson", driver="GeoJSON"
    )

print("Created geojsons!")

Data ready for conversion!
Created geojsons!


In [5]:

# Layers tiled with the plain tippecanoe command.
to_json_names_small = [
    "osm_infra",
    "geodk_infra",
    "osm_components",
    "geodk_components",
    "largest_cc",
]

# Layers tiled with --drop-densest-as-needed.
to_json_names_large = [
    "osm_matched",
    "osm_unmatched",
    "geodk_matched",
    "geodk_unmatched",
]

In [10]:
# Build a tile directory with tippecanoe for each of the smaller layers.
# Example of a full command line (run from the repository root):
# tippecanoe -z17  --no-tile-compression --drop-densest-as-needed --output-to-directory=data/geojson/tiles data/geojson/geodk_infra.geojson

for name in to_json_names_small:

    tile_dir = f"../data/tiles/tiles_{name}"

    # Check and create the same path (including missing parent folders), and
    # tolerate an existing directory so that the cell can be re-run.
    os.makedirs(tile_dir, exist_ok=True)

    dest = f"--output-to-directory={tile_dir}/"

    input_data = f"../data/geojson/{name}.geojson"

    # Variant that drops features in dense tiles:
    # "tippecanoe -z16  --no-tile-compression --drop-densest-as-needed"
    command = f"tippecanoe -z16  --no-tile-compression {dest} {input_data}"
    print(command)

    # os.system does not raise on failure, so report a non-zero status instead
    # of silently continuing with a missing or incomplete tile set.
    exit_status = os.system(command)
    if exit_status != 0:
        print(f"WARNING: tippecanoe returned status {exit_status} for {name}")

tippecanoe -z16  --no-tile-compression --output-to-directory=../data/tiles/tiles_osm_infra/ ../data/geojson/osm_infra.geojson


For layer 0, using name "osm_infra"
88997 features, 3996961 bytes of geometry, 80 bytes of string pool
tile 7/67/39 size is 550485 with detail 12, >500000    
  99.9%  16/34337/20283  


tippecanoe -z16  --no-tile-compression --output-to-directory=../data/tiles/tiles_geodk_infra/ ../data/geojson/geodk_infra.geojson


For layer 0, using name "geodk_infra"
50856 features, 3591521 bytes of geometry, 39 bytes of string pool
  99.9%  16/35040/20497  


In [7]:
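# # Simplify matched geojson with mapshaper (disabled: this cell is commented out; the output shown below is from an earlier run)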
# for name in to_json_names_large:

#     input_fp = f"../data/geojson/{name}.geojson"
#     output_fp = f"../data/geojson/{name}_small.geojson"

#     command = "mapshaper " + input_fp + " -simplify dp 1% keep-shapes -o format=geojson " + output_fp
#     print(command)

#     os.system(command)

mapshaper ../data/geojson/osm_matched.geojson -simplify dp 1% keep-shapes -o format=geojson ../data/geojson/osm_matched_small.geojson


[simplify] Repaired 0 intersections; 42 intersections could not be repaired
[o] Wrote ../data/geojson/osm_matched_small.geojson


mapshaper ../data/geojson/osm_unmatched.geojson -simplify dp 1% keep-shapes -o format=geojson ../data/geojson/osm_unmatched_small.geojson


[simplify] Repaired 0 intersections; 568 intersections could not be repaired
[o] Wrote ../data/geojson/osm_unmatched_small.geojson


mapshaper ../data/geojson/geodk_matched.geojson -simplify dp 1% keep-shapes -o format=geojson ../data/geojson/geodk_matched_small.geojson


[simplify] Repaired 0 intersections; 12 intersections could not be repaired
[o] Wrote ../data/geojson/geodk_matched_small.geojson


mapshaper ../data/geojson/osm_unmatched.geojson -simplify dp 1% keep-shapes -o format=geojson ../data/geojson/osm_unmatched_small.geojson


[simplify] Repaired 0 intersections; 568 intersections could not be repaired
[o] Wrote ../data/geojson/osm_unmatched_small.geojson


In [8]:
# Build a tile directory with tippecanoe for each of the larger (matched /
# unmatched) layers, using --drop-densest-as-needed.

for name in to_json_names_large:

    tile_dir = f"../data/tiles/tiles_{name}"

    # Check and create the same path (including missing parent folders), and
    # tolerate an existing directory so that the cell can be re-run.
    os.makedirs(tile_dir, exist_ok=True)

    dest = f"--output-to-directory={tile_dir}/"

    input_data = f"../data/geojson/{name}.geojson"

    # Variant without feature dropping:
    # "tippecanoe -z16  --no-tile-compression"
    command = (
        "tippecanoe -z16  --no-tile-compression --drop-densest-as-needed"
        f" {dest} {input_data}"
    )
    print(command)

    # os.system does not raise on failure, so report a non-zero status instead
    # of silently continuing with a missing or incomplete tile set.
    exit_status = os.system(command)
    if exit_status != 0:
        print(f"WARNING: tippecanoe returned status {exit_status} for {name}")



tippecanoe -z16  --no-tile-compression --drop-densest-as-needed --output-to-directory=../data/tiles/tiles_osm_matched/ ../data/geojson/osm_matched.geojson


For layer 0, using name "osm_matched"
351476 features, 11136307 bytes of geometry, 15 bytes of string pool
tile 7/68/40 size is 584454 with detail 12, >500000    
Going to try keeping the sparsest 76.99% of the features to make it fit
tile 7/68/40 size is 567689 with detail 12, >500000    
Going to try keeping the sparsest 61.03% of the features to make it fit
tile 7/68/40 size is 541542 with detail 12, >500000    
Going to try keeping the sparsest 50.72% of the features to make it fit
tile 7/68/40 size is 517297 with detail 12, >500000    
Going to try keeping the sparsest 44.12% of the features to make it fit
tile 7/68/40 size is 500330 with detail 12, >500000    
Going to try keeping the sparsest 39.68% of the features to make it fit
tile 8/136/80 size is 1058766 with detail 12, >500000    
Going to try keeping the sparsest 42.50% of the features to make it fit
tile 8/136/80 size is 753801 with detail 12, >500000    
Going to try keeping the sparsest 25.37% of the features to make i

tippecanoe -z16  --no-tile-compression --drop-densest-as-needed --output-to-directory=../data/tiles/tiles_osm_unmatched/ ../data/geojson/osm_unmatched.geojson


For layer 0, using name "osm_unmatched"
1198125 features, 37972658 bytes of geometry, 16 bytes of string pool
tile 4/8/4 size is 547075 with detail 12, >500000    
Going to try keeping the sparsest 82.26% of the features to make it fit
tile 4/8/4 size is 543979 with detail 12, >500000    
Going to try keeping the sparsest 68.04% of the features to make it fit
tile 4/8/4 size is 539911 with detail 12, >500000    
Going to try keeping the sparsest 56.71% of the features to make it fit
tile 4/8/4 size is 535933 with detail 12, >500000    
Going to try keeping the sparsest 47.62% of the features to make it fit
tile 4/8/4 size is 531109 with detail 12, >500000    
Going to try keeping the sparsest 40.35% of the features to make it fit
tile 4/8/4 size is 524413 with detail 12, >500000    
Going to try keeping the sparsest 34.62% of the features to make it fit
tile 4/8/4 size is 520273 with detail 12, >500000    
Going to try keeping the sparsest 29.95% of the features to make it fit
tile 4/8

tippecanoe -z16  --no-tile-compression --drop-densest-as-needed --output-to-directory=../data/tiles/tiles_geodk_matched/ ../data/geojson/geodk_matched.geojson


For layer 0, using name "geodk_matched"
564661 features, 18444672 bytes of geometry, 15 bytes of string pool
tile 6/34/20 size is 508913 with detail 12, >500000    
Going to try keeping the sparsest 88.42% of the features to make it fit
tile 6/34/20 size is 503960 with detail 12, >500000    
Going to try keeping the sparsest 78.96% of the features to make it fit
tile 6/34/20 size is 502346 with detail 12, >500000    
Going to try keeping the sparsest 70.73% of the features to make it fit
tile 7/67/39 size is 711198 with detail 12, >500000    
Going to try keeping the sparsest 63.27% of the features to make it fit
tile 7/67/40 size is 742886 with detail 12, >500000    
Going to try keeping the sparsest 60.57% of the features to make it fit
tile 7/68/40 size is 905736 with detail 12, >500000    
Going to try keeping the sparsest 49.68% of the features to make it fit
tile 7/67/39 size is 653013 with detail 12, >500000    
Going to try keeping the sparsest 43.60% of the features to make it

tippecanoe -z16  --no-tile-compression --drop-densest-as-needed --output-to-directory=../data/tiles/tiles_geodk_unmatched/ ../data/geojson/geodk_unmatched.geojson


For layer 0, using name "geodk_unmatched"
315767 features, 10330705 bytes of geometry, 16 bytes of string pool
tile 7/67/40 size is 535372 with detail 12, >500000    
Going to try keeping the sparsest 84.05% of the features to make it fit
tile 7/67/39 size is 620813 with detail 12, >500000    
Going to try keeping the sparsest 72.49% of the features to make it fit
tile 7/67/40 size is 525408 with detail 12, >500000    
Going to try keeping the sparsest 71.99% of the features to make it fit
tile 7/67/39 size is 588626 with detail 12, >500000    
Going to try keeping the sparsest 55.41% of the features to make it fit
tile 7/67/40 size is 505907 with detail 12, >500000    
Going to try keeping the sparsest 64.03% of the features to make it fit
tile 7/67/39 size is 537022 with detail 12, >500000    
Going to try keeping the sparsest 46.43% of the features to make it fit
tile 7/67/39 size is 501702 with detail 12, >500000    
Going to try keeping the sparsest 41.65% of the features to make 