In [1]:
import os
import pickle
import subprocess

import geopandas as gpd
import pandas as pd
import yaml

In [2]:
# Read the shared project settings; only the CRS and the study-area name are
# needed by this notebook.
# NOTE(review): yaml.load with FullLoader is kept as-is; if the config only
# holds plain values, yaml.safe_load would be the narrower choice - confirm.
with open(r"../config.yml") as file:
    parsed_yaml_file = yaml.load(file, Loader=yaml.FullLoader)

crs = parsed_yaml_file["CRS"]
study_area = parsed_yaml_file["study_area"]

print("Settings loaded!")

# Folder prefixes, all keyed by the study area. Each one ends with a slash so
# that file names can be appended with plain string concatenation.
osm_data_fp, osm_results_fp = (
    f"../data/osm/{study_area}/processed/",
    f"../results/osm/{study_area}/data/",
)

geodk_data_fp, geodk_results_fp = (
    f"../data/reference/{study_area}/processed/",
    f"../results/reference/{study_area}/data/",
)

compare_results_fp = f"../results/compare/{study_area}/data/"

Settings loaded!


In [3]:
# read data

osm_simplified_edges = gpd.read_parquet(osm_data_fp + "osm_edges_simplified.parquet")

geodk_simplified_edges = gpd.read_parquet(
    geodk_data_fp + "ref_edges_simplified.parquet"
)

osm_component_edges = gpd.read_parquet(
    osm_results_fp + "osm_edges_component_id.parquet"
)
geodk_component_edges = gpd.read_parquet(
    geodk_results_fp + "ref_edges_component_id.parquet"
)

osm_largest_cc = gpd.read_parquet(
    osm_results_fp + "largest_connected_component.parquet"
)
geodk_largest_cc = gpd.read_parquet(
    geodk_results_fp + "largest_connected_component.parquet"
)

# Parameters that are encoded in the file names of the feature-matching results.
buffer_dist = 15
hausdorff_dist = 17
angle = 30

match_suffix = f"{buffer_dist}_{hausdorff_dist}_{angle}"

osm_matched = gpd.read_parquet(
    compare_results_fp + f"osm_matched_segments_{match_suffix}.parquet"
)
geodk_matched = gpd.read_parquet(
    compare_results_fp + f"ref_matched_segments_{match_suffix}.parquet"
)

osm_unmatched = gpd.read_parquet(
    compare_results_fp + f"osm_unmatched_segments_{match_suffix}.parquet"
)
geodk_unmatched = gpd.read_parquet(
    compare_results_fp + f"ref_unmatched_segments_{match_suffix}.parquet"
)

# NOTE: pickle.load executes arbitrary code embedded in the file; only load
# pickles that were produced by this project's own notebooks.
with open(compare_results_fp + "grid_results_extrinsic.pickle", "rb") as fp:
    extrinsic_grid = pickle.load(fp)

with open(osm_results_fp + "grid_results_intrinsic.pickle", "rb") as fp:
    osm_intrinsic_grid = pickle.load(fp)


# join edge data

ref_cols = [
    "edge_id",
    "length",
    "infrastructure_length",
    "protected",
    "from",
    "to",
    "component",
    "geometry",
]

osm_cols = [
    "edge_id",
    "length",
    "infrastructure_length",
    "protected",
    "bicycle_infrastructure",
    "bicycle_bidirectional",
    "bicycle_geometries",
    "component",
    "geometry",
]

# Component ids that are flagged as the largest connected component.
# These are hard-coded; the length assertion inside _join_component_ids fails
# if the flagged edge count no longer matches the stored largest component.
# NOTE(review): presumably specific to the current study area / run - confirm
# whenever the component results are regenerated.
OSM_LARGEST_COMPONENT_ID = 0
GEODK_LARGEST_COMPONENT_ID = 25


def _join_component_ids(
    edges, component_edges, keep_cols, largest_component_id, largest_cc_edges
):
    """Attach component ids to simplified edges and flag the largest component.

    Parameters
    ----------
    edges : simplified edge table with an "edge_id" column.
    component_edges : table providing "edge_id" and "component".
    keep_cols : columns to keep in the result (must include "component").
    largest_component_id : value of "component" that is flagged as largest.
    largest_cc_edges : stored largest-component table, used only for its length
        as a consistency check.

    Returns
    -------
    A new table restricted to ``keep_cols`` plus a boolean "largest_cc" column.

    Raises
    ------
    AssertionError
        If the merge changes the number of rows, or if the number of flagged
        edges differs from ``len(largest_cc_edges)``.
    """
    joined = edges.merge(component_edges[["edge_id", "component"]], on="edge_id")

    assert len(joined) == len(
        edges
    ), "Merging component ids changed the number of edges"

    # Explicit copy: the column subset is assigned into below, which would
    # otherwise risk a SettingWithCopyWarning.
    joined = joined[keep_cols].copy()

    joined["largest_cc"] = joined["component"] == largest_component_id

    assert int(joined["largest_cc"].sum()) == len(
        largest_cc_edges
    ), "Flagged edges do not match the stored largest connected component"

    return joined


osm_joined_edges = _join_component_ids(
    osm_simplified_edges,
    osm_component_edges,
    osm_cols,
    OSM_LARGEST_COMPONENT_ID,
    osm_largest_cc,
)

geodk_joined_edges = _join_component_ids(
    geodk_simplified_edges,
    geodk_component_edges,
    ref_cols,
    GEODK_LARGEST_COMPONENT_ID,
    geodk_largest_cc,
)


data = [
    osm_joined_edges,
    geodk_joined_edges,
    osm_matched,
    osm_unmatched,
    geodk_matched,
    geodk_unmatched,
    extrinsic_grid,
    osm_intrinsic_grid,
]
table_names = [
    "osm_edges",
    "geodk_edges",
    "osm_matched",
    "osm_unmatched",
    "geodk_matched",
    "geodk_unmatched",
    "extrinsic_grid",
    "osm_intrinsic_grid",
]

# Cast in place, so the object already referenced from `data` is updated too.
osm_intrinsic_grid["component_ids_osm"] = osm_intrinsic_grid.component_ids_osm.astype(
    str
)

print("Data ready!")

Data ready!


In [4]:
# concat edges
# The "source" column is added in place so that it is also available for the
# component and largest-component layers built further down.
osm_joined_edges["source"] = "osm"
geodk_joined_edges["source"] = "geodk"
osm_json = osm_joined_edges[
    ["protected", "bicycle_bidirectional", "geometry", "source"]
]
geodk_json = geodk_joined_edges[["protected", "geometry", "source"]]

infra = pd.concat([osm_json, geodk_json])

assert len(infra) == len(geodk_json) + len(osm_json)

# concat matches
osm_matched["matched"] = True
osm_unmatched["matched"] = False
geodk_matched["matched"] = True
geodk_unmatched["matched"] = False
osm_matched["source"] = "osm"
osm_unmatched["source"] = "osm"
geodk_matched["source"] = "geodk"
geodk_unmatched["source"] = "geodk"

match_cols = ["source", "protected", "matched", "geometry"]
matched = pd.concat(
    [
        osm_matched[match_cols],
        osm_unmatched[match_cols],
        geodk_matched[match_cols],
        geodk_unmatched[match_cols],
    ]
)

assert len(matched) == len(osm_matched) + len(osm_unmatched) + len(geodk_matched) + len(
    geodk_unmatched
)

comp_cols = ["source", "geometry", "component"]
components = pd.concat([osm_joined_edges[comp_cols], geodk_joined_edges[comp_cols]])
assert len(components) == len(osm_joined_edges) + len(geodk_joined_edges)

# Convert largest CC to geojson - does not need to be tiled? but might as well be
# "largest_cc" is a boolean column, so it can be used directly as the row mask.
largest_cc = pd.concat(
    [
        osm_joined_edges.loc[osm_joined_edges.largest_cc, ["source", "geometry"]],
        geodk_joined_edges.loc[geodk_joined_edges.largest_cc, ["source", "geometry"]],
    ]
)
assert len(largest_cc) == len(osm_largest_cc) + len(geodk_largest_cc)

infra_dens = extrinsic_grid[
    ["grid_id", "geometry", "osm_edge_density", "ref_edge_density", "edge_density_diff"]
]

to_json_names = ["infra", "matched", "components", "largest_cc", "infra_dens"]
to_json_data = [infra, matched, components, largest_cc, infra_dens]

print("Data ready for tiling!")

# Make sure the output folder exists before writing; to_file does not create it.
# NOTE(review): unlike the other paths in this notebook, this folder is not
# keyed by study_area - confirm that is intended.
geojson_dir = "../data/geojson"
os.makedirs(geojson_dir, exist_ok=True)

for name, dataset in zip(to_json_names, to_json_data):
    # Reproject to EPSG:4326 before export and name the driver explicitly
    # instead of relying on inference from the file extension.
    dataset.to_crs("EPSG:4326").to_file(
        f"{geojson_dir}/{name}.geojson", driver="GeoJSON"
    )

print("Created geojsons!")

Data ready for tiling!


In [15]:
# Run tippecanoe once per exported GeoJSON layer, writing each tileset to its
# own ../data/tiles_<name>/ folder.
# Example of the kind of command line assembled below (paths differ):
# tippecanoe -z17  --no-tile-compression --drop-densest-as-needed --output-to-directory=data/geojson/tiles data/geojson/geodk_infra.geojson

for name in to_json_names:

    tile_dir = f"../data/tiles_{name}"

    # exist_ok=True replaces the separate isdir check and is safe on re-runs.
    os.makedirs(tile_dir, exist_ok=True)

    dest = f"--output-to-directory={tile_dir}/"

    input_data = f"../data/geojson/{name}.geojson"

    # Argument list instead of a shell string: no shell parsing or quoting of
    # the interpolated paths is involved.
    tippecanoe_args = [
        "tippecanoe",
        "-z17",
        "--no-tile-compression",
        "--drop-densest-as-needed",
        dest,
        input_data,
    ]

    command = " ".join(tippecanoe_args)
    print(command)

    # subprocess.run raises FileNotFoundError if tippecanoe is not on PATH.
    # A non-zero exit status is reported but does not stop the remaining layers.
    completed = subprocess.run(tippecanoe_args, check=False)

    if completed.returncode != 0:
        print(f"tippecanoe exited with status {completed.returncode} for {name}")

tippecanoe -z17  --no-tile-compression --drop-densest-as-needed --output-to-directory=../data/tiles_infra/ ../data/geojson/infra.geojson


For layer 0, using name "infra"
139853 features, 9581488 bytes of geometry, 86 bytes of string pool
tile 3/4/2 size is 582309 with detail 12, >500000    
Going to try keeping the sparsest 77.28% of the features to make it fit
tile 3/4/2 size is 569535 with detail 12, >500000    
Going to try keeping the sparsest 61.06% of the features to make it fit
tile 3/4/2 size is 543903 with detail 12, >500000    
Going to try keeping the sparsest 50.52% of the features to make it fit
tile 3/4/2 size is 519241 with detail 12, >500000    
Going to try keeping the sparsest 43.78% of the features to make it fit
tile 4/8/5 size is 602862 with detail 12, >500000    
Going to try keeping the sparsest 74.64% of the features to make it fit
tile 4/8/4 size is 669621 with detail 12, >500000    
Going to try keeping the sparsest 67.20% of the features to make it fit
tile 4/8/5 size is 582239 with detail 12, >500000    
Going to try keeping the sparsest 57.69% of the features to make it fit
tile 4/8/4 size is

tippecanoe -z17  --no-tile-compression --drop-densest-as-needed --output-to-directory=../data/tiles_matched/ ../data/geojson/matched.geojson


For layer 0, using name "matched"
2430029 features, 90863801 bytes of geometry, 73 bytes of string pool
tile 3/4/2 size is 948043 with detail 12, >500000    
Going to try keeping the sparsest 47.47% of the features to make it fit
tile 3/4/2 size is 839913 with detail 12, >500000    
Going to try keeping the sparsest 25.43% of the features to make it fit
tile 3/4/2 size is 774111 with detail 12, >500000    
Going to try keeping the sparsest 14.78% of the features to make it fit
tile 3/4/2 size is 733433 with detail 12, >500000    
Going to try keeping the sparsest 9.07% of the features to make it fit
tile 3/4/2 size is 694449 with detail 12, >500000    
Going to try keeping the sparsest 5.88% of the features to make it fit
tile 3/4/2 size is 649635 with detail 12, >500000    
Going to try keeping the sparsest 4.07% of the features to make it fit
tile 3/4/2 size is 595075 with detail 12, >500000    
Going to try keeping the sparsest 3.08% of the features to make it fit
tile 3/4/2 size is

tippecanoe -z17  --no-tile-compression --drop-densest-as-needed --output-to-directory=../data/tiles_components/ ../data/geojson/components.geojson


139853 features, 9524133 bytes of geometry, 546515 bytes of string pool
tile 3/4/2 size is 589654 with detail 12, >500000    
Going to try keeping the sparsest 76.32% of the features to make it fit
tile 3/4/2 size is 576023 with detail 12, >500000    
Going to try keeping the sparsest 59.62% of the features to make it fit
tile 3/4/2 size is 548983 with detail 12, >500000    
Going to try keeping the sparsest 48.87% of the features to make it fit
tile 3/4/2 size is 522635 with detail 12, >500000    
Going to try keeping the sparsest 42.08% of the features to make it fit
tile 3/4/2 size is 500693 with detail 12, >500000    
Going to try keeping the sparsest 37.82% of the features to make it fit
tile 4/8/5 size is 602456 with detail 12, >500000    
Going to try keeping the sparsest 74.69% of the features to make it fit
tile 4/8/4 size is 671978 with detail 12, >500000    
Going to try keeping the sparsest 66.97% of the features to make it fit
tile 4/8/5 size is 582551 with detail 12, >500

tippecanoe -z17  --no-tile-compression --drop-densest-as-needed --output-to-directory=../data/tiles_largest_cc/ ../data/geojson/largest_cc.geojson


26241 features, 1577916 bytes of geometry, 20 bytes of string pool
  99.9%  17/70008/40985  


tippecanoe -z17  --no-tile-compression --drop-densest-as-needed --output-to-directory=../data/tiles_infra_dens/ ../data/geojson/infra_dens.geojson


For layer 0, using name "infra_dens"
78175 features, 5931172 bytes of geometry, 2049522 bytes of string pool
tile 3/4/2 size is 1320567 with detail 12, >500000    
Going to try keeping the sparsest 34.08% of the features to make it fit
tile 3/4/2 size is 828683 with detail 12, >500000    
Going to try keeping the sparsest 18.50% of the features to make it fit
tile 3/4/2 size is 542244 with detail 12, >500000    
Going to try keeping the sparsest 15.36% of the features to make it fit
tile 4/8/5 size is 2410141 with detail 12, >500000    
Going to try keeping the sparsest 18.67% of the features to make it fit
tile 4/8/4 size is 2824063 with detail 12, >500000    
Going to try keeping the sparsest 15.93% of the features to make it fit
tile 5/17/10 size is 950773 with detail 12, >500000    
Going to try keeping the sparsest 47.33% of the features to make it fit
tile 5/16/10 size is 1599328 with detail 12, >500000    
Going to try keeping the sparsest 28.14% of the features to make it fit
t