# Analysis of intrinsic results: OSM Tags

In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pickle
import json
import pandas as pd
import seaborn as sns
import plotly.express as px
import pandas as pd

from src import evaluation_functions as eval_func

# Execute the shared settings scripts inside this kernel's namespace.
# NOTE(review): later cells use `study_area` and `format_osm_style` without
# defining them, so they are presumably created by these scripts — TODO confirm
# which script provides which name.
%run ../settings/yaml_variables.py
%run ../settings/df_styler.py
%run ../settings/plotting.py
%run ../settings/load_osmdata.py
%run ../settings/paths.py


  cmap = cm.get_cmap(cmap_name, n)


OSM data loaded successfully!


In [2]:
# Read intrinsic grid results
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# pickles that were produced by this project's own analysis notebooks.
with open(
    f"../results/OSM/{study_area}/data/grid_results_intrinsic.pickle", "rb"
) as fp:
    osm_intrinsic_grid = pickle.load(fp)

# Import intrinsic results.
# The context manager guarantees the file handle is closed once the JSON has
# been parsed (a bare open() left it open for the lifetime of the kernel).
with open(
    f"../results/OSM/{study_area}/data/intrinsic_analysis.json"
) as osm_intrinsic_file:
    osm_intrinsic_results = json.load(osm_intrinsic_file)

# Import summary dataframe (first CSV column is used as the index)
summarize_results_df = pd.read_csv(
    f"../results/OSM/{study_area}/data/intrinsic_summary_results.csv",
    index_col=0,
)

# Last expression of the cell: render the styled summary table
summarize_results_df.style.pipe(format_osm_style)

Unnamed: 0,Unnamed: 1
Total infrastructure length (km),20619
Protected bicycle infrastructure density (m/km2),383
Unprotected bicycle infrastructure density (m/km2),61
Mixed protection bicycle infrastructure density (m/km2),1
Bicycle infrastructure density (m/km2),445
Nodes,90419
Dangling nodes,46344
Nodes per km2,2
Dangling nodes per km2,1
Incompatible tag combinations,25


## Spatial patterns in missing tags

In [None]:
# Short column names for the share of missing tags, measured by edge length ...
length_missing = [
    "surface_length_pct_missing",
    "width_length_pct_missing",
    "speedlimit_length_pct_missing",
    "lit_length_pct_missing",
]

# ... and by edge count
count_missing = [
    "surface_count_pct_missing",
    "width_count_pct_missing",
    "speedlimit_count_pct_missing",
    "lit_count_pct_missing",
]

# The grid stores these columns with an "existing_tags_" prefix; map every
# prefixed name to its short form (length columns first, then count columns).
rename_dict = {
    f"existing_tags_{short_name}": short_name
    for short_name in length_missing + count_missing
}

# Strip the prefix in place on the grid's columns
osm_intrinsic_grid.rename(columns=rename_dict, inplace=True)

In [None]:
# Pairwise relationships between the length-based missing-tag percentages.
# Values that are not below 100 are masked to NaN, and any row containing a
# NaN is then dropped before plotting.
sns.pairplot(
    data=osm_intrinsic_grid[length_missing]
    .where(osm_intrinsic_grid[length_missing] < 100)
    .dropna()
);

In [None]:
# Same pair plot for the length-based percentages without the < 100 mask;
# only rows with a missing value in any of the four columns are dropped.
sns.pairplot(data=osm_intrinsic_grid.loc[:, length_missing].dropna(how="any"));

In [None]:
# Pairwise relationships between the count-based missing-tag percentages.
# Values that are not below 100 are masked to NaN, and any row containing a
# NaN is then dropped before plotting.
sns.pairplot(
    data=osm_intrinsic_grid[count_missing]
    .where(osm_intrinsic_grid[count_missing] < 100)
    .dropna()
);

In [None]:
# Same pair plot for the count-based percentages without the < 100 mask;
# only rows with a missing value in any of the four columns are dropped.
sns.pairplot(data=osm_intrinsic_grid.loc[:, count_missing].dropna(how="any"));

In [None]:

# Interactive scatter: share of edges without a surface tag (x) against share
# of edges without a speedlimit tag (y), one point per grid cell, coloured by
# the cell's "osm_edge_density" value.
fig = px.scatter(
    data_frame=osm_intrinsic_grid,
    x="surface_count_pct_missing",
    y="speedlimit_count_pct_missing",
    color="osm_edge_density",
    color_continuous_scale="viridis_r",
    title="Correlation between missing tags",
    # Human-readable axis / colour-bar titles, keyed by column name
    labels=dict(
        osm_edge_density="edge density (m/sqkm)",
        speedlimit_count_pct_missing="Pct edges without speedlimit tag",
        surface_count_pct_missing="Pct edges without surface tag",
    ),
)

# Global font settings for the figure
fig.update_layout(font={"size": 12, "color": "RebeccaPurple"})
fig.show()

In [None]:
# Interactive scatter: share of edges without a surface tag (x) against share
# of edges without a lit tag (y), one point per grid cell, coloured by the
# cell's "osm_edge_density" value.
fig = px.scatter(
    osm_intrinsic_grid,
    x='surface_count_pct_missing',
    y='lit_count_pct_missing',
    color="osm_edge_density",
    title='Correlation between missing tags',
    color_continuous_scale='viridis_r',
    # The label keys must match the plotted columns. This mapping previously
    # used the speedlimit column (copied from the cell above), which left the
    # y axis titled with the raw column name.
    labels={
        "osm_edge_density": "edge density (m/sqkm)",
        "lit_count_pct_missing": "Pct edges without lit tag",
        "surface_count_pct_missing": "Pct edges without surface tag",
        })

# Global font settings for the figure
fig.update_layout(
    font=dict(
        size=12,
        color="RebeccaPurple"
    )
)
fig.show()

In [None]:
from pysal.explore import esda
from pysal.lib import weights
from splot.esda import lisa_cluster

In [None]:

# Represent every grid cell by the centroid of its geometry
cents = osm_intrinsic_grid.centroid

# Two-column coordinate array (X, Y), one row per grid cell
pts = pd.DataFrame(dict(X=cents.x, Y=cents.y)).to_numpy()

# Alternative kept for reference: distance-band weights with a threshold of 1000
# w = weights.distance.DistanceBand.from_array(
#     pts, 1000, binary=False
# )

# Spatial weights from the 6 nearest neighbouring centroids of each cell
w = weights.distance.KNN.from_array(pts, k=6)

# Row-standardize the weights
w.transform = "R"

# Optional check of the neighbour-count distribution:
# sns.histplot(w.cardinalities, bins=10, kde=True);

In [None]:
# Column groups for the autocorrelation analysis: one "<tag>_<measure>_pct_missing"
# column per tag, in the order surface, width, speedlimit, lit.

# Share of missing tags measured by edge length
length_missing = [
    f"{tag}_length_pct_missing"
    for tag in ("surface", "width", "speedlimit", "lit")
]

# Share of missing tags measured by edge count
count_missing = [
    f"{tag}_count_pct_missing"
    for tag in ("surface", "width", "speedlimit", "lit")
]

In [None]:
tag_types = ["surface", "width", "speedlimit", "lit"]

# Global spatial autocorrelation of the count-based missing-tag share:
# one Moran scatter plot and one Moran's I statistic per tag type.
for t, c in zip(tag_types, count_missing):

    # Spatial lag of the variable under the weights `w`, stored as "<tag>_lag"
    lag_col = f"{t}_lag"
    osm_intrinsic_grid[lag_col] = weights.spatial_lag.lag_spatial(
        w, osm_intrinsic_grid[c]
    )

    # Moran plot: variable on x, its spatial lag on y, with a fitted line
    f, ax = plt.subplots(figsize=(6, 6))
    sns.regplot(
        data=osm_intrinsic_grid,
        x=c,
        y=lag_col,
        ci=None,
        line_kws={"color": "r"},
        ax=ax,
    )
    # Reference lines through zero on both axes
    ax.axvline(0, color="k", alpha=0.5)
    ax.axhline(0, color="k", alpha=0.5)
    ax.set_title(f"Moran Plot - {t}")
    plt.show()

    # Global Moran's I and its simulated pseudo p-value
    moran = esda.moran.Moran(osm_intrinsic_grid[c], w)
    print(f"With significance {moran.p_sim}, the Moran's I value for {t} errors is {moran.I}" )


In [None]:
# Local spatial autocorrelation (LISA) per tag type, following
# https://geographicdata.science/book/notebooks/07_local_autocorrelation.html

# Quadrant code -> cluster name; code 0 is used for cells whose local
# statistic is not significant at p < 0.05
spots_labels = {0: "Non-Significant", 1: "HH", 2: "LH", 3: "LL", 4: "HL"}

# Per-tag Series of "Significant" / "Non-Significant" labels, keyed by tag type
significance_labels = {}

for t, c in zip(tag_types, count_missing):

    lisa = esda.moran.Moran_Local(osm_intrinsic_grid[c], w)

    # 1 where the simulated pseudo p-value is below 0.05, else 0
    sig = 1 * (lisa.p_sim < 0.05)

    # Keep the quadrant code only for significant cells (others become 0)
    spots = lisa.q * sig

    # Store the cluster label of every cell as "<tag>_q"
    osm_intrinsic_grid[f"{t}_q"] = pd.Series(
        spots, index=osm_intrinsic_grid.index
    ).map(spots_labels)

    # 2 x 2 panel figure, axes flattened to a 1-D array
    f, axs = plt.subplots(nrows=2, ncols=2, figsize=(20, 20))
    axs = axs.flatten()

    # Panel 1: choropleth of the local statistic values
    osm_intrinsic_grid.assign(Is=lisa.Is).plot(
        column="Is",
        cmap="plasma",
        scheme="quantiles",
        k=2,
        edgecolor="white",
        linewidth=0.1,
        alpha=0.75,
        legend=True,
        ax=axs[0],
    )

    # Panel 2: scatterplot quadrant of every cell (p=1 keeps all cells)
    lisa_cluster(lisa, osm_intrinsic_grid, p=1, ax=axs[1])

    # Panel 3: significant vs. non-significant cells at the 0.05 level
    labels = pd.Series(sig, index=osm_intrinsic_grid.index).map(
        {1: "Significant", 0: "Non-Significant"}
    )
    osm_intrinsic_grid.assign(cl=labels).plot(
        column="cl",
        categorical=True,
        k=2,
        cmap="Paired",
        linewidth=0.1,
        edgecolor="white",
        legend=True,
        ax=axs[2],
    )

    significance_labels[t] = labels

    # Panel 4: cluster map restricted to cells significant at p = 0.05
    lisa_cluster(lisa, osm_intrinsic_grid, p=0.05, ax=axs[3])

    # Hide the axes and put each panel's title at the bottom (y=0)
    for ax, panel_title in zip(
        axs,
        (
            "Local Statistics",
            "Scatterplot Quadrant",
            "Statistical Significance",
            "Moran Cluster Map",
        ),
    ):
        ax.set_axis_off()
        ax.set_title(panel_title, y=0)

    f.suptitle(f'Local Spatial Autocorrelation for OSM tag: {t}', fontsize=16)

    f.tight_layout()

    plt.show()

In [None]:
# Report, per tag type, how many grid cells are labelled "HH" (hotspot) and
# "LL" (coldspot) in the "<tag>_q" cluster columns.
for t in tag_types:
    quadrant = osm_intrinsic_grid[f"{t}_q"]
    hotspot = int((quadrant == "HH").sum())
    coldspot = int((quadrant == "LL").sum())

    print(f"For the tag '{t}', {hotspot} out of {len(osm_intrinsic_grid)} grid cells are part of a hotspot.")
    print(f"For the tag '{t}', {coldspot} out of {len(osm_intrinsic_grid)} grid cells are part of a coldspot.")
    print("\n")

In [None]:
# Export the per-tag cluster labels ("<tag>_q" columns) together with the grid index.
# NOTE(review): unlike the inputs read from ../results/OSM/{study_area}/data/,
# this output path does not include the study area — confirm this is intended.
q_cols = [f"{t}_q" for t in tag_types]
osm_intrinsic_grid.loc[:, q_cols].to_csv(
    "../results/osm_quality/tags_spatial_autocorrelation.csv",
    index=True,
)

## Correlation between errors and population density

In [None]:

# TODO: Correlation between pop density and missing tags