# Summary of extrinsic analysis

Run this notebook only after you have provided a reference data set and successfully run the notebook for the [extrinsic analysis](./extrinsic_analysis.ipynb).

In [21]:
import geopandas as gpd
import osmnx as ox
import networkx as nx
import yaml
import matplotlib.pyplot as plt
import contextily as cx
import json
import pickle
import pandas as pd 
import os.path
from src import evaluation_functions as ef
from src import matching_functions as mf

In [3]:
# Parse the project configuration; the file handle is only needed while parsing.
# NOTE(review): yaml.load with FullLoader can construct more than plain data
# types - consider yaml.safe_load if config.yml could come from an untrusted source.
with open(r'../config.yml') as config_file:
    parsed_yaml_file = yaml.load(config_file, Loader=yaml.FullLoader)

# Study area settings
study_area = parsed_yaml_file['study_area']
study_area_poly_fp = parsed_yaml_file['study_area_poly']
study_crs = parsed_yaml_file['study_crs']

# Tag analysis settings
missing_tag_dict = parsed_yaml_file['missing_tag_analysis']
incompatible_tags_dict = parsed_yaml_file['incompatible_tags_analysis']

# Queries used to select cycling infrastructure
cycling_infrastructure_queries = parsed_yaml_file['cycling_infrastructure_queries']

# Reference data settings
reference_geometries = parsed_yaml_file['reference_geometries']
cycling_bidirectional = parsed_yaml_file['bidirectional']

# Feature matching settings
feature_matching = parsed_yaml_file['feature_matching']

print('Settings loaded!')

Settings loaded!


**Load data:**

In [8]:
# --- OSM data: non-simplified and simplified networks, each split into node/edge tables ---
osm_graph = ox.load_graphml(f'../data/osm_{study_area}.graphml')
osm_nodes, osm_edges = ox.graph_to_gdfs(osm_graph)

osm_simplified_graph = ox.load_graphml(f'../data/osm_{study_area}_simple.graphml')
osm_simplified_nodes, osm_simplified_edges = ox.graph_to_gdfs(osm_simplified_graph)

# The boolean column comes back as the strings 'True'/'False' after loading;
# map both spellings back to real booleans.
for text_value, bool_value in (('True', True), ('False', False)):
    is_text_value = osm_simplified_edges.cycling_bidirectional == text_value
    osm_simplified_edges.loc[is_text_value, 'cycling_bidirectional'] = bool_value

# --- Reference data: non-simplified and simplified networks, each split into node/edge tables ---
ref_graph = ox.load_graphml(f'../data/ref_{study_area}.graphml')
ref_nodes, ref_edges = ox.graph_to_gdfs(ref_graph)

ref_simplified_graph = ox.load_graphml(f'../data/ref_{study_area}_simple.graphml')
ref_simplified_nodes, ref_simplified_edges = ox.graph_to_gdfs(ref_simplified_graph)

print('Data loaded!')

Data processing completed!


**Load results:**

In [29]:
# Results of the extrinsic analysis (JSON)
with open(f'../results/extrinsic_analysis_{study_area}.json') as extrinsic_json:
    all_results = json.loads(extrinsic_json.read())

# Grid results of the extrinsic analysis (pickle)
# NOTE(review): unpickling can execute arbitrary code - only load files produced by this project.
with open(f'../results/grid_results_extrinsic_{study_area}.pickle', 'rb') as extrinsic_pickle:
    grid = pickle.loads(extrinsic_pickle.read())

# Feature matching results (JSON)
with open(f'../results/feature_matches_{study_area}.json') as matches_json:
    fm_results = json.loads(matches_json.read())

# Grid results of the feature matching (pickle)
with open(f'../results/grid_results_feature_matching_{study_area}.pickle', 'rb') as matches_pickle:
    grid_fm = pickle.loads(matches_pickle.read())

In [30]:
# Inspect the entries stored under the 'osm_results' key of the loaded results
all_results['osm_results']

{'node_count': 4690,
 'edge_count': 5437,
 'edge_density': 5847.16967610508,
 'node_density': 5847.16967610508,
 'danling_node_density': 9.79133707747185,
 'osm_protected_density': 5282.104879480832,
 'osm_unprotected_density': 514.2255842143798,
 'osm_mixed_density': 50.67128013330106,
 'simplified_edge_pct_diff': 82.43409149651073,
 'simplified_node_pct_diff': 84.47276940903824,
 'edges_pr_km': 5.1264022047726545,
 'nodes_pr_km': 4.422075839688017,
 'alpha': 0.07978666666666667,
 'beta': 1.159275053304904,
 'gamma': 0.38658987485779295,
 'component_count': 336,
 'largest_cc_pct_size': 92.78832766278828,
 'largest_cc_pct_length': 743769.047,
 'count_adjacent_issues': 96,
 'dangling_node_count': 1776,
 'snapping_issues': [[3105225284, 3105225286]],
 'snapping_issues_count': 1}

In [31]:
# Turn each results dictionary into a table with one row per result key;
# the values end up in a single column labelled 0.
osm_df, ref_df = (
    pd.DataFrame.from_dict(all_results[results_key], orient='index')
    for results_key in ('osm_results', 'ref_results')
)

In [33]:
# Label the value column of each table with its data source.
# Reassign the renamed frames instead of using inplace=True, so the frames
# created in the previous cell are not mutated behind the reader's back.
osm_df = osm_df.rename(columns={0: 'osm'})
ref_df = ref_df.rename(columns={0: 'ref'})

In [38]:
# Place the OSM and reference values side by side, aligned on the result name (the index)
combined_results = pd.concat([osm_df, ref_df], axis='columns')

In [39]:
# Display the combined table of OSM and reference results
combined_results

Unnamed: 0,osm,ref
node_count,4690,3655
edge_count,5437,4208
edge_density,5847.169676,3437.551356
node_density,5847.169676,3437.551356
danling_node_density,9.791337,4.807458
osm_protected_density,5282.104879,
osm_unprotected_density,514.225584,
osm_mixed_density,50.67128,
simplified_edge_pct_diff,82.434091,60.181681
simplified_node_pct_diff,84.472769,63.504743


In [10]:
# Convert both to dataframes

# Merge

# Round values

# Experiment with printing

dict_keys(['ref_infra_length', 'osm_infra_length', 'length_difference_osm_ref', 'pct_length_difference', 'osm_results', 'ref_results'])

In [8]:
# Display the grid results loaded from the extrinsic analysis pickle file
grid

Unnamed: 0,grid_id,geometry,count_osm_edges,count_osm_nodes,count_osm_simplified_edges,count_osm_simplified_nodes,count_ref_edges,count_ref_nodes,count_ref_simplified_edges,count_ref_simplified_nodes,...,ref_node_edge_ratio,component_ids_osm,component_ids_ref,cells_reached_osm,cells_reached_ref,cell_reach_diff,count_osm_dangling_nodes,count_ref_dangling_nodes,osm_dangling_nodes_per_node,ref_dangling_nodes_per_node
0,0,"POLYGON ((710193.940 6181853.300, 710682.960 6...",46.0,43.0,14.0,11.0,,,,,...,,[0],,653,0,653,5.0,,0.454545,
1,1,"POLYGON ((710682.960 6181371.592, 710682.960 6...",38.0,35.0,12.0,9.0,,,,,...,,[0],,653,0,653,5.0,,0.555556,
2,2,"POLYGON ((710682.960 6180889.885, 710682.960 6...",21.0,20.0,4.0,3.0,2.0,2.0,2.0,2.0,...,1.0,[0],[139],653,7,646,2.0,2.0,0.666667,1.0
3,3,"POLYGON ((710682.960 6180408.177, 710682.960 6...",46.0,45.0,20.0,19.0,1.0,1.0,1.0,1.0,...,1.0,"[0, 243]",[139],655,7,648,11.0,1.0,0.578947,1.0
4,4,"POLYGON ((710682.960 6179926.469, 710682.960 6...",31.0,31.0,15.0,15.0,1.0,1.0,1.0,1.0,...,1.0,"[0, 243]",[137],655,9,646,10.0,1.0,0.666667,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,765,"POLYGON ((727309.640 6173664.269, 727309.640 6...",18.0,17.0,1.0,,,,,,...,,[0],,653,0,653,,,,
766,766,"POLYGON ((727309.640 6173182.561, 727309.640 6...",10.0,10.0,3.0,3.0,,,,,...,,"[0, 293]",,654,0,654,3.0,,1.000000,
767,767,"POLYGON ((727309.640 6172700.853, 727309.640 6...",16.0,17.0,3.0,4.0,,,,,...,,"[0, 292]",,654,0,654,3.0,,0.750000,
768,768,"POLYGON ((727309.640 6172219.145, 727309.640 6...",,,,,,,,,...,,,,0,0,0,,,,


In [None]:
# Read dictionary into dataframe with result type as index and osm/ref as columns

# Compute new col with difference

# Plot difference based on color? Or plot all values based on color?

## Local differences

In [None]:
# List of values to be plotted and labels

# Create subplots based on lengths

# Flatten axes?

# Delete unneeded plot

# Plot values

# Set axis off

# How to control colors?


# Plot differences in:
#  - network density length
#  - node density
#  - protected density
#  - unprotected density
#  - local node/edge ratio
#  - dangling node density


# All this is saved to grid - no need for recomputing anything

In [None]:
# Also load feature matching results and add to things to plot!

## Component comparison

In [None]:
# Plots of dangling nodes per grid cell for both OSM and reference data
# Plots of cc and cc_i for both OSM and reference data

# Plot of connected component connectivity

In [None]:
# Export results

**How to summarize feature matching?**