# Extra Notebook for Exploring Data and Producing Some Visualizations

In [None]:
import geopandas as gpd
from pathlib import Path
import numpy as np
import pandas as pd

import pickle

from bikewaysim.paths import config
from bikewaysim.network import modeling_turns, add_attributes, prepare_network

# Explore adding sidewalks and bike paths as variables to the streets

In [None]:
# Load the 'osm_links' layer from the project network GeoPackage.
networks_gpkg = config['network_fp'] / 'networks.gpkg'
osm = gpd.read_file(networks_gpkg, layer='osm_links')

In [None]:
# Display the distinct link_type labels present in the OSM links.
link_type_column = osm['link_type']
link_type_column.unique()

In [None]:
# Split the OSM links into subsets by their link_type label.
link_type = osm['link_type']
roads = osm[link_type == 'road']
# NOTE(review): confirm 'sidewalks' (plural) is one of the labels shown by
# osm['link_type'].unique(); a label mismatch would silently give an empty frame.
sidewalks = osm[link_type == 'sidewalks']

In [None]:
# Read the 'osm_links' layer from networks.gpkg in the OSM download folder.
# NOTE(review): a later cell reads the raw OSM data from
# osm_<geofabrik_year>.gpkg (layer "raw") in this same folder; confirm that
# networks.gpkg / 'osm_links' really exists under config['osmdwnld_fp'].
osm_download_gpkg = config['osmdwnld_fp'] / 'networks.gpkg'
raw_links = gpd.read_file(osm_download_gpkg, layer='osm_links')

Visualize network splitting process

In [None]:
# Display the keys available in the project config.
config.keys()

In [None]:
# Pull out every processed link that came from one OSM way, plus the nodes at
# the ends of those links, and write both to scratch.gpkg for inspection.
osmid = 9277340
networks_gpkg = config['network_fp'] / 'networks.gpkg'
scratch_gpkg = config['network_fp'] / 'scratch.gpkg'
raw_osm_gpkg = config['osmdwnld_fp'] / f"osm_{config['geofabrik_year']}.gpkg"

osm_links = gpd.read_file(networks_gpkg, layer="osm_links")
osm_nodes = gpd.read_file(networks_gpkg, layer="osm_nodes")
# Read here but not used further within this cell.
raw_osm_links = gpd.read_file(raw_osm_gpkg, layer="raw")

# Keep only the links belonging to the chosen OSM way.
osm_links = osm_links[osm_links['osmid'] == osmid]

# Node ids appearing at either end (osm_A / osm_B) of the selected links.
endpoint_ids = set(pd.concat([osm_links['osm_A'], osm_links['osm_B']]).tolist())
osm_nodes = osm_nodes[osm_nodes['osm_N'].isin(endpoint_ids)]

osm_links.to_file(scratch_gpkg, layer='network_breakdown_links')
osm_nodes.to_file(scratch_gpkg, layer='network_breakdown_nodes')

In [None]:
# Load the network tables: turns and directed links from parquet, and the
# edge/node layers from final_network.gpkg.
network_dir = config['network_fp']
final_network_gpkg = network_dir / 'final_network.gpkg'

unfiltered_turns = pd.read_parquet(network_dir / 'turns_df.parquet')
unfiltered_directed_links = pd.read_parquet(network_dir / 'directed_edges.parquet')
unfiltered_links = gpd.read_file(final_network_gpkg, layer='edges')
unfiltered_nodes = gpd.read_file(final_network_gpkg, layer='nodes')

In [None]:
# Remove "wrong-way" records (the reverse direction of a link flagged oneway)
# from the directed links and from the turns.
oneway_dict = dict(zip(unfiltered_links['linkid'], unfiltered_links['oneway']))

# The explicit comparison with True is kept on purpose: linkids missing from
# oneway_dict map to NaN, and NaN compared with True evaluates to False.
link_is_oneway = unfiltered_directed_links['linkid'].map(oneway_dict).eq(True)
link_is_reversed = unfiltered_directed_links['reverse_link'].eq(True)
unfiltered_directed_links['wrongway'] = link_is_oneway & link_is_reversed
directed_links = unfiltered_directed_links[~unfiltered_directed_links['wrongway']]

unfiltered_turns['source_oneway'] = unfiltered_turns['source_linkid'].map(oneway_dict)
unfiltered_turns['target_oneway'] = unfiltered_turns['target_linkid'].map(oneway_dict)

# Despite their names, these two masks are True for turns to KEEP, i.e. turns
# whose source (resp. target) link is NOT a wrong-way traversal.
source_wrongway = ~(
    unfiltered_turns['source_oneway'].eq(True)
    & unfiltered_turns['source_reverse_link'].eq(True)
)
target_wrongway = ~(
    unfiltered_turns['target_oneway'].eq(True)
    & unfiltered_turns['target_reverse_link'].eq(True)
)
unfiltered_turns = unfiltered_turns[source_wrongway & target_wrongway]

In [None]:
# Print summary counts for the network loaded from final_network.gpkg and the
# parquet tables.
print(unfiltered_links.shape[0],'links')
# Series.append was removed in pandas 2.0; pd.concat stacks the two endpoint
# columns so distinct node ids can be counted across both.
node_count = pd.concat([unfiltered_links['A'],unfiltered_links['B']]).nunique()
# NOTE(review): this counts unfiltered_directed_links, which still contains
# the rows flagged 'wrongway', whereas unfiltered_turns has already had its
# wrong-way turns removed. Confirm whether directed_links was intended here.
print(unfiltered_directed_links.shape[0],'directed links')
print(node_count,'nodes')
print(unfiltered_turns.shape[0],'turns')

# Filtered Network

In [None]:
# Load the calibration network: the pickle holds a (links, turns) pair.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only open
# pickles that were produced by this project.
with (config['calibration_fp']/"calibration_network.pkl").open('rb') as fh:
    links, turns = pickle.load(fh)
# Rows whose reverse_link flag is False. The explicit copy makes
# undirected_links an independent frame, so later in-place edits to it
# (e.g. recoding 'speed') do not raise SettingWithCopyWarning.
undirected_links = links[links['reverse_link']==False].copy()

In [None]:
# Print summary counts for the calibration network.
print(links['linkid'].nunique(),'links')
# Series.append was removed in pandas 2.0; pd.concat stacks the two endpoint
# columns so distinct node ids can be counted across both.
node_count = pd.concat([links['A'],links['B']]).nunique()
print(links.shape[0],'directed links')
print(node_count,'nodes')
print(turns.shape[0],'turns')

Undirected Attributes

In [None]:
# Write the undirected links to the network statistics GeoPackage.
network_stats_gpkg = config['network_fp'] / 'network_stats.gpkg'
undirected_links.to_file(network_stats_gpkg, layer='undirected_attributes')

In [None]:
# Display the length_mi column of the undirected links.
length_mi = undirected_links['length_mi']
length_mi

In [None]:
# Summarise the undirected links by 'lanes': row count, summed length_mi, and
# share of the total length_mi (both rounded to whole numbers).
total_miles = undirected_links['length_mi'].sum()
miles = undirected_links.groupby('lanes')['length_mi'].sum().round(0)
counts = undirected_links['lanes'].value_counts()
# The percentage is derived from the already-rounded per-group totals.
pct = (miles / total_miles * 100).round(0).rename('pct')
pd.concat([counts, miles, pct], axis=1)

In [None]:
# Recode links whose speed is 0 to a speed of 1.
# NOTE(review): the reason for this recode is not recorded here; confirm what
# a speed of 0 represents in the source data.
zero_speed = undirected_links['speed'] == 0
undirected_links.loc[zero_speed, 'speed'] = 1

In [None]:
# Summarise the undirected links by 'speed': row count, summed length_mi, and
# share of the total length_mi (both rounded to whole numbers).
total_miles = undirected_links['length_mi'].sum()
miles = undirected_links.groupby('speed')['length_mi'].sum().round(0)
counts = undirected_links['speed'].value_counts()
# The percentage is derived from the already-rounded per-group totals.
pct = (miles / total_miles * 100).round(0).rename('pct')
pd.concat([counts, miles, pct], axis=1)

In [None]:
# Display how many undirected links carry each speed value.
speed_column = undirected_links['speed']
speed_column.value_counts()

In [None]:
# Display descriptive statistics for the AADT column.
aadt_column = undirected_links['AADT']
aadt_column.describe()

In [None]:
# Display descriptive statistics for the truck_pct column.
truck_pct_column = undirected_links['truck_pct']
truck_pct_column.describe()

Directed

In [None]:
# For every linkid, pick the row with the largest 'ascent_grade_%' and export
# those rows to the 'elevation' layer of the network statistics GeoPackage.
# NOTE(review): despite its name, min_grade holds the idxmax (largest grade)
# row labels, not the smallest; confirm which one was intended.
min_grade = links.groupby('linkid')['ascent_grade_%'].idxmax()
elevation_columns = ['linkid', 'name', 'highway', 'ascent_grade_%', 'geometry']
steepest_rows = links.loc[min_grade, elevation_columns]
network_stats_gpkg = config['network_fp'] / 'network_stats.gpkg'
steepest_rows.to_file(network_stats_gpkg, layer='elevation')

In [None]:
# Export one bicycle-facility record per linkid, choosing the facility with
# the lowest ranking value when a linkid has more than one.
ranking = {
    'multi use path': 0,
    'cycletrack': 1,
    'buffered bike lane': 2,
    'bike lane': 3,
}
facility_columns = ['linkid', 'name', 'highway', 'facility_fwd', 'geometry']

# Start from the links that have a forward facility recorded.
has_facility = links['facility_fwd'].notna()
bicycle_facilities = links.loc[has_facility, facility_columns].copy()

# Drop rows that repeat an earlier row in every non-geometry column.
is_repeat = bicycle_facilities.drop(columns=['geometry']).duplicated()
bicycle_facilities = bicycle_facilities[~is_repeat]

# NOTE(review): facility_fwd labels absent from `ranking` map to NaN; confirm
# every label in the data is covered.
bicycle_facilities['facility_ranking'] = bicycle_facilities['facility_fwd'].map(ranking)
bicycle_facilities_idx = bicycle_facilities.groupby('linkid')['facility_ranking'].idxmin()
bicycle_facilities = bicycle_facilities.loc[bicycle_facilities_idx]

network_stats_gpkg = config['network_fp'] / 'network_stats.gpkg'
bicycle_facilities.to_file(network_stats_gpkg, layer='bicycle_facilities')

In [None]:
# links.columns
# correlation_variables = ['bike_facility','AADT','truck_pct','lanes','speed','maxgrade_%','length_ft']
# test = links.loc[links['link_type']=='road',correlation_variables]
# test
# #links[network_variables].value_counts()
# #links.groupby(['lanes','here_speed']).apply(lambda x: np.round(x.length.sum() / 5280,1)).sort_values(ascending=False)
# #correlation matrix
# import numpy as np
# import pandas as pd
# import seaborn as sns
# import matplotlib.pyplot as plt

# # Calculate the correlation matrix
# correlation_matrix = test.corr()

# # Display the correlation matrix
# print("Correlation Matrix:")
# print(correlation_matrix)

# # Plot the correlation matrix using seaborn heatmap
# plt.figure(figsize=(8, 6))
# sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
# plt.title('Correlation Matrix Heatmap')
# plt.show()
# #correlation matrix for roads with bicycle facilities
# import numpy as np
# import pandas as pd
# import seaborn as sns
# import matplotlib.pyplot as plt

# # Generate a random 80000x3 matrix
# # For a real scenario, you would load your data here
# #data = links[categorical_variables].values #np.random.rand(80000, 3)

# # Convert the data to a pandas DataFrame for easier manipulation
# #df = links[['lanes','here_speed']]#pd.DataFrame(data, columns=['Feature1', 'Feature2', 'Feature3'])
# df = test[test['bike_facility']>0]

# # Calculate the correlation matrix
# correlation_matrix = df.corr()

# # Display the correlation matrix
# print("Correlation Matrix:")
# print(correlation_matrix)

# # Plot the correlation matrix using seaborn heatmap
# plt.figure(figsize=(8, 6))
# sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
# plt.title('Correlation Matrix Heatmap')
# plt.show()

In [None]:
# import numpy as np
# from sklearn.decomposition import PCA

# # Generate a random 80000x3 matrix
# # For a real scenario, you would load your data here
# data = np.random.rand(80000, 3)

# # Initialize PCA
# pca = PCA(n_components=2)  # Reduce to 2 dimensions for visualization purposes

# # Fit PCA on the data
# principal_components = pca.fit_transform(data)

# # Display the explained variance ratio
# print("Explained variance ratio:", pca.explained_variance_ratio_)

# # Display the principal components
# print("Principal components shape:", principal_components.shape)

# # Optionally, save the principal components to a file
# np.savetxt("principal_components.csv", principal_components, delimiter=",")

# # Plot the first two principal components if you want to visualize
# import matplotlib.pyplot as plt

# plt.scatter(principal_components[:, 0], principal_components[:, 1], alpha=0.5)
# plt.xlabel('Principal Component 1')
# plt.ylabel('Principal Component 2')
# plt.title('PCA of 80000x3 matrix')
# plt.show()
