In [None]:
import pandas as pd

# Load the merged ways table and tidy it in a single chain:
#   * `distance` is divided by 1000 (the plots below label it in km),
#   * `name_x` is renamed to `name`,
#   * `name_y` is dropped (presumably a leftover duplicate from an earlier
#     merge -- TODO confirm).
df = (
    pd.read_csv('/content/merged.csv')
    .assign(distance=lambda ways: ways['distance'] / 1000)
    .rename(columns={'name_x': 'name'})
    .drop(columns='name_y')
)

In [None]:
# Preview the first five rows of the cleaned ways table.
df.head(n=5)

In [None]:
import matplotlib.pyplot as plt

# Histogram of per-way distance (50 bins); `distance` was divided by 1000
# when the table was loaded, hence the km labels.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['distance'], bins=50)
ax.set_title('Distribution of Way Distances (km)')
ax.set_xlabel('Distance (km)')
ax.set_ylabel('Count of way_id')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Number of non-null way_id entries for each distinct running_score value.
score_counts = df.groupby('running_score')['way_id'].count()

# Histogram (20 bins) of those per-score counts, i.e. how many score values
# are shared by few ways versus many ways.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(score_counts, bins=20)
ax.set(
    title='Distribution of Way ID Counts by Running Score',
    xlabel='Count of way_id',
    ylabel='Frequency',
)
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the data from the specified CSV file
# Read the centrality table (joined to the ways on `way_id` further down).
centrality_df = pd.read_csv('/content/centrality.csv')

# Histogram of the 'centrality' column, 50 bins.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(centrality_df['centrality'], bins=50)
ax.set(
    title='Distribution of Centrality',
    xlabel='Centrality',
    ylabel='Count of way_id',
)
plt.show()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of centrality.
centrality_df.loc[:, 'centrality'].describe()

In [None]:
# Collapse the per-way table to one row per street name. Rows whose `name`
# is missing are left out, because groupby skips NaN keys by default.
column_rules = {
    'way_id': list,                 # keep every way id that carries this name
    'distance': 'min',
    'highway_type': 'first',        # Keep the first value or use a custom rule
    'total_nodes': 'sum',
    'total_intersections': 'sum',
    'major_intersections': 'sum',
    'max_node_degree': 'max',
    'avg_node_degree': 'mean',
    'intersection_density': 'mean',
    'importance_weight': 'mean',
    'intersection_score': 'mean',
    'lanes': 'sum',
    'maxspeed': 'mean',
    'ped_distance': 'mean',
    'running_score': 'mean',
}

# reset_index() turns the `name` group key back into an ordinary column.
name_way_df = df.groupby('name').agg(column_rules).reset_index()

name_way_df.head(1)

In [None]:
# (number of distinct names, number of columns) in the aggregated table.
(len(name_way_df.index), len(name_way_df.columns))

In [None]:
# Peek at the first row of the centrality table to check its columns.
centrality_df.iloc[:1]

In [None]:
# Attach to every street name the highest centrality found among its ways.

# --- Step 1: Explode way_ids into one row per (name, way_id) ---
# explode() repeats the original row label for each list element, so the
# index of df_exploded still says which name_way_df row a way belongs to.
df_exploded = name_way_df.explode('way_id')
df_exploded['way_id'] = df_exploded['way_id'].astype(int)

# --- Step 2: Merge with centrality values ---
# Merging on a column discards the left index and returns a fresh 0..n-1
# RangeIndex. The original row label is therefore carried through the merge
# as an explicit column ('name_row'); grouping on merged.index afterwards
# would put every exploded row in its own group.
merged = (
    df_exploded
    .rename_axis('name_row')
    .reset_index()
    .merge(centrality_df, on='way_id', how='left')
)

# --- Step 3: Compute the maximum centrality per original row ---
# (The variable keeps its historical name `avg_centrality`, but it holds a
# maximum, not an average.)
avg_centrality = (
    merged.groupby('name_row')['centrality']
    .max()
    .fillna(0)  # names with no centrality for any of their ways get 0
    .rename('max_centrality')
)

# --- Step 4: Add max_centrality back to the per-name dataframe ---
# Assignment aligns on the index: avg_centrality is indexed by the original
# name_way_df row labels, so each name receives its own maximum.
name_way_df['max_centrality'] = avg_centrality

name_way_df.head(1)

In [None]:
# Size of the subset of names whose shortest way is under 30 (km).
name_way_df.loc[name_way_df['distance'].lt(30)].shape

In [None]:
# Highway-type breakdown for names whose shortest way is under 30 (km).
name_way_df.loc[name_way_df['distance'] < 30, 'highway_type'].value_counts()

In [None]:
# Summary statistics of total_intersections for names under the 30 (km) cut.
name_way_df.loc[name_way_df['distance'].lt(30), 'total_intersections'].describe()

In [None]:
# Size of the subset with distance under 30 (km) AND more than 8 intersections.
name_way_df.loc[
    name_way_df['distance'].lt(30) & name_way_df['total_intersections'].gt(8)
].shape

In [None]:
# First row of the subset with distance under 30 (km) and more than 8
# intersections.
name_way_df.loc[
    name_way_df['distance'].lt(30) & name_way_df['total_intersections'].gt(8)
].head(1)