bus stops

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import fpsnippets

print("--- METRIC 3: RELIABILITY ANALYSIS ---")

# Load Data
# Both CSVs are read from the current working directory. low_memory=False
# makes pandas parse each file in one pass instead of in chunks, which
# avoids mixed-dtype inference within a column.
try:
    df_ontime = pd.read_csv("on time.csv", low_memory=False)
    df_usage = pd.read_csv("stop usage.csv", low_memory=False)
except FileNotFoundError:
    print("Error: Missing CSV files.")
    # Stop with a non-zero status. The bare exit() helper is injected by the
    # `site` module for interactive sessions only: it may be absent
    # (python -S, frozen apps) and it reports success (status 0) even
    # though loading failed.
    raise SystemExit(1)

# Process On-Time Data
# 1. Parse on_time_percent as a number and scale it by 100 (0.68 -> 68.0);
#    unparseable values become NaN and those rows are dropped.
# 2. Normalise the route label to a stripped string so it can be used as a
#    join key.
df_ontime = (
    df_ontime
    .assign(on_time_percent=lambda d: pd.to_numeric(d['on_time_percent'], errors='coerce') * 100)
    .dropna(subset=['on_time_percent'])
    .assign(route=lambda d: d['route'].astype(str).str.strip())
)

# Mean on-time percentage per route (Series indexed by route).
route_reliability = df_ontime.groupby('route')['on_time_percent'].mean()

# Map Neighborhoods to Routes
print("Mapping routes to neighborhoods...")
# One row per stop_id, keeping its coordinates and the routes listed for it.
unique_stops = df_usage[['stop_id', 'latitude', 'longitude', 'routes_ser']].drop_duplicates(subset='stop_id')
# Row-wise lookup of the neighborhood for each stop's coordinates.
# NOTE(review): presumably geo_to_neighborhood returns None/NaN when no
# neighborhood matches, which the dropna below relies on - confirm.
unique_stops['neighborhood'] = unique_stops.apply(
    lambda row: fpsnippets.geo_to_neighborhood(row['latitude'], row['longitude']),
    axis=1
)

# Keep only stops that have both a neighborhood and a route list.
# The explicit .copy() detaches df_mapped from unique_stops; without it the
# column assignments below trigger pandas' SettingWithCopyWarning whenever
# dropna actually removed rows.
df_mapped = unique_stops.dropna(subset=['neighborhood', 'routes_ser']).copy()

# Explode routes (split "61A, 61B" -> separate rows)
# Spaces are removed first so the split tokens carry no padding;
# regex=False makes the literal-string replacement explicit.
df_mapped['routes_ser'] = df_mapped['routes_ser'].astype(str).str.replace(' ', '', regex=False)
df_mapped['route_list'] = df_mapped['routes_ser'].str.split(',')
df_exploded = df_mapped.explode('route_list').rename(columns={'route_list': 'route'})

# Merge and Calculate
# Attach each route's mean on-time percentage to every (stop, route) row.
# The inner join discards rows whose route has no reliability figure.
df_final = pd.merge(df_exploded, route_reliability, how='inner', on='route')

# Average the attached percentages per neighborhood, best first.
neighborhood_reliability = (
    df_final
    .groupby('neighborhood')['on_time_percent']
    .mean()
    .sort_values(ascending=False)
)
# The 20 highest-ranked neighborhoods.
top_20_reliability = neighborhood_reliability.iloc[:20]

# Plot
# Bar chart of the top-20 series, drawn on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 6))
top_20_reliability.plot(kind='bar', ax=ax, color='#EDB120', edgecolor='black')

ax.set_title('Metric 3: Average On-Time Performance', fontsize=16, fontweight='bold')
ax.set_xlabel('Neighborhood', fontsize=12)
ax.set_ylabel('On-Time Percentage', fontsize=12)

# Render y tick values with a percent sign and pin the axis to 0-100.
ax.yaxis.set_major_formatter(ticker.PercentFormatter())
ax.set_ylim(0, 100)
ax.grid(axis='y', linestyle='--', alpha=0.5)

# Slant the neighborhood labels so they do not overlap.
plt.xticks(rotation=45, ha='right')
fig.tight_layout()

# Save to disk first, then display.
fig.savefig("metric_3_reliability.png")
print("Graph saved as 'metric_3_reliability.png'")
plt.show()