bus stops

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import time

# --- Load the Data ---
# Read the raw usage export, confirm the columns the analysis needs are
# present, and derive a datetime 'month_year' column from 'datekey'.
# Every failure path prints a human-readable message and then stops with a
# non-zero exit status via `raise SystemExit(1)`. The bare `exit()` helper is
# avoided because it is injected by the `site` module for interactive use and
# would report success (status 0) even though the run failed.
print("Loading bus stop usage data from 'stop usage.csv'...")
try:
    # Keep the guarded body to the single call that can fail on I/O/parsing.
    df_usage = pd.read_csv("stop usage.csv", low_memory=False)
except Exception as e:
    # Deliberately broad: this is the script's top-level boundary and any
    # read/parse failure is reported to the user the same way.
    print(f"Error loading 'stop usage.csv'. Make sure it's in the same folder. Error: {e}")
    raise SystemExit(1) from e
print(f"Successfully loaded {len(df_usage)} usage records.")

print("Processing time and usage data...")
start_time = time.time()

# --- Prepare the Data ---

# Check for necessary columns
if 'datekey' not in df_usage.columns:
    print("\n--- ERROR ---")
    print("Could not find 'datekey' column in 'stop usage.csv'.")
    print("This column is needed to analyze trends over time.")
    raise SystemExit(1)

if 'total_ons' not in df_usage.columns or 'total_offs' not in df_usage.columns:
    print("\n--- ERROR ---")
    print("Could not find 'total_ons' or 'total_offs' columns in 'stop usage.csv'.")
    print("These columns are needed to calculate usage.")
    raise SystemExit(1)

# Parse 'datekey' using the year+month format '%Y%m'; the parsed values are
# stored in a new 'month_year' column used for grouping later on.
try:
    df_usage['month_year'] = pd.to_datetime(df_usage['datekey'], format='%Y%m')
except Exception as e:
    print(f"Error converting 'datekey' to datetime. Error: {e}")
    raise SystemExit(1) from e

# --- Aggregate the Data ---
# Collapse the per-record rows into one row per 'month_year', summing the
# boardings ('total_ons') and alightings ('total_offs') columns, then order
# the result chronologically so it can be plotted as a time series.
print("Aggregating total usage by month...")
usage_columns = ['total_ons', 'total_offs']
monthly_totals = (
    df_usage
    .groupby('month_year')[usage_columns]
    .sum()
    .reset_index()
    .sort_values(by='month_year')  # explicit sort, just to be safe
)

# The timer was started before the datetime conversion, so the figure below
# covers both the conversion and the aggregation.
end_time = time.time()
elapsed_seconds = end_time - start_time
print(f"Data processing complete in {elapsed_seconds:.2f} seconds.")

# Show the first few aggregated rows as a quick sanity check.
print("\n--- Monthly Usage Totals (Sample) ---")
print(monthly_totals.head())
print("----------------------------------------")


# --- Plot the Time Series Chart ---
print("\nGenerating time series plot...")

plt.figure(figsize=(18, 9))

# One entry per line drawn on the chart:
# (source column, legend label, line colour, marker, line style)
series_styles = (
    ('total_ons', 'Total Boardings (Ons)', '#004B8D', 'o', '-'),      # blue, solid
    ('total_offs', 'Total Alightings (Offs)', '#65B32E', 'x', '--'),  # green, dashed
)
months = monthly_totals['month_year']
for column, legend_label, colour, marker_shape, line_style in series_styles:
    plt.plot(
        months,
        monthly_totals[column],
        label=legend_label,
        color=colour,
        marker=marker_shape,
        linestyle=line_style,
    )

# --- Format the Plot ---
plt.title(
    'Total Monthly Bus Ridership Over Time',
    fontsize=20,
    fontweight='bold',
    pad=20,
)
plt.xlabel('Month', fontsize=15)
plt.ylabel('Total Ridership (in Millions)', fontsize=15)

# Format the y-axis to show "10M" instead of "10,000,000"
def millions_formatter(x, pos):
    """Format a tick value as a count of millions with an ``M`` suffix.

    Intended for use with ``matplotlib.ticker.FuncFormatter``, which calls
    the function with the tick value and the tick position.

    Args:
        x: Tick value in raw units (e.g. ``10_000_000``).
        pos: Tick position passed by the formatter; not used.

    Returns:
        The value divided by one million, rounded to at most two decimals
        with trailing zeros removed, followed by ``"M"`` -- for example
        ``"10M"``, ``"1.5M"`` or ``"0.25M"``.
    """
    # Rounding to whole millions made distinct ticks such as 1.5M and 2.5M
    # both read "2M"; keep up to two decimals and strip the padding instead.
    millions = f'{x / 1_000_000:.2f}'.rstrip('0').rstrip('.')
    return f'{millions}M'
# Grab the current axes once and reuse them for the axis-level tweaks.
axes = plt.gca()

# Render y tick labels through millions_formatter.
axes.yaxis.set_major_formatter(ticker.FuncFormatter(millions_formatter))

plt.legend(fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Hide the top and right borders of the plot area.
for edge in ('top', 'right'):
    axes.spines[edge].set_visible(False)

# Slant the x tick labels and set both axes' tick label font size.
plt.xticks(rotation=45, ha='right', fontsize=12)
plt.yticks(fontsize=12)

plt.tight_layout()

# Write the chart to disk before displaying it.
plt.savefig("monthly_total_usage.png")
print("Chart saved as 'monthly_total_usage.png'")

# Display the figure.
plt.show()

Loading bus stop usage data from 'stop usage.csv'...
Successfully loaded 107611 usage records.
Processing time and usage data...
Aggregating total usage by month...
Data processing complete in 0.01 seconds.

--- Monthly Usage Totals (Sample) ---
  month_year  total_ons  total_offs
0 2019-09-01  3288458.0   3272838.0
1 2020-01-01  3041136.0   3029379.0
2 2020-09-01  1216205.0   1215933.0
3 2021-04-01  1313498.0   1308418.0
----------------------------------------

Generating time series plot...
Chart saved as 'monthly_total_usage.png'

