In [2]:
# Import the libraries used in this notebook
import pandas as pd
import altair as alt
import glob

In [57]:
# Load every speed-restriction CSV in the folder, normalise the speed and date
# columns, and compute the average restriction speed per line and month.

# Path to the folder containing CSV files
path = "speed_restrictions"

# Get a list of all CSV files in the folder. glob returns matches in arbitrary
# order, so sort them to keep the row order of full_df reproducible.
csv_files = sorted(glob.glob(path + "/*.csv"))
if not csv_files:
    # Without this guard pd.concat([]) fails with an opaque
    # "No objects to concatenate" error that hides the real cause.
    raise FileNotFoundError(f"No CSV files found in folder {path!r}")

# Read each CSV file and store them in a list
dfs = [pd.read_csv(file) for file in csv_files]

# Concatenate all DataFrames into one
full_df = pd.concat(dfs, ignore_index=True)

# Keep the first run of digits from each speed value.
# - Casting to str first keeps the .str accessor usable even when pandas
#   parsed the column as numeric.
# - expand=False returns a Series rather than a one-column DataFrame.
# - to_numeric(errors="coerce") turns values without digits into NaN instead
#   of raising, matching the coercion used for Calendar_Date below; the mean
#   below skips NaN.
speed_digits = (
    full_df["Restriction_Speed_MPH"].astype(str).str.extract(r"(\d+)", expand=False)
)
full_df["Restriction_Speed_MPH"] = pd.to_numeric(speed_digits, errors="coerce")

# Convert Calendar_Date to datetime format (unparseable dates become NaT)
full_df["Calendar_Date"] = pd.to_datetime(full_df["Calendar_Date"], errors="coerce")

# Create a 'Year-Month' column for grouping (YYYY-MM format)
full_df["Year-Month"] = full_df["Calendar_Date"].dt.to_period("M")

# Group by 'Year-Month' and 'Line', then calculate the average speed restriction.
# Rows whose date could not be parsed have a missing group key and are left out
# by groupby's default dropna=True.
monthly_avg_speed = (
    full_df.groupby(["Year-Month", "Line"])["Restriction_Speed_MPH"]
    .mean()
    .round(2)
    .reset_index()
)
# Periods are converted to plain "YYYY-MM" strings for display and charting.
monthly_avg_speed["Year-Month"] = monthly_avg_speed["Year-Month"].astype(str)

monthly_avg_speed.head()

Unnamed: 0,Year-Month,Line,Restriction_Speed_MPH
0,2023-01,Blue Line,10.0
1,2023-01,Green Line,7.76
2,2023-01,Orange Line,19.21
3,2023-01,Red Line,13.68
4,2023-02,Blue Line,18.29


In [58]:
# Box plot of the monthly average speed restriction, one box per train line.

# Colour for each known line, keyed by the line's name so the pairing does not
# depend on the order in which lines happen to appear in the data.
line_colors = {
    "Blue Line": "blue",
    "Green Line": "green",
    "Orange Line": "orange",
    "Red Line": "red",
}

# Domain of the colour scale: the lines actually present in the data.
# (The left-to-right order on the x-axis is controlled by sort="-y" below,
# not by this list.)
line_order = monthly_avg_speed["Line"].unique().tolist()

# Colours aligned one-to-one with line_order; lines without a configured
# colour fall back to gray instead of shifting the other colours.
custom_colors = [line_colors.get(line, "gray") for line in line_order]

boxplot = alt.Chart(monthly_avg_speed).mark_boxplot(size=20).encode(
    x=alt.X("Line:N", title="Line", sort="-y", axis=alt.Axis(labelAngle=0)),
    # zero=False lets the y-axis start near the data instead of at 0.
    y=alt.Y("Restriction_Speed_MPH:Q", title="Speed Restriction (MPH)", scale=alt.Scale(zero=False)),
    # The legend is hidden because the x-axis labels already name each line.
    color=alt.Color("Line:N", title="Train Lines", scale=alt.Scale(domain=line_order, range=custom_colors), legend=None),
    tooltip=["Line", "Year-Month", "Restriction_Speed_MPH"]
).properties(
    # NOTE(review): the date range in the title is hard-coded — confirm it
    # still matches the CSV files being loaded.
    title="Boxplot of Speed Restrictions by Train Line (Monthly) from 1/2023 - 2/2025",
    width=400,
    height=500
)

boxplot