In [1]:
import pandas as pd
import altair as alt
import glob

In [2]:
# Folder containing the speed-restriction CSV files (relative to the notebook)
path = "speed_restrictions"

# Collect every CSV in the folder. glob returns matches in arbitrary,
# OS-dependent order, so sort them to keep the row order of the combined
# frame reproducible from run to run.
csv_files = sorted(glob.glob(path + "/*.csv"))
if not csv_files:
    # Fail with an actionable message instead of pandas'
    # "No objects to concatenate" error from pd.concat.
    raise FileNotFoundError(f"No CSV files found in '{path}'")

# Read each CSV file and concatenate them into a single DataFrame
dfs = [pd.read_csv(file) for file in csv_files]
full_df = pd.concat(dfs, ignore_index=True)

# Keep only the first run of digits from the speed column. expand=False makes
# str.extract return a Series, and the nullable Int64 dtype lets rows with a
# missing or digit-free value become <NA> instead of making the cast raise.
full_df["Restriction_Speed_MPH"] = pd.to_numeric(
    full_df["Restriction_Speed_MPH"].str.extract(r"(\d+)", expand=False)
).astype("Int64")

# Parse Calendar_Date BEFORE dropping missing values: errors="coerce" turns
# unparseable dates into NaT, so dropping afterwards removes both rows that
# were empty to begin with and rows whose date could not be parsed. (Dropping
# first would let coerced NaT rows through and create a bogus "NaT" month.)
full_df["Calendar_Date"] = pd.to_datetime(full_df["Calendar_Date"], errors="coerce")
full_df = full_df.dropna(subset=["Calendar_Date"])

# Create a 'Year-Month' column for grouping, stored as a 'YYYY-MM' string
full_df["Year-Month"] = full_df["Calendar_Date"].dt.to_period("M").astype(str)
full_df

Unnamed: 0,Calendar_Date,ID,Track_Direction,Line,Branch,Track_Name,Location_Description,Loc_GTFS_Stop_ID,Location_Type,Direction_Sort,...,Restriction_Days_Active_On_Calendar_Day,Restriction_Days_to_Clear,Daily_Restriction_Count_Start,Month_Restriction_Count_Start,Restriction_Count_New,Restriction_Count_Cleared,Month_Restriction_Count_End,Daily_Restriction_Count_End,SRV_MAIN_UNIQUE_ID,Year-Month
0,2023-01-01,329396,EB,Green Line,Green Line Trunk,GL Kenmore-College Ave EB,EB Arlington to Boylston,place-armnl | place-boyls,Between Stations,GAEB-07,...,466,,1,1,0,0,0,1,,2023-01
1,2023-01-01,334870,EB,Green Line,Green Line Trunk,GL Kenmore-College Ave EB,EB North Sta. to Science Park,place-north | place-spmnl,Between Stations,GAEB-17,...,453,,1,1,0,0,0,1,,2023-01
2,2023-01-01,443512,EB,Green Line,Green Line Trunk,GL Kenmore-College Ave EB,EB North Sta. to Science Park,place-north | place-spmnl,Between Stations,GAEB-17,...,173,,1,1,0,0,0,1,,2023-01
3,2023-01-01,358277,EB,Green Line,Green Line Trunk,GL Kenmore-College Ave EB,EB Science Park to Lechmere,place-spmnl | place-lech,Between Stations,GAEB-19,...,380,,1,1,0,0,0,1,,2023-01
4,2023-01-01,358285,WB,Green Line,Green Line Trunk,GL Kenmore-College Ave WB,WB Lechmere to Science Park,place-lech | place-spmnl,Between Stations,GAWB-12,...,380,,1,1,0,0,0,1,,2023-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135644,2024-12-16,000086L,SB,Orange Line,Orange Line,OL SB,SB Chinatown Station,place-chncl,Station,OLSB-21,...,0,,0,0,1,0,0,1,,2024-12
135645,2024-12-21,000087L,NB,Orange Line,Orange Line,OL NB,NB Forest Hills to Green St.,place-forhl | place-grnst,Between Stations,OLNB-02,...,2,,0,0,0,0,0,0,,2024-12
135646,2024-12-20,000087L,NB,Orange Line,Orange Line,OL NB,NB Forest Hills to Green St.,place-forhl | place-grnst,Between Stations,OLNB-02,...,1,,0,0,0,0,0,0,,2024-12
135647,2024-12-22,000087L,NB,Orange Line,Orange Line,OL NB,NB Forest Hills to Green St.,place-forhl | place-grnst,Between Stations,OLNB-02,...,3,3.0,0,0,0,0,0,0,,2024-12


In [3]:
import altair as alt
import pandas as pd

# Legend label shown for each aggregated column; the key order also fixes the
# order of the colour-scale domain further down.
type_labels = {
    "start_restrictions": "Restrictions at Start of the Month",
    "end_restrictions": "Restrictions by End of the Month",
}

# Per 'Year-Month', sum the month-start and month-end restriction counts
monthly_restriction_counts = full_df.groupby(["Year-Month"], as_index=False).agg(
    start_restrictions=("Month_Restriction_Count_Start", "sum"),
    end_restrictions=("Month_Restriction_Count_End", "sum"),
)

# Reshape to long form: one row per (month, restriction type) pair
monthly_restriction_counts_melted = monthly_restriction_counts.melt(
    id_vars=["Year-Month"],
    value_vars=list(type_labels),
    var_name="restriction_type",
    value_name="count",
)

# Swap the internal column names for the human-readable legend labels
monthly_restriction_counts_melted["restriction_type"] = (
    monthly_restriction_counts_melted["restriction_type"].replace(type_labels)
)

# Encoding channels, built separately so the chart expression stays short
month_axis = alt.X("Year-Month:N", title="Month", axis=alt.Axis(labelAngle=45))
count_axis = alt.Y("count:Q", title="Number of Restrictions")
type_color = alt.Color(
    "restriction_type:N",
    title="Restriction Type",
    scale=alt.Scale(
        domain=list(type_labels.values()),
        range=["#636363", "#6baed6"],
    ),
)
hover_fields = [
    alt.Tooltip("Year-Month:N", title="Month and Year"),
    alt.Tooltip("restriction_type:N", title="Restriction Type"),
    alt.Tooltip("count:Q", title="# of Restrictions"),
]

# Stacked bar chart: one bar per month, segmented by restriction type
stacked_bar = (
    alt.Chart(monthly_restriction_counts_melted)
    .mark_bar()
    .encode(x=month_axis, y=count_axis, color=type_color, tooltip=hover_fields)
    .properties(
        title="# of Speed Restrictions at the Start vs End of the Month",
        width=600,
        height=400,
    )
)

stacked_bar
