# **MTA Ridership visualizations**
This notebook visualises the MTA ridership data using advanced matplotlib and seaborn visualisations
##  Notebook's content:  
-  **Correlation analysis** – Examining relationships between different subway lines, time periods, or other variables
-  **Distribution charts** –  Showing how ridership is distributed across stations, times, or dates
-  **Time series analysis** – Analyzing ridership patterns over time with trend identification
-  **Seasonal decomposition analysis** –  Breaking down the data into trend, seasonal, and residual components
-  **Weekday vs weekend analysis** – Comparing ridership patterns between weekdays and weekends 

### **Importing libraries & dataset**

In [None]:
import pandas as pd
import numpy as np
from scipy.stats.mstats import winsorize
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.colors import LinearSegmentedColormap
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
import warnings


In [None]:
# Load the daily ridership table into the shared `df` used by every later cell.
file_path = "MTA-Ridership.csv"
df = pd.read_csv(file_path)
df['Date'] = pd.to_datetime(df['Date'])

# Later cells draw line plots against 'Date' and fit trends against row
# position, so rows must be in chronological order. Enforce it here rather
# than trusting the order of the rows in the CSV (stable sort, fresh index).
df = df.sort_values('Date', kind='mergesort').reset_index(drop=True)

# Suppress all Python warnings for the remainder of the session.
warnings.filterwarnings('ignore')



### **Correlation analysis**

In [None]:
# Full dataset column name -> short label shown on the heatmap axes.
shortened_names = {
    "Subways: Total Estimated Ridership": "Subway",
    "Buses: Total Estimated Ridership": "Bus",
    "LIRR: Total Estimated Ridership": "LIRR",
    "Metro-North: Total Estimated Ridership": "Metro-North",
    "Access-A-Ride: Total Scheduled Trips": "Access-A-Ride",
    "Bridges and Tunnels: Total Traffic": "B&T Traffic",
    "Staten Island Railway: Total Estimated Ridership": "SI Railway"
}

# The columns to correlate are exactly the keys of the mapping, in the same order.
selected_columns = list(shortened_names)

# Subset to the selected metrics and apply the short labels in one pass
# (column selection already yields a new frame, so `df` is left untouched).
df_plot = df[selected_columns].rename(columns=shortened_names)

# Pairwise correlation between the metrics.
corr_matrix = df_plot.corr()

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Transportation Ridership Correlation Heatmap")
# Slant the x labels so they do not run into each other.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()

### **Distribution charts**

In [None]:

# Integer-typed columns of the table (kept for reference; not used by the plot below).
numeric_cols = df.select_dtypes(include=['int64']).columns

# Metrics to draw, one panel each.
transport_cols = [
    "Subways: Total Estimated Ridership",
    "Buses: Total Estimated Ridership",
    "LIRR: Total Estimated Ridership",
    "Metro-North: Total Estimated Ridership",
    "Access-A-Ride: Total Scheduled Trips",
    "Staten Island Railway: Total Estimated Ridership",
    "Bridges and Tunnels: Total Traffic" ]

# Grid geometry: fixed column count, as many rows as needed.
num_cols = 3
num_rows = -(-len(transport_cols) // num_cols)  # Ceiling division for rows needed

fig = plt.figure(figsize=(18, num_rows * 4))

for i, col in enumerate(transport_cols):
    # Only the panels that are actually needed are created, so the last row
    # has no empty axes to hide.
    ax = fig.add_subplot(num_rows, num_cols, i + 1)

    # Box for the summary, jittered points on top to show the raw observations.
    sns.boxplot(y=df[col], color='skyblue', width=0.5, ax=ax)
    sns.stripplot(y=df[col], color='navy', alpha=0.3, size=3, jitter=True, ax=ax)

    ax.set_title(f'Distribution of\n{col}', fontsize=12, fontweight='bold')

    # Mark the median with a line and print its value in the lower-right corner.
    median = df[col].median()
    ax.axhline(y=median, color='red', linestyle='-', linewidth=1.5, alpha=0.7)
    ax.text(0.95, 0.05, f'Median: {median:,.0f}',
            transform=ax.transAxes, ha='right', fontsize=10)

fig.suptitle('Transportation Metrics Distribution Analysis', fontsize=16, y=1.02)
# Lay the figure out once, after every panel exists, instead of re-running
# the layout pass on each loop iteration.
fig.tight_layout()
plt.show()

In [None]:

# Integer-typed columns of the table (kept for reference; not used by the plot below).
numeric_cols = df.select_dtypes(include=['int64']).columns

# Metrics to draw, one histogram panel each.
transport_cols = [
    "Subways: Total Estimated Ridership",
    "Buses: Total Estimated Ridership",
    "LIRR: Total Estimated Ridership",
    "Metro-North: Total Estimated Ridership",
    "Access-A-Ride: Total Scheduled Trips",
    "Staten Island Railway: Total Estimated Ridership",
    "Bridges and Tunnels: Total Traffic" ]

plot_cols = transport_cols

# Grid geometry: fixed column count, as many rows as needed (ceiling division).
num_cols = 3
num_rows = -(-len(plot_cols) // num_cols)

fig = plt.figure(figsize=(18, num_rows * 4))

# Seven colours so that each of the seven metrics gets its own; the modulo
# below still keeps the lookup safe if more metrics are added later.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']

for i, col in enumerate(plot_cols):
    ax = fig.add_subplot(num_rows, num_cols, i + 1)

    # Histogram with a KDE overlay; missing values are dropped first.
    sns.histplot(df[col].dropna(), bins=25, kde=True,
                 color=colors[i % len(colors)], alpha=0.7,
                 edgecolor='black', linewidth=0.8, ax=ax)

    # Vertical markers for the median and mean; their values go in the legend.
    median = df[col].median()
    mean = df[col].mean()
    ax.axvline(median, color='red', linestyle='-', linewidth=1.5, alpha=0.7, label=f'Median: {median:,.0f}')
    ax.axvline(mean, color='green', linestyle='--', linewidth=1.5, alpha=0.7, label=f'Mean: {mean:,.0f}')

    # Comma-grouped integer tick labels on the value axis.
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

    ax.set_title(f'Distribution of\n{col}', fontsize=12, fontweight='bold')
    ax.set_xlabel('Value', fontsize=10)
    ax.set_ylabel('Frequency', fontsize=10)

    # Sample size and spread. Anchored top-LEFT: the legend occupies the
    # top-right corner and would otherwise be drawn over this box.
    stats_text = (f"N: {df[col].count():,}\n"
                  f"Std: {df[col].std():,.0f}")
    ax.text(0.05, 0.95, stats_text, transform=ax.transAxes,
            verticalalignment='top', horizontalalignment='left',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))

    ax.legend(loc='upper right', fontsize=8)

fig.suptitle('Transportation Metrics Distributions', fontsize=16, y=0.995)
fig.tight_layout()
# Reserve room at the top for the figure title after the tight layout pass.
fig.subplots_adjust(top=0.96)
plt.show()

### **Time series analysis**

In [None]:

# Metrics to plot over time, one panel each.
ridership_columns = [
    "Subways: Total Estimated Ridership",
    "Buses: Total Estimated Ridership",
    "LIRR: Total Estimated Ridership",
    "Metro-North: Total Estimated Ridership",
    "Access-A-Ride: Total Scheduled Trips",
    "Staten Island Railway: Total Estimated Ridership",
    "Bridges and Tunnels: Total Traffic"
]

# A 3x3 grid has more panels than metrics; the spare ones are hidden below.
fig, axes = plt.subplots(3, 3, figsize=(20, 15))
axes = axes.flatten()

# One line colour per metric, indexed in the same order as ridership_columns.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']

# Values shared by every panel, computed once.
dates = df['Date']
x_numeric = np.arange(len(dates))          # row position, the x variable of the trend fit
date_min, date_max = dates.min(), dates.max()

for i, col in enumerate(ridership_columns):
    ax = axes[i]
    series = df[col]

    ax.plot(dates, series, color=colors[i], linewidth=2)

    # Dashed linear trend, fitted against row position. Missing values are
    # excluded from the fit so a gap in one metric does not break the cell.
    y_values = series.astype(float).to_numpy()
    valid = ~np.isnan(y_values)
    if valid.sum() >= 2:
        slope, intercept = np.polyfit(x_numeric[valid], y_values[valid], 1)
        ax.plot(dates, slope * x_numeric + intercept, "r--", linewidth=1, alpha=0.7)

    # Locate the extreme observations and the dates on which they occurred.
    max_idx = series.idxmax()
    min_idx = series.idxmin()
    max_date = df.loc[max_idx, 'Date']
    min_date = df.loc[min_idx, 'Date']

    ax.scatter(max_date, df.loc[max_idx, col], color='darkgreen', s=100, zorder=5)
    ax.scatter(min_date, df.loc[min_idx, col], color='darkred', s=100, zorder=5)

    ax.annotate(f"Max: {max_date.strftime('%Y-%m-%d')}",
                xy=(max_date, df.loc[max_idx, col]),
                xytext=(10, 0), textcoords='offset points',
                fontsize=10, fontweight='bold', color='darkgreen',
                bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="darkgreen", alpha=0.7))

    ax.annotate(f"Min: {min_date.strftime('%Y-%m-%d')}",
                xy=(min_date, df.loc[min_idx, col]),
                xytext=(10, -15), textcoords='offset points',
                fontsize=10, fontweight='bold', color='darkred',
                bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="darkred", alpha=0.7))

    # Same x range on every panel: the full span of dates in the table.
    ax.set_xlim(date_min, date_max)

    ax.set_title(col, fontsize=14, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Ridership Count', fontsize=12)
    ax.grid(True, alpha=0.3)

    ax.tick_params(axis='both', which='major', labelsize=10)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # Comma-grouped integer tick labels on the y axis.
    ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

    # Summary box in the lower-right corner of the panel.
    stats_text = (f"Mean: {series.mean():,.0f}\n"
                  f"Max: {series.max():,.0f}\n"
                  f"Min: {series.min():,.0f}")
    ax.text(0.98, 0.05, stats_text, transform=ax.transAxes,
            verticalalignment='bottom', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))

# Hide the panels that have no metric. Slicing by the list length avoids
# depending on the loop variable, which is stale or undefined when the
# list is empty.
for spare_ax in axes[len(ridership_columns):]:
    spare_ax.set_visible(False)

fig.suptitle('NYC Transportation Ridership Trends', fontsize=20, y=0.98)
fig.tight_layout()
# Reserve room at the top for the figure title after the tight layout pass.
fig.subplots_adjust(top=0.92)
plt.show()

In [None]:

# Display name -> column holding that service's "% of comparable pre-pandemic day".
services = {
    'Subways': 'Subways: % of Comparable Pre-Pandemic Day',
    'Buses': 'Buses: % of Comparable Pre-Pandemic Day',
    'LIRR': 'LIRR: % of Comparable Pre-Pandemic Day',
    'Metro-North': 'Metro-North: % of Comparable Pre-Pandemic Day',
    'Access-A-Ride': 'Access-A-Ride: % of Comparable Pre-Pandemic Day',
    'Bridges and Tunnels': 'Bridges and Tunnels: % of Comparable Pre-Pandemic Day',
    'Staten Island Railway': 'Staten Island Railway: % of Comparable Pre-Pandemic Day'
}

# Only rows on or after this date are searched for the first full recovery.
POST_PANDEMIC_START = '2020-03-15'
# Level (in the units of the % columns) that counts as "recovered".
FULL_RECOVERY_PCT = 100

# The date filter is the same for every service, so apply it once.
post_pandemic_df = df[df['Date'] >= POST_PANDEMIC_START]

for service, col in services.items():
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df['Date'], df[col], color='teal', linewidth=2)

    # Dates on which this service was at or above the recovery level.
    reached_100 = post_pandemic_df.loc[post_pandemic_df[col] >= FULL_RECOVERY_PCT, 'Date']

    if not reached_100.empty:
        # Earliest such date, taken with min() so the answer does not depend
        # on the order of the rows in the table.
        first_100_date = reached_100.min()
        ax.axvline(x=first_100_date, color='red', linestyle='--', linewidth=1)
        ax.annotate(f'First 100%: {first_100_date.date()}',
                    xy=(first_100_date, FULL_RECOVERY_PCT),
                    xytext=(first_100_date, FULL_RECOVERY_PCT + 10),
                    arrowprops=dict(arrowstyle='->', color='red'),
                    color='red',
                    fontsize=10,
                    ha='center')

    # Reference line at the recovery level.
    ax.axhline(y=FULL_RECOVERY_PCT, color='gray', linestyle='--', linewidth=1)
    ax.set_title(f'{service} Ridership Recovery to Pre-Pandemic Levels', fontsize=16)
    ax.set_xlabel('Date')
    ax.set_ylabel('% of Pre-Pandemic Ridership')
    ax.grid(True, linestyle='--', alpha=0.5)
    fig.tight_layout()
    plt.show()

### **Seasonal decomposition & weekday vs weekend analysis**

In [None]:
# Metrics analysed in this section, one pair of figures each.
ridership_columns = [
    "Subways: Total Estimated Ridership",
    "Buses: Total Estimated Ridership",
    "LIRR: Total Estimated Ridership",
    "Metro-North: Total Estimated Ridership",
    "Access-A-Ride: Total Scheduled Trips",
    "Staten Island Railway: Total Estimated Ridership",
    "Bridges and Tunnels: Total Traffic"
]

# Colour per plotted component / day type.
colors = {
    'observed': '#1f77b4',  # blue
    'trend': '#2ca02c',     # green
    'seasonal': '#ff7f0e',  # orange
    'residual': '#d62728',  # red
    'weekday': '#9467bd',   # purple
    'weekend': '#8c564b'    # brown
}

# The .dt accessor below needs a datetime column; convert if it is not one yet.
if 'Date' in df.columns and not pd.api.types.is_datetime64_any_dtype(df['Date']):
    df['Date'] = pd.to_datetime(df['Date'])

# pandas numbers the days of the week Monday=0 through Sunday=6.
df['dayofweek'] = df['Date'].dt.day_of_week
df['day_name'] = df['Date'].dt.day_name()

# Weekend = Saturday (5) or Sunday (6). Testing for 0 and 6 would flag
# Monday and Sunday instead and count Saturday as a weekday.
df['is_weekend'] = df['dayofweek'] >= 5


def _thousands_formatter():
    """Return a new tick formatter that renders values as comma-grouped integers.

    A separate formatter object is created per axis. Values are truncated
    with int() before formatting.
    """
    return plt.FuncFormatter(lambda value, _pos: "{:,}".format(int(value)))


def _plot_decomposition(result, col):
    """Draw the four-panel decomposition figure for one metric and show it.

    Parameters
    ----------
    result
        Object returned by ``seasonal_decompose``; its ``observed``, ``trend``,
        ``seasonal`` and ``resid`` Series are plotted top to bottom.
    col : str
        Column name of the metric, used in the figure title.

    Reads the module-level ``colors`` dict for the line colours.
    """
    observed = result.observed
    trend = result.trend
    seasonal = result.seasonal
    resid = result.resid

    fig, axes = plt.subplots(4, 1, figsize=(14, 12), sharex=True)
    observed_ax, trend_ax, seasonal_ax, resid_ax = axes

    # --- Observed series, with its mean as a dashed reference line ---
    observed_ax.plot(observed.index, observed, color=colors['observed'], linewidth=1.5)
    observed_ax.set_title("Observed", fontsize=12, fontweight='bold')
    observed_ax.set_ylabel("Ridership", fontsize=10)
    observed_ax.grid(True, alpha=0.3)
    observed_ax.get_yaxis().set_major_formatter(_thousands_formatter())

    mean_val = observed.mean()
    observed_ax.axhline(y=mean_val, color='gray', linestyle='--', alpha=0.7)
    # The label is anchored at the sixth observation, slightly in from the left edge.
    observed_ax.text(observed.index[5], mean_val, f"Mean: {mean_val:,.0f}",
                     verticalalignment='bottom', horizontalalignment='left',
                     bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))

    # --- Trend component ---
    trend_ax.plot(trend.index, trend, color=colors['trend'], linewidth=1.5)
    trend_ax.set_title("Trend", fontsize=12, fontweight='bold')
    trend_ax.set_ylabel("Trend Component", fontsize=10)
    trend_ax.grid(True, alpha=0.3)
    trend_ax.get_yaxis().set_major_formatter(_thousands_formatter())

    # Mark the dates where the day-over-day change in the trend exceeds two
    # standard deviations of those changes.
    trend_diff = trend.diff().dropna()
    significant_changes = trend_diff[abs(trend_diff) > trend_diff.std() * 2].index
    for date in significant_changes:
        trend_ax.axvline(x=date, color='lightgray', alpha=0.5)

    # --- Seasonal component, with Saturdays and Sundays shaded ---
    seasonal_ax.plot(seasonal.index, seasonal, color=colors['seasonal'], linewidth=1.5)
    seasonal_ax.set_title("Seasonality (Weekly Pattern)", fontsize=12, fontweight='bold')
    seasonal_ax.set_ylabel("Seasonal Component", fontsize=10)
    seasonal_ax.grid(True, alpha=0.3)

    # pandas numbers weekdays Monday=0 ... Sunday=6, so >= 5 is Saturday/Sunday.
    for day in seasonal.index[seasonal.index.dayofweek >= 5]:
        seasonal_ax.axvspan(day, day + pd.Timedelta(days=1),
                            alpha=0.1, color='red')

    # --- Residuals, with a zero line and a +/- 2 standard deviation band ---
    resid_ax.plot(resid.index, resid, color=colors['residual'], linewidth=1)
    resid_ax.set_title("Residuals", fontsize=12, fontweight='bold')
    resid_ax.set_ylabel("Residual Component", fontsize=10)
    resid_ax.grid(True, alpha=0.3)
    resid_ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)

    resid_std = resid.std()
    resid_ax.axhline(y=resid_std * 2, color='gray', linestyle=':', alpha=0.5)
    resid_ax.axhline(y=-resid_std * 2, color='gray', linestyle=':', alpha=0.5)
    resid_ax.fill_between(resid.index, -resid_std * 2, resid_std * 2, color='gray', alpha=0.1)

    # One tick per month on every panel, labelled like "Jan 2021".
    for ax in axes:
        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # The moving-average trend is NaN at both ends of the series, so the
    # first/last *valid* trend values are used; the raw end points would
    # always give NaN here.
    trend_valid = trend.dropna()
    trend_delta = trend_valid.iloc[-1] - trend_valid.iloc[0]

    summary_stats = (
        f"Max: {observed.max():,.0f}\n"
        f"Min: {observed.min():,.0f}\n"
        f"Range: {(observed.max() - observed.min()):,.0f}\n"
        f"Trend Δ: {trend_delta:,.0f}"
    )

    props = dict(boxstyle='round', facecolor='white', alpha=0.7)
    fig.text(0.02, 0.97, summary_stats, transform=fig.transFigure,
             verticalalignment='top', horizontalalignment='left',
             bbox=props, fontsize=10)

    # Strength measures: one minus the share of variance left in the residual
    # once the other component has been removed from the observed series.
    strength_seasonal = 1 - np.var(resid) / np.var(observed - trend)
    strength_trend = 1 - np.var(resid) / np.var(observed - seasonal)

    strength_text = (
        f"Strength of Seasonality: {strength_seasonal:.2f}\n"
        f"Strength of Trend: {strength_trend:.2f}"
    )

    fig.text(0.98, 0.97, strength_text, transform=fig.transFigure,
             verticalalignment='top', horizontalalignment='right',
             bbox=props, fontsize=10)

    fig.suptitle(f"Seasonal Decomposition Analysis (Weekly Pattern): {col}",
                 fontsize=16, fontweight="bold", y=1.02)

    fig.tight_layout()
    fig.subplots_adjust(top=0.9, hspace=0.3)

    plt.show()


def _plot_weekday_weekend(ts_df, result, col):
    """Draw the 2x2 weekday-vs-weekend figure for one metric and show it.

    Panels: average by day of week, weekday/weekend box plot, time series
    with weekend shading, and one week of the seasonal component.

    Parameters
    ----------
    ts_df : pandas.DataFrame
        Table indexed by date; must contain ``col`` plus the ``day_name`` and
        ``is_weekend`` columns.
    result
        Object returned by ``seasonal_decompose`` for ``ts_df[col]``; only its
        ``seasonal`` Series is used.
    col : str
        Column name of the metric.

    Reads the module-level ``colors`` dict for the bar and point colours.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 10))
    dow_ax, box_ax = axes[0]
    series_ax, weekly_ax = axes[1]

    # --- 1. Average by day of week, displayed Sunday first ---
    day_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
    day_of_week_avg = ts_df.groupby('day_name')[col].mean().reindex(day_order)

    dow_ax.bar(day_of_week_avg.index, day_of_week_avg.values,
               color=[colors['weekend'] if day in ['Sunday', 'Saturday']
                      else colors['weekday'] for day in day_of_week_avg.index])
    dow_ax.set_title("Average Ridership by Day of Week", fontsize=14, fontweight='bold')
    dow_ax.set_ylabel("Average Ridership", fontsize=12)
    dow_ax.grid(axis='y', alpha=0.3)
    dow_ax.tick_params(axis='x', rotation=45)
    dow_ax.get_yaxis().set_major_formatter(_thousands_formatter())

    # Averages of the per-day means, and the relative weekend/weekday gap.
    weekday_avg = day_of_week_avg[['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']].mean()
    weekend_avg = day_of_week_avg[['Sunday', 'Saturday']].mean()
    pct_diff = (weekend_avg - weekday_avg) / weekday_avg * 100

    dow_ax.axhline(y=weekday_avg, color='blue', linestyle='--', alpha=0.5)
    dow_ax.axhline(y=weekend_avg, color='red', linestyle='--', alpha=0.5)

    dow_ax.text(0.5, 0.95, f"Weekday Avg: {weekday_avg:,.0f}\nWeekend Avg: {weekend_avg:,.0f}\n" +
                f"Weekend is {pct_diff:.1f}% {'higher' if pct_diff > 0 else 'lower'} than weekday",
                transform=dow_ax.transAxes, ha='center', va='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.7), fontsize=10)

    # --- 2. Weekday vs weekend distribution (split on the is_weekend flag) ---
    weekend_data = ts_df.loc[ts_df['is_weekend'], col]
    weekday_data = ts_df.loc[~ts_df['is_weekend'], col]

    box_data = [weekday_data, weekend_data]
    box_labels = ['Weekdays', 'Weekends (Sun & Sat)']
    box_colors = [colors['weekday'], colors['weekend']]

    box_ax.boxplot(box_data, patch_artist=True,
                   boxprops=dict(facecolor='lightgray'),
                   medianprops=dict(color='black'))

    # Raw observations on top of each box, spread horizontally with Gaussian
    # jitter around the box position (unseeded; purely cosmetic).
    for position, (data, color) in enumerate(zip(box_data, box_colors), start=1):
        x = np.random.normal(position, 0.04, size=len(data))
        box_ax.scatter(x, data, alpha=0.4, s=20, c=color, edgecolor='none')

    # Label the boxes through the axis rather than boxplot's `labels=`
    # keyword, which Matplotlib 3.9 deprecated in favour of `tick_labels`;
    # this form works on both older and newer releases. Boxes sit at 1..N.
    box_ax.set_xticks([1, 2])
    box_ax.set_xticklabels(box_labels)

    box_ax.set_title("Ridership Distribution: Weekday vs Weekend", fontsize=14, fontweight='bold')
    box_ax.set_ylabel("Ridership", fontsize=12)
    box_ax.grid(axis='y', alpha=0.3)
    box_ax.get_yaxis().set_major_formatter(_thousands_formatter())

    # --- 3. Full time series with weekend days shaded ---
    series_ax.plot(ts_df.index, ts_df[col], color=colors['observed'], linewidth=1.5)

    for date in ts_df.index[ts_df['is_weekend'].to_numpy()]:
        series_ax.axvspan(date, date + pd.Timedelta(days=1),
                          alpha=0.1, color='red')

    series_ax.set_title("Time Series with Weekend (Sun & Sat) Highlighting", fontsize=14, fontweight='bold')
    series_ax.set_ylabel("Ridership", fontsize=12)
    series_ax.grid(True, alpha=0.3)
    series_ax.get_yaxis().set_major_formatter(_thousands_formatter())

    series_ax.xaxis.set_major_locator(mdates.MonthLocator())
    series_ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.setp(series_ax.get_xticklabels(), rotation=45, ha='right')

    # --- 4. One week of the seasonal component, starting on a Sunday ---
    clean_seasonal = result.seasonal.dropna()
    # Row positions of the Sundays (pandas: Sunday == 6).
    sunday_positions = np.flatnonzero(clean_seasonal.index.dayofweek == 6)

    # The panel is left empty when no Sunday is followed by six more rows.
    if sunday_positions.size and sunday_positions[0] + 7 <= len(clean_seasonal):
        start = sunday_positions[0]
        # Seven consecutive rows from the first Sunday; they are labelled
        # Sunday..Saturday in order.
        day_values = clean_seasonal.iloc[start:start + 7].tolist()

        bar_colors = [colors['weekend'], colors['weekday'], colors['weekday'],
                      colors['weekday'], colors['weekday'], colors['weekday'],
                      colors['weekend']]  # Sun, M-F, Sat

        weekly_ax.bar(day_order, day_values, color=bar_colors)
        weekly_ax.set_title("Weekly Seasonal Pattern (Starting Sunday)", fontsize=14, fontweight='bold')
        weekly_ax.set_ylabel("Seasonal Effect", fontsize=12)
        weekly_ax.grid(axis='y', alpha=0.3)

        # Reference line at zero
        weekly_ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)

        # Signed value above positive bars and below the others.
        for position, val in enumerate(day_values):
            weekly_ax.annotate(f"{val:+,.0f}",
                               xy=(position, val),
                               xytext=(0, 5 if val > 0 else -15),
                               textcoords='offset points',
                               ha='center',
                               color='green' if val > 0 else 'red',
                               fontweight='bold')

    fig.suptitle(f"Weekday vs Weekend Analysis: {col}",
                 fontsize=16, fontweight="bold", y=0.98)

    weekday_weekend_stats = (
        f"Weekday Average: {weekday_data.mean():,.0f}\n"
        f"Weekend Average: {weekend_data.mean():,.0f}\n"
        f"Difference: {(weekend_data.mean() - weekday_data.mean()):+,.0f} ({pct_diff:+.1f}%)\n"
        f"Weekday Std Dev: {weekday_data.std():,.0f}\n"
        f"Weekend Std Dev: {weekend_data.std():,.0f}"
    )

    props = dict(boxstyle='round', facecolor='white', alpha=0.7)
    fig.text(0.02, 0.02, weekday_weekend_stats, transform=fig.transFigure,
             verticalalignment='bottom', horizontalalignment='left',
             bbox=props, fontsize=10)

    fig.tight_layout()
    fig.subplots_adjust(top=0.93)

    plt.show()


# Date-indexed, chronologically sorted view of the table. It is the same for
# every metric, so it is built once instead of once per loop iteration.
ts_df = df.set_index('Date').sort_index()

for col in ridership_columns:
    print(f"Processing: {col}")

    if col not in df.columns:
        print(f"Warning: Column '{col}' not found in DataFrame. Skipping.")
        continue

    try:
        # PART 1: additive decomposition with a 7-day period (weekly pattern).
        result = seasonal_decompose(ts_df[col], model='additive', period=7)
        _plot_decomposition(result, col)

        # PART 2: weekday vs weekend comparison for the same metric.
        _plot_weekday_weekend(ts_df, result, col)

    except Exception as e:
        # Best effort: report the failure and carry on with the next metric.
        print(f"Error processing {col}: {str(e)}")
        # Discard any half-drawn figure so it is not rendered later.
        plt.close('all')