In [1]:
# Loading libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [2]:
# Data Loading and processing

output_dir = "outputs"
csv_path = os.path.join(output_dir, "cleandbf.csv")
cleaned_df = pd.read_csv(csv_path)
cleaned_df['FE_VIA'] = cleaned_df['FE_VIA'].astype(float)

csv_path = os.path.join(output_dir, "OD_ADULTS_Timeindex_Motive_location_tripexpansionfactors.csv")
final_df = pd.read_csv((csv_path), header=[0,1], low_memory=False)

In [3]:
# Trip Frequency calculations

# Defining time windows
def time_window_to_index(time_window):
    time_window = str(time_window).replace('–', '-').replace('—', '-')
    start_time = time_window.split('-')[0].strip()
    if ':' in start_time:
        hour, minute = map(int, start_time.split(':'))
    else:
        hour, minute = int(start_time), 0
    return hour * 4 + minute // 15

cleaned_df['dep_idx'] = cleaned_df['quarter_departure'].apply(time_window_to_index)
cleaned_df['arr_idx'] = cleaned_df['quarter_arrival'].apply(time_window_to_index)

# Generating 15-min interval labels
all_intervals = [f"{h:02d}:{m:02d}" for h in range(24) for m in [0, 15, 30, 45]]
interval_to_time = {i: t for i, t in enumerate(all_intervals)}

window_size = 3  # simple smoothing

# Aggregating, Normalising, and Smoothing
def get_smoothed_normalized(idx_col):
    vals = cleaned_df.groupby(idx_col)['FE_VIA'].sum().reindex(range(96), fill_value=0)
    norm_vals = vals / vals.sum() if vals.sum() > 0 else vals
    smooth = norm_vals.rolling(window=window_size, center=True, min_periods=1).mean()
    return smooth.values

dep_smoothed = get_smoothed_normalized('dep_idx')
arr_smoothed = get_smoothed_normalized('arr_idx')

# Plotting Departures and Arrivals

def plot_overall(data, title, ylabel, filename, ymax=0.08):
    fig, ax = plt.subplots(figsize=(20, 8))
    times = [interval_to_time[i] for i in range(96)]
    ax.bar(times, data, color="#3C5A99", width=0.8, alpha=0.8)
    hour_ticks = [interval_to_time[i] for i in range(0, 96, 4)]
    ax.set_xticks(hour_ticks)
    ax.set_xticklabels(hour_ticks, rotation=90, fontsize=18)
    ax.set_axisbelow(True)
    ax.grid(True, axis='y', linestyle='--', alpha=0.45)
    ax.set_ylim(0, ymax)
    ax.set_xlabel("Quarter-hour Interval", fontsize=18)
    ax.set_ylabel(ylabel, fontsize=18)
    ax.set_title(title, fontsize=22, pad=12)
    plt.tight_layout()
    plt.savefig(filename, dpi=300, bbox_inches='tight', pad_inches=0.07)
    plt.close(fig)

plot_overall(
    dep_smoothed,
    "Normalised Weighted Departures by Quarter-Hour",
    "Proportion of Weighted Departures",
    os.path.join(output_dir, "Normalised_weighted_departures_all_groups.png"),
    ymax=0.06
)

plot_overall(
    arr_smoothed,
    "Normalised Weighted Arrivals by Quarter-Hour",
    "Proportion of Weighted Arrivals",
    os.path.join(output_dir, "Normalised_weighted_arrivals_all_groups.png"),
    ymax=0.06
)

In [4]:
# Defining economic groups

def econ_group(code):
    code = int(code)
    if code == 1:
        return 'High (A)'
    elif code in [2, 3]:
        return 'Upper middle (B1/B2)'
    elif code in [4, 5]:
        return 'Lower middle (C1/C2)'
    elif code == 6:
        return 'Low (D/E)'
    else:
        return 'Unknown'

cleaned_df['FE_VIA'] = cleaned_df['FE_VIA'].astype(float)
cleaned_df['econ_group'] = cleaned_df['CRITERIOBR'].apply(econ_group)

groups = ['High (A)', 'Upper middle (B1/B2)', 'Lower middle (C1/C2)', 'Low (D/E)']
colors = ['#1b9e77', '#d95f02', '#7570b3', '#e7298a']  # colorblind-friendly
window_size = 3  # for rolling smoothing

# Preparing smoothed and normalised data
def get_smoothed_normalized_by_group(idx_col):
    smoothed = {}
    for group in groups:
        group_df = cleaned_df[cleaned_df['econ_group'] == group]
        vals = group_df.groupby(idx_col)['FE_VIA'].sum().reindex(range(96), fill_value=0)
        norm_vals = vals / vals.sum() if vals.sum() > 0 else vals
        smooth = norm_vals.rolling(window=window_size, center=True, min_periods=1).mean()
        smoothed[group] = smooth.values
    return smoothed

departures_by_group = get_smoothed_normalized_by_group('dep_idx')
arrivals_by_group = get_smoothed_normalized_by_group('arr_idx')

# Plotting Function
def plot_normalized_group(data_dict, title, ylabel, filename, ymax=0.06):
    fig, ax = plt.subplots(figsize=(20, 8))
    for group, color in zip(groups, colors):
        data = data_dict[group]
        ax.plot([interval_to_time[i] for i in range(96)], data, label=group, color=color, linewidth=2.5)
    ax.set_ylim(0, ymax)
    # Only hour labels:
    hour_ticks = [interval_to_time[i] for i in range(0, 96, 4)]
    ax.set_xticks(hour_ticks)
    ax.set_xticklabels(hour_ticks, rotation=90, ha='right', fontsize=16)
    ax.grid(True, axis='y', linestyle='--', alpha=0.5)
    ax.set_axisbelow(True)
    ax.legend(
        title="Economic Group",
        fontsize=16,
        title_fontsize=16,
        loc='lower center',
        bbox_to_anchor=(0.5, -0.40),
        ncol=len(groups),
        frameon=False
    )
    ax.set_title(title, fontsize=18, pad=10)
    ax.set_xlabel("Time of Day", fontsize=18)
    ax.set_ylabel(ylabel, fontsize=18)
    plt.tight_layout()
    plt.savefig(filename, dpi=300, bbox_inches='tight')
    plt.close(fig)

# Ploting both departures and arrivals
plot_normalized_group(
    departures_by_group,
    "Normalised Weighted Departures by Quarter-Hour (by Economic Group)",
    "Proportion of Weighted Departures",
    os.path.join(output_dir,"Normalised_weighted_departures_by_econ_group.png"),
    ymax=0.06
)

plot_normalized_group(
    arrivals_by_group,
    "Normalised Weighted Arrivals by Quarter-Hour (by Economic Group)",
    "Proportion of Weighted Arrivals",
    os.path.join(output_dir,"Normalised_weighted_arrivals_by_econ_group.png"),
    ymax=0.06
)


In [5]:
# Heatmap creation

motive_df = final_df.xs('motive', level=1, axis=1)
fevia_df = final_df.xs('tripexpfactor', level=1, axis=1)
motive_data = motive_df.replace('transit', np.nan).apply(pd.to_numeric, errors='coerce')
fevia_data = fevia_df.replace('transit', np.nan).apply(pd.to_numeric, errors='coerce')

# Interval labels
intervals = []
for i in range(96):
    start_hour = i // 4
    start_min = (i % 4) * 15
    end_hour = (i + 1) // 4
    end_min = ((i + 1) % 4) * 15 - 1
    if end_min < 0:
        end_min = 59
        end_hour -= 1
    start_label = f'{start_hour:02d}:{start_min:02d}'
    end_label = f'{end_hour:02d}:{end_min:02d}'
    intervals.append(f'{start_label}-{end_label}')

# Motive codes/labels 
plot_motives = [1, 2, 3, 4, 5, 6, 7, 10, 11]
motive_labels = {
    1: "Work-industry",
    2: "Work-Trade/Commerce",
    3: "Work-Service",
    4: "School/Education",
    5: "Shopping",
    6: "Healthcare",
    7: "Recreation/Visits/Leisure",
    8: "Residency",
    9: "Job Search",
    10: "Personal Matters",
    11: "Meals"
}

# Computing normalised matrix (ALL GROUPS) 
weighted_counts = pd.DataFrame(index=[motive_labels[m] for m in plot_motives], columns=intervals, data=0.0)
for m in plot_motives:
    mask = (motive_data == m)
    weighted_per_time = fevia_data.where(mask).sum(axis=0)
    weighted_counts.loc[motive_labels[m], :] = weighted_per_time.values

total = weighted_counts.values.sum()
weighted_counts_norm = weighted_counts / total if total > 0 else weighted_counts

# Finding global max value for colorbar scaling across ALL plots
all_max = weighted_counts_norm.values.max()

In [6]:
csv_path = os.path.join(output_dir, "OD_ADULTS_Timeindex_Motive_location_tripexpansionfactors.csv")
wide_df = pd.read_csv((csv_path), header=[0,1], low_memory=False)

csv_path = os.path.join(output_dir, "cleandbf.csv")
od_df = pd.read_csv((csv_path),dtype=str, low_memory=False)
od_df['FE_VIA'] = cleaned_df['FE_VIA'].astype(float)

od_df['econ_group'] = od_df['CRITERIOBR'].astype(int).apply(econ_group)
id_to_econ = od_df[['ID_PESS', 'econ_group']].drop_duplicates().set_index('ID_PESS')['econ_group']
wide_df[('ID_PESS', 'Unnamed: 0_level_1')] = wide_df[('ID_PESS', 'Unnamed: 0_level_1')].astype(str)
wide_df['econ_group'] = wide_df[('ID_PESS', 'Unnamed: 0_level_1')].map(id_to_econ)
groups = ['High (A)', 'Upper middle (B1/B2)', 'Lower middle (C1/C2)', 'Low (D/E)']

# Finding groupwise max for global normalisation (so all colorbars are the same)
all_group_max = []
group_norms = {}
for grp in groups:
    group_df = wide_df[wide_df['econ_group'] == grp]
    if group_df.empty:
        continue
    motive_df = group_df.xs('motive', level=1, axis=1)
    fevia_df = group_df.xs('tripexpfactor', level=1, axis=1)
    motive_data = motive_df.replace('transit', np.nan).apply(pd.to_numeric, errors='coerce')
    fevia_data = fevia_df.replace('transit', np.nan).apply(pd.to_numeric, errors='coerce')
    weighted_counts = pd.DataFrame(index=[motive_labels[m] for m in plot_motives], columns=intervals, data=0.0)
    for m in plot_motives:
        mask = (motive_data == m)
        weighted_per_time = fevia_data.where(mask).sum(axis=0)
        weighted_counts.loc[motive_labels[m], :] = weighted_per_time.values
    total = weighted_counts.values.sum()
    norm = weighted_counts / total if total > 0 else weighted_counts
    group_norms[grp] = norm
    all_group_max.append(norm.values.max())

# Taking the highest value across all plots
vmax = max([all_max] + all_group_max)
vmin = 0

hour_ticks = list(range(0, len(intervals), 4))
hour_labels = [intervals[i][:5] for i in hour_ticks] 

plt.figure(figsize=(14, 7))  # Suitable for A4 width (landscape), or increase for more detail
ax = sns.heatmap(
    weighted_counts_norm.astype(float),
    cmap='Spectral_r',
    vmin=vmin,
    vmax=vmax,
    cbar_kws={'label': 'Proportion of expansion-weighted trips'},
    linewidths=0.5,
    linecolor=(0.5, 0.5, 0.5, 0.3)
)

# Set only hourly ticks (every 4th label from intervals)
ax.set_xticks(hour_ticks)
ax.set_xticklabels(hour_labels, rotation=90, fontsize=14)

# Motive labels on y-axis
ax.set_yticklabels(weighted_counts_norm.index.tolist(), rotation=0, fontsize=14)

# Axis labels and title
plt.xlabel('Time of Day (Hour)', fontsize=16)
plt.ylabel('Trip Motive', fontsize=16)
plt.title('Normalized Weighted Trip Motive Frequency per Interval (ALL GROUPS)', fontsize=20, pad=16)

cbar = ax.collections[0].colorbar
cbar.set_label('Proportion of expansion-weighted trips', fontsize=16)

plt.tight_layout()
plt.savefig(os.path.join(output_dir,"weighted_motive_time_heatmap_NORMALISED_rainbow_READABLE.png"), dpi=300, bbox_inches='tight', pad_inches=0.09)
plt.close()


In [7]:
import matplotlib.pyplot as plt
import seaborn as sns

fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(12, 20), sharex=True, sharey=True)
groups = ['High (A)', 'Upper middle (B1/B2)', 'Lower middle (C1/C2)', 'Low (D/E)']

# x-ticks: show only the hours
hour_ticks = list(range(0, len(intervals), 4))
hour_labels = [intervals[i][:5] for i in hour_ticks]  # Shows 'HH:MM'

for ax, grp in zip(axes.flat, groups):
    norm_data = group_norms.get(grp)
    if norm_data is not None and not norm_data.isnull().all().all():
        sns.heatmap(
            norm_data,
            cmap='Spectral_r',
            vmin=vmin,
            vmax=vmax,
            cbar=False,
            linewidths=0.5,
            linecolor=(0.5, 0.5, 0.5, 0.3),
            ax=ax
        )
        ax.set_title(grp, fontsize=18, pad=10)
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_xticks(hour_ticks)
        ax.set_xticklabels(hour_labels, rotation=90, fontsize=14)
        ax.set_yticklabels(norm_data.index.tolist(), rotation=0, fontsize=14)
    else:
        ax.axis('off')

# Common labels and a single colorbar
fig.text(0.5, 0.07, 'Time of Day (Hour)', ha='center', fontsize=16)
fig.text(0.01, 0.5, 'Trip Motive', va='center', rotation='vertical', fontsize=16)
cbar_ax = fig.add_axes([0.93, 0.15, 0.026, 0.7])
from matplotlib import cm
norm = plt.Normalize(vmin=vmin, vmax=vmax)
sm = cm.ScalarMappable(cmap='Spectral_r', norm=norm)
sm.set_array([])
cbar = fig.colorbar(sm, cax=cbar_ax, label='Proportion of expansion-weighted trips')
cbar.set_label('Proportion of expansion-weighted trips', fontsize=14)
plt.suptitle('Normalized Weighted Trip Motive Frequency per Interval by Economic Group', fontsize=21, y=0.97)
plt.tight_layout(rect=[0, 0.08, 0.92, 0.94])  # leave space for labels and colorbar
plt.savefig(os.path.join(output_dir,'combined_econ_group_heatmaps_NORMALISED_rainbow_vertical.png'), dpi=300, bbox_inches='tight', pad_inches=0.07)
plt.close()


  plt.tight_layout(rect=[0, 0.08, 0.92, 0.94])  # leave space for labels and colorbar


In [9]:
# Per Economic Groups

final_df['econ_group'] = cleaned_df['CRITERIOBR'].astype(int).apply(econ_group)
id_to_econ = (
    final_df[[('ID_PESS', 'Unnamed: 0_level_1'), ('econ_group', '')]]
    .drop_duplicates()
    .set_index(('ID_PESS', 'Unnamed: 0_level_1'))[('econ_group', '')]
)
id_to_econ.index = id_to_econ.index.astype(str)

final_df[('ID_PESS', 'Unnamed: 0_level_1')] = final_df[('ID_PESS', 'Unnamed: 0_level_1')].astype(str)
final_df[('econ_group', '')] = final_df[('ID_PESS', 'Unnamed: 0_level_1')].map(id_to_econ)

groups = ['High (A)', 'Upper middle (B1/B2)', 'Lower middle (C1/C2)', 'Low (D/E)']

# Finding groupwise max for global normalisation (so all colorbars are the same)
all_group_max = []
group_norms = {}
for grp in groups:
    group_df = final_df[final_df['econ_group'] == grp]
    if group_df.empty:
        continue
    motive_df = group_df.xs('motive', level=1, axis=1)
    fevia_df = group_df.xs('tripexpfactor', level=1, axis=1)
    motive_data = motive_df.replace('transit', np.nan).apply(pd.to_numeric, errors='coerce')
    fevia_data = fevia_df.replace('transit', np.nan).apply(pd.to_numeric, errors='coerce')
    weighted_counts = pd.DataFrame(index=[motive_labels[m] for m in plot_motives], columns=intervals, data=0.0)
    for m in plot_motives:
        mask = (motive_data == m)
        weighted_per_time = fevia_data.where(mask).sum(axis=0)
        weighted_counts.loc[motive_labels[m], :] = weighted_per_time.values
    total = weighted_counts.values.sum()
    norm = weighted_counts / total if total > 0 else weighted_counts
    group_norms[grp] = norm
    all_group_max.append(norm.values.max())

# Taking highest value across all plots 
vmax = max([all_max] + all_group_max)
vmin = 0


hour_ticks = list(range(0, len(intervals), 4))
hour_labels = [intervals[i][:5] for i in hour_ticks] 

plt.figure(figsize=(14, 7))  
ax = sns.heatmap(
    weighted_counts_norm.astype(float),
    cmap='Spectral_r',
    vmin=vmin,
    vmax=vmax,
    cbar_kws={'label': 'Proportion of expansion-weighted trips'},
    linewidths=0.5,
    linecolor=(0.5, 0.5, 0.5, 0.3)
)

ax.set_xticks(hour_ticks)
ax.set_xticklabels(hour_labels, rotation=90, fontsize=14)

ax.set_yticklabels(weighted_counts_norm.index.tolist(), rotation=0, fontsize=14)

plt.xlabel('Time of Day (Hour)', fontsize=16)
plt.ylabel('Trip Motive', fontsize=16)
plt.title('Normalized Weighted Trip Motive Frequency per Interval (ALL GROUPS)', fontsize=20, pad=16)

cbar = ax.collections[0].colorbar
cbar.set_label('Proportion of expansion-weighted trips', fontsize=16)

plt.tight_layout()
plt.savefig(os.path.join(output_dir,"weighted_motive_time_heatmap_NORMALISED_rainbow_READABLE.png"), dpi=300, bbox_inches='tight', pad_inches=0.09)
plt.close()


In [10]:
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import cm

# Prepa

def plot_vertical_econ_heatmap(group_norms, groups, intervals, vmin, vmax, output_dir, filename):
    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(12, 20), sharex=True, sharey=True)
    hour_ticks = list(range(0, len(intervals), 4))
    hour_labels = [intervals[i][:5] for i in hour_ticks]
    for ax, grp in zip(axes.flat, groups):
        norm_data = group_norms.get(grp)
        if norm_data is not None and not norm_data.isnull().all().all():
            sns.heatmap(
                norm_data,
                cmap='Spectral_r',
                vmin=vmin,
                vmax=vmax,
                cbar=False,
                linewidths=0.5,
                linecolor=(0.5, 0.5, 0.5, 0.3),
                ax=ax
            )
            ax.set_title(grp, fontsize=18, pad=10)
            ax.set_xlabel('')
            ax.set_ylabel('')
            ax.set_xticks(hour_ticks)
            ax.set_xticklabels(hour_labels, rotation=90, fontsize=14)
            ax.set_yticklabels(norm_data.index.tolist(), rotation=0, fontsize=14)
        else:
            ax.axis('off')
    fig.text(0.5, 0.07, 'Time of Day (Hour)', ha='center', fontsize=16)
    fig.text(0.01, 0.5, 'Trip Motive', va='center', rotation='vertical', fontsize=16)
    cbar_ax = fig.add_axes([0.93, 0.15, 0.026, 0.7])
    norm = plt.Normalize(vmin=vmin, vmax=vmax)
    sm = cm.ScalarMappable(cmap='Spectral_r', norm=norm)
    sm.set_array([])
    cbar = fig.colorbar(sm, cax=cbar_ax)
    cbar.set_label('Proportion of expansion-weighted trips', fontsize=14)
    plt.suptitle('Normalized Weighted Trip Motive Frequency per Interval by Economic Group', fontsize=21, y=0.97)
    plt.tight_layout(rect=[0, 0.08, 0.92, 0.94])
    plt.savefig(os.path.join(output_dir, filename), dpi=300, bbox_inches='tight', pad_inches=0.07)
    plt.close()


plot_vertical_econ_heatmap(
    group_norms, groups, intervals, vmin, vmax,
    output_dir, "combined_econ_group_heatmaps_NORMALISED_rainbow_vertical.png"
)

  plt.tight_layout(rect=[0, 0.08, 0.92, 0.94])
