In [50]:
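# Clustering Dashboard Page: merge per-route cluster labels with speed / journey-performance effects and export them as a TypeScript data module.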

In [51]:
import os
from pathlib import Path

# Project root = two levels up from the kernel's working directory.
# Assumes the notebook is run from <project>/notebooks/EDA; if the kernel is
# started elsewhere, every path derived from PROJECT_ROOT will be wrong.
PROJECT_ROOT = Path.cwd().parents[1]

In [52]:
# Directory the CSV inputs below are read from; it is created (including any
# missing parents) when it does not exist yet.
DATA_PROCESSED = PROJECT_ROOT.joinpath("data")
DATA_PROCESSED.mkdir(exist_ok=True, parents=True)

In [53]:
import pandas as pd

# Per-route cluster assignments (the preview below shows route_id and cluster).
clusters_df = pd.read_csv(DATA_PROCESSED.joinpath("pca_df.csv"))
clusters_df.head(5)

Unnamed: 0,route_id,cluster
0,B11,1
1,B25,1
2,B26,2
3,B35,1
4,B41,1


In [54]:
# Per-route speed and journey-performance total-effect percentages.
speedjrny_df = pd.read_csv(DATA_PROCESSED.joinpath("df_speedjrny.csv"))
speedjrny_df.head(5)

Unnamed: 0,route_id,speed_peak_total_effect_pct,speed_overall_total_effect_pct,journeyperformance_overall_total_effect_pct,journeyperformance_off_peak_total_effect_pct,journeyperformance_peak_total_effect_pct,speed_offpeak_total_effect_pct
0,B11,0.341355,1.630937,4.905607,3.947306,5.977799,2.028047
1,B25,0.954338,1.063916,1.206224,0.10481,2.749699,-0.437916
2,B26,4.963512,5.937148,7.927406,4.98085,10.939128,7.085533
3,B35,0.912781,0.462497,-0.833366,-0.801084,-1.138577,0.251281
4,B41,3.231192,3.066334,2.49747,3.098419,1.771663,2.988319


In [55]:
# Attach the cluster label to each route's effect metrics. The inner join keeps
# only routes that appear in both tables.
merged_df = pd.merge(speedjrny_df, clusters_df, how="inner", on="route_id")

merged_df.head(5)

Unnamed: 0,route_id,speed_peak_total_effect_pct,speed_overall_total_effect_pct,journeyperformance_overall_total_effect_pct,journeyperformance_off_peak_total_effect_pct,journeyperformance_peak_total_effect_pct,speed_offpeak_total_effect_pct,cluster
0,B11,0.341355,1.630937,4.905607,3.947306,5.977799,2.028047,1
1,B25,0.954338,1.063916,1.206224,0.10481,2.749699,-0.437916,1
2,B26,4.963512,5.937148,7.927406,4.98085,10.939128,7.085533,2
3,B35,0.912781,0.462497,-0.833366,-0.801084,-1.138577,0.251281,1
4,B41,3.231192,3.066334,2.49747,3.098419,1.771663,2.988319,1


In [56]:
# Normalise the key dtypes and round the two "overall" effect columns to two
# decimals; every other column is left untouched.
merged_df = merged_df.astype(
    {
        "route_id": str,
        "cluster": int,
        "speed_overall_total_effect_pct": float,
        "journeyperformance_overall_total_effect_pct": float,
    }
).round(
    {
        "speed_overall_total_effect_pct": 2,
        "journeyperformance_overall_total_effect_pct": 2,
    }
)

merged_df.head(5)

Unnamed: 0,route_id,speed_peak_total_effect_pct,speed_overall_total_effect_pct,journeyperformance_overall_total_effect_pct,journeyperformance_off_peak_total_effect_pct,journeyperformance_peak_total_effect_pct,speed_offpeak_total_effect_pct,cluster
0,B11,0.341355,1.63,4.91,3.947306,5.977799,2.028047,1
1,B25,0.954338,1.06,1.21,0.10481,2.749699,-0.437916,1
2,B26,4.963512,5.94,7.93,4.98085,10.939128,7.085533,2
3,B35,0.912781,0.46,-0.83,-0.801084,-1.138577,0.251281,1
4,B41,3.231192,3.07,2.5,3.098419,1.771663,2.988319,1


In [57]:
id_col = "route_id"
cluster_col = "cluster"

# Every total-effect metric column carried through to the export.
effect_cols = [
    "speed_peak_total_effect_pct",
    "speed_overall_total_effect_pct",
    "journeyperformance_overall_total_effect_pct",
    "journeyperformance_off_peak_total_effect_pct",
    "journeyperformance_peak_total_effect_pct",
    "speed_offpeak_total_effect_pct",
]

# Build a fresh frame with the final dtypes:
#   route id -> string, cluster -> integer,
#   effect columns -> float rounded to 2 decimals.
merged_df = (
    merged_df
    .astype({id_col: str, cluster_col: int})
    .astype(dict.fromkeys(effect_cols, float))
    .round(dict.fromkeys(effect_cols, 2))
)

merged_df.head(2)

Unnamed: 0,route_id,speed_peak_total_effect_pct,speed_overall_total_effect_pct,journeyperformance_overall_total_effect_pct,journeyperformance_off_peak_total_effect_pct,journeyperformance_peak_total_effect_pct,speed_offpeak_total_effect_pct,cluster
0,B11,0.34,1.63,4.91,3.95,5.98,2.03,1
1,B25,0.95,1.06,1.21,0.1,2.75,-0.44,1


In [58]:
# Prepare the frame for the TypeScript export: rename the metric columns to the
# camelCase field names used in the generated file. `rename` returns a new
# frame, so `merged_df` itself is not modified.
df = merged_df.rename(
    columns=dict(
        route_id="routeId",
        speed_overall_total_effect_pct="speedOverall",
        speed_peak_total_effect_pct="speedPeak",
        speed_offpeak_total_effect_pct="speedOffPeak",
        journeyperformance_overall_total_effect_pct="jrnyOverall",
        journeyperformance_peak_total_effect_pct="jrnyPeak",
        journeyperformance_off_peak_total_effect_pct="jrnyOffPeak",
    )
)

# Intended to turn missing values (NaN) into None.
# NOTE(review): on float columns pandas may store the None back as NaN,
# depending on version -- confirm; to_ts_value below checks for both.
df = df.where(df.notna(), None)


import math

def to_ts_value(v):
    """Convert a cell value into something safe to interpolate as a TS number.

    Args:
        v: A numeric value (anything ``float()`` accepts) or ``None``.

    Returns:
        The string ``"null"`` when ``v`` is ``None``, NaN or +/-infinity;
        otherwise ``v`` as a ``float`` rounded to 4 decimal places.

    Raises:
        TypeError / ValueError: propagated from ``float(v)`` when ``v`` is not
            convertible to a number.
    """
    if v is None:
        return "null"
    # Convert first so NaN values that are not Python `float` instances
    # (e.g. numpy float32) are caught as well.
    number = float(v)
    # `nan` and `inf` are not valid TypeScript literals, so emit null instead.
    if not math.isfinite(number):
        return "null"
    return round(number, 4)


import json

# One TypeScript object literal per route. json.dumps produces a correctly
# quoted and escaped string literal for the route id.
# NOTE(review): the `cluster` column present in `df` is not emitted -- confirm
# that is intended.
rows = [
    f"""  {{
    routeId: {json.dumps(str(r.routeId))},
    speedOverall: {to_ts_value(r.speedOverall)},
    speedPeak: {to_ts_value(r.speedPeak)},
    speedOffPeak: {to_ts_value(r.speedOffPeak)},
    jrnyOverall: {to_ts_value(r.jrnyOverall)},
    jrnyPeak: {to_ts_value(r.jrnyPeak)},
    jrnyOffPeak: {to_ts_value(r.jrnyOffPeak)}
  }}"""
    for r in df.itertuples(index=False)
]

# The join is done outside the f-string because f-string expressions cannot
# contain a backslash (the "\n") before Python 3.12.
rows_joined = ",\n".join(rows)

# The array annotation must use the type declared just above it (`SpeedJrny`),
# otherwise the generated module does not type-check.
ts = f"""
export type SpeedJrny = {{
  routeId: string;
  speedOverall: number | null;
  speedPeak: number | null;
  speedOffPeak: number | null;
  jrnyOverall: number | null;
  jrnyPeak: number | null;
  jrnyOffPeak: number | null;
}};

export const speedJrny: SpeedJrny[] = [
{rows_joined}
];
"""

# Written relative to the project root instead of a machine-specific absolute
# path. Assumes PROJECT_ROOT is the mta-ace-buses checkout, i.e. the directory
# the previous hard-coded path pointed into.
output_path = PROJECT_ROOT / "src" / "data" / "processed" / "SpeedJrny.ts"
output_path.parent.mkdir(parents=True, exist_ok=True)

# Explicit encoding so the output does not depend on the platform default.
output_path.write_text(ts, encoding="utf-8")

print("âœ… SpeedJrny.ts written")


âœ… SpeedJrny.ts written
