In [57]:
from pathlib import Path
import numpy as np
import pandas as pd
import torch

from modechoice.config import MODES
from modechoice.io import load_bundle
from modechoice.pipeline import ModeChoicePipeline
from modechoice.model import NestedLogitHetero
from modechoice.tensors import Standardizer
from modechoice.data import ensure_long_format
from modechoice.features import add_relative_features_long
from modechoice.tensors import build_choice_tensors_hetero

from modechoice.pricing_population import make_mask, run_scenario_grid

In [None]:
def find_project_root(start: Path, marker: str = "pyproject.toml") -> Path:
    """Return the closest directory at or above ``start`` that contains ``marker``.

    Parameters
    ----------
    start : Path
        Directory where the search begins (normally the current working dir).
    marker : str
        File name whose presence identifies the project root.

    Returns
    -------
    Path
        The nearest ancestor (``start`` included) that holds ``marker``.
        Falls back to ``start.parent`` when no ancestor contains the marker.
    """
    # Nearest match wins, so a notebook launched from the repo root or from a
    # first-level subfolder resolves exactly as a one-level check would; deeper
    # subfolders now resolve correctly too.
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    return start.parent


ROOT = find_project_root(Path.cwd())

DEVICE = "cpu"
torch.set_default_dtype(torch.float32)

# All artifact/data locations are derived from ROOT; no absolute paths.
BUNDLE_DIR = ROOT / "artifacts" / "modechoice_bundle"
DATA_PATH  = ROOT / "dataset" / "ModeCanada.csv"

# print("ROOT:", ROOT) # uncomment to see (kept off so local paths are not saved in the output)
print("BUNDLE_DIR exists:", BUNDLE_DIR.exists())
print("DATA_PATH exists:", DATA_PATH.exists())

BUNDLE_DIR exists: True
DATA_PATH exists: True


In [69]:
# Load the saved pipeline from BUNDLE_DIR, telling the loader which classes to
# use for the pipeline, the model and the scaler.
pipe = load_bundle(
    BUNDLE_DIR,
    PipelineCls=ModeChoicePipeline,
    ModelCls=NestedLogitHetero,
    ScalerCls=Standardizer,
)

# Sanity: touching these attributes raises AttributeError right here if the
# loaded model does not expose them (nothing is displayed).
pipe.model.asc
pipe.model.beta

print("Loaded feat_names:", len(pipe.scaler.feat_names))
print("Feats:", pipe.scaler.feat_names)
print("Lambdas:", pipe.model.lambdas())

Loaded feat_names: 14
Feats: ['cost', 'ivt', 'ovt', 'freq', 'urban_x_ovt', 'gen_time', 'income_train', 'urban_train', 'income_car', 'urban_car', 'income_bus', 'urban_bus', 'income_air', 'urban_air']
Lambdas: {'air': 0.6149392127990723, 'land': 0.9137387871742249}


In [60]:
# Coerce the scaler statistics to float32 numpy arrays (the notebook's torch
# default dtype is float32 as well).
pipe.scaler.mu, pipe.scaler.sd = [
    np.asarray(stat, dtype=np.float32)
    for stat in (pipe.scaler.mu, pipe.scaler.sd)
]

In [None]:
feat_names = pipe.scaler.feat_names

# Names of the per-mode interaction features: income_<mode> and urban_<mode>.
per_mode_feats = {
    f"{prefix}_{mode}"
    for mode in MODES
    for prefix in ("income", "urban")
}

# Whatever is left (in the scaler's original order) is an item-level feature.
item_feat_names = [name for name in feat_names if name not in per_mode_feats]

print("Derived item_feat_names:", item_feat_names)
print("Derived D_item:", len(item_feat_names))
print("Total D_model:", len(feat_names))

Derived item_feat_names: ['cost', 'ivt', 'ovt', 'freq', 'urban_x_ovt', 'gen_time']
Derived D_item: 6
Total D_model: 14


In [None]:
# Read the raw survey file, convert it to long format and add the relative
# features in one pass.
df_raw = pd.read_csv(DATA_PATH)
df_long = add_relative_features_long(
    ensure_long_format(df_raw),
    w_ovt=2.0,
    freq_period_minutes=1440.0,
)

# Build tensors with the already-fitted scaler (fit_scaler=False: no re-fit).
all_t, _ = build_choice_tensors_hetero(
    df_long,
    scaler=pipe.scaler,
    fit_scaler=False,
    item_feat_names=item_feat_names,
)

# The tensors must use exactly the feature list the scaler was trained with.
assert all_t["feat_names"] == pipe.scaler.feat_names, "Feature mismatch vs trained scaler!"

print("all_t keys:", all_t.keys())
for tensor_name in ("X_item", "X_item_orig", "avail", "y"):
    print(f"{tensor_name}:", all_t[tensor_name].shape)

all_t keys: dict_keys(['X_item', 'X_item_orig', 'avail', 'y', 'cases', 'feat_names'])
X_item: torch.Size([4324, 4, 14])
X_item_orig: (4324, 4, 14)
avail: torch.Size([4324, 4])
y: torch.Size([4324])


In [None]:
# Every vector in this cell is positional and written in this mode order.
# Fail loudly if the package's MODES ever differs, instead of silently
# assigning capacities/shocks to the wrong mode.
assert list(MODES) == ["train", "car", "bus", "air"], (
    "cap / scenario vectors below assume MODES == ['train', 'car', 'bus', 'air']"
)

# Capacity vector must be length 4 in MODES order: [train, car, bus, air]
cap = torch.tensor([1000.0, 2000.0, 800.0, 600.0], dtype=torch.float32)
assert len(cap) == len(MODES), "cap must have one entry per mode"

# Decide what to control
control_modes = ["train", "air"]
assert set(control_modes) <= set(MODES), "control_modes contains an unknown mode"
controllable_mask = make_mask(control_modes)  # torch.bool length 4

# Build scenario vectors (exogenous shocks) in MODES order.
#   if controllable_mask is not None:
#       controlled -> decision_mult (ignores scenario shock)
#       uncontrolled -> scenario_mult
scenario_mult_list = [
    [1.00, 1.00, 1.00, 1.00],  # baseline
    [1.00, 1.05, 1.00, 1.00],  # car +5% exogenous
    [1.00, 0.95, 1.10, 1.00],  # car -5%, bus +10%
    [1.00, 1.10, 1.05, 1.00],  # car +10%, bus +5%
]
assert all(len(mults) == len(MODES) for mults in scenario_mult_list), (
    "every scenario needs one multiplier per mode"
)

In [64]:
# Model parameters handed to the scenario solver.
asc = pipe.model.asc
beta = pipe.model.beta
raw_la = pipe.model.raw_lam_air
raw_ll = pipe.model.raw_lam_land

# Solver settings, grouped so they are easy to tune in one place.
# NOTE(review): presumably optimiser steps / learning rate / smoothing; confirm
# against run_scenario_grid's documentation.
solver_opts = dict(
    steps=300,
    lr=0.05,
    k_smooth=30.0,
    warm_start=True,
    verbose_every=0,
)

grid_df = run_scenario_grid(
    tensors=all_t,
    scaler=pipe.scaler,
    asc=asc,
    beta=beta,
    raw_la=raw_la,
    raw_ll=raw_ll,
    cap=cap,
    scenario_mult_list=scenario_mult_list,
    controllable_mask=controllable_mask,
    **solver_opts,
)

grid_df.head()

Unnamed: 0,scenario_id,revenue_total,scenario_mult_train,decision_mult_train,total_mult_train,avg_price_train,demand_train,sold_train,revenue_train,scenario_mult_car,...,demand_bus,sold_bus,revenue_bus,scenario_mult_air,decision_mult_air,total_mult_air,avg_price_air,demand_air,sold_air,revenue_air
0,0,428896.09375,1.0,2.04159,2.04159,111.668533,94.681328,94.681328,10572.924805,1.0,...,23.786421,23.786421,609.536316,1.0,1.016179,1.016179,160.170624,1620.691162,1586.668091,254137.625
1,1,443151.375,1.0,2.097899,2.097899,114.748413,94.408249,94.408249,10833.196289,1.05,...,27.20035,27.20035,697.019592,1.0,1.042366,1.042366,164.298141,1614.134277,1580.329834,259645.25
2,2,414777.75,1.0,1.986605,1.986605,108.660995,94.852798,94.852798,10306.799805,0.95,...,18.667549,18.667549,526.199524,1.0,0.990293,0.990293,156.090424,1627.125732,1592.888184,248634.59375
3,3,457515.6875,1.0,2.155267,2.155267,117.886299,94.04911,94.04911,11087.101562,1.1,...,29.456518,29.456518,792.576477,1.0,1.068815,1.068815,168.467148,1607.460571,1573.87854,265146.84375


In [65]:
# Columns to show: identifiers, all three multiplier kinds for every mode, then
# price/volume/revenue for train and air only.
cols = (
    ["scenario_id", "revenue_total"]
    + [
        f"{kind}_mult_{mode}"
        for kind in ("scenario", "decision", "total")
        for mode in ("train", "car", "bus", "air")
    ]
    + [
        f"{metric}_{mode}"
        for metric in ("avg_price", "demand", "sold", "revenue")
        for mode in ("train", "air")
    ]
)

# Ten best scenarios by total revenue, highest first.
top = grid_df.sort_values(by="revenue_total", ascending=False).head(10)
top.loc[:, cols].round(4)

Unnamed: 0,scenario_id,revenue_total,scenario_mult_train,scenario_mult_car,scenario_mult_bus,scenario_mult_air,decision_mult_train,decision_mult_car,decision_mult_bus,decision_mult_air,...,total_mult_bus,total_mult_air,avg_price_train,avg_price_air,demand_train,demand_air,sold_train,sold_air,revenue_train,revenue_air
3,3,457515.6875,1.0,1.1,1.05,1.0,2.1553,1.0,1.0,1.0688,...,1.05,1.0688,117.8863,168.4671,94.0491,1607.4606,94.0491,1573.8785,11087.1016,265146.8438
1,1,443151.375,1.0,1.05,1.0,1.0,2.0979,1.0,1.0,1.0424,...,1.0,1.0424,114.7484,164.2981,94.4082,1614.1343,94.4082,1580.3298,10833.1963,259645.25
0,0,428896.0938,1.0,1.0,1.0,1.0,2.0416,1.0,1.0,1.0162,...,1.0,1.0162,111.6685,160.1706,94.6813,1620.6912,94.6813,1586.6681,10572.9248,254137.625
2,2,414777.75,1.0,0.95,1.1,1.0,1.9866,1.0,1.0,0.9903,...,1.1,0.9903,108.661,156.0904,94.8528,1627.1257,94.8528,1592.8882,10306.7998,248634.5938


In [66]:
modes = MODES

# Reshape the wide grid (one row per scenario, one column per metric+mode) into
# a long table with one row per (scenario, mode).
rows = [
    {
        "scenario_id": int(scenario["scenario_id"]),
        "mode": mode,
        **{
            metric: float(scenario[f"{metric}_{mode}"])
            for metric in (
                "scenario_mult",
                "decision_mult",
                "total_mult",
                "avg_price",
                "demand",
                "sold",
                "revenue",
            )
        },
        "revenue_total": float(scenario["revenue_total"]),
    }
    for _, scenario in grid_df.iterrows()
    for mode in modes
]

long_df = pd.DataFrame(rows)
long_df

Unnamed: 0,scenario_id,mode,scenario_mult,decision_mult,total_mult,avg_price,demand,sold,revenue,revenue_total
0,0,train,1.0,2.04159,2.04159,111.668533,94.681328,94.681328,10572.924805,428896.09375
1,0,car,1.0,1.0,1.0,63.763718,2584.84082,2565.346191,163576.015625,428896.09375
2,0,bus,1.0,1.0,1.0,25.625391,23.786421,23.786421,609.536316,428896.09375
3,0,air,1.0,1.016179,1.016179,160.170624,1620.691162,1586.668091,254137.625,428896.09375
4,1,train,1.0,2.097899,2.097899,114.748413,94.408249,94.408249,10833.196289,443151.375
5,1,car,1.05,1.0,1.05,66.951904,2588.257324,2568.648682,171975.921875,443151.375
6,1,bus,1.0,1.0,1.0,25.625391,27.20035,27.20035,697.019592,443151.375
7,1,air,1.0,1.042366,1.042366,164.298141,1614.134277,1580.329834,259645.25,443151.375
8,2,train,1.0,1.986605,1.986605,108.660995,94.852798,94.852798,10306.799805,414777.75
9,2,car,0.95,1.0,0.95,60.575531,2583.354004,2563.908936,155310.140625,414777.75


In [67]:
# Keep only the modes listed in control_modes, then spread them into columns
# so each scenario is a single row.
is_controlled = long_df["mode"].isin(control_modes)

policy_view = (
    long_df.loc[is_controlled]
    .sort_values(by=["scenario_id", "mode"])
    .pivot_table(
        index="scenario_id",
        columns="mode",
        values=["decision_mult", "avg_price", "sold", "revenue"],
        aggfunc="first",
    )
)
policy_view.round(4)

Unnamed: 0_level_0,avg_price,avg_price,decision_mult,decision_mult,revenue,revenue,sold,sold
mode,air,train,air,train,air,train,air,train
scenario_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0,160.1706,111.6685,1.0162,2.0416,254137.625,10572.9248,1586.6681,94.6813
1,164.2981,114.7484,1.0424,2.0979,259645.25,10833.1963,1580.3298,94.4082
2,156.0904,108.661,0.9903,1.9866,248634.5938,10306.7998,1592.8882,94.8528
3,168.4671,117.8863,1.0688,2.1553,265146.8438,11087.1016,1573.8785,94.0491
