In [3]:
import pandas as pd
from biogeme.database import Database
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta, Variable, log

# ------------------------------------------------------------
# 1) Load Excel and merge
# ------------------------------------------------------------
xls_path = r"C:\Users\parit\Downloads\Entry_form.xlsx"
resp = pd.read_excel(xls_path, sheet_name="Responses")
demo = pd.read_excel(xls_path, sheet_name="Demographics")
df = resp.merge(demo, on="participant_number", how="left")
df.columns = [c.strip() for c in df.columns]

# ------------------------------------------------------------
# 2) Required columns
# ------------------------------------------------------------
required_cols = [
    "participant_number","choice_set","choice",
    # A attributes
    "A_D2D","A_D2E","A_O","A_CD","A_CP",
    "A_CTD_green","A_CTD_yellow","A_CTD_red",
    "A_D","A_T2DR","A_T2DS","A_TS","A_TR_doorchange","A_TR_nochange",
    # B attributes
    "B_D2D","B_D2E","B_O","B_CD","B_CP",
    "B_CTD_green","B_CTD_yellow","B_CTD_red",
    "B_D","B_T2DR","B_T2DS","B_TS","B_TR_doorchange","B_TR_nochange",
]
missing = [c for c in required_cols if c not in df.columns]
if missing:
    raise ValueError(f"Missing columns: {missing}")

# ------------------------------------------------------------
# 3) Helper
# ------------------------------------------------------------
def norm_choice(x):
    return "" if pd.isna(x) else str(x).strip().lower()

# ------------------------------------------------------------
# 4) Wide → Long
# ------------------------------------------------------------
rows = []
for _, r in df.iterrows():
    ch = norm_choice(r["choice"])

    # Alt 1: Door A
    rows.append({
        "participant_number": r["participant_number"],
        "choice_set": r["choice_set"],
        "alt_id": 1,
        "chosen": 1 if ch in ["door a","a"] else 0,
        "D2D": r["A_D2D"], "D2E": r["A_D2E"], "O": r["A_O"],
        "CD": r["A_CD"], "CP": r["A_CP"],
        "CTD_green": r["A_CTD_green"], "CTD_yellow": r["A_CTD_yellow"], "CTD_red": r["A_CTD_red"],
        "D": r["A_D"], "T2DR": r["A_T2DR"], "T2DS": r["A_T2DS"], "TS": r["A_TS"],
        "TR_doorchange": r["A_TR_doorchange"], "TR_nochange": r["A_TR_nochange"],
        "avail_A": 1, "avail_B": 1, "avail_None": 1,
    })

    # Alt 2: Door B
    rows.append({
        "participant_number": r["participant_number"],
        "choice_set": r["choice_set"],
        "alt_id": 2,
        "chosen": 1 if ch in ["door b","b"] else 0,
        "D2D": r["B_D2D"], "D2E": r["B_D2E"], "O": r["B_O"],
        "CD": r["B_CD"], "CP": r["B_CP"],
        "CTD_green": r["B_CTD_green"], "CTD_yellow": r["B_CTD_yellow"], "CTD_red": r["B_CTD_red"],
        "D": r["B_D"], "T2DR": r["B_T2DR"], "T2DS": r["B_T2DS"], "TS": r["B_TS"],
        "TR_doorchange": r["B_TR_doorchange"], "TR_nochange": r["B_TR_nochange"],
        "avail_A": 1, "avail_B": 1, "avail_None": 1,
    })

    # Alt 3: None of both
    rows.append({
        "participant_number": r["participant_number"],
        "choice_set": r["choice_set"],
        "alt_id": 3,
        "chosen": 1 if ch in ["none of both","none","no choice"] else 0,
        # All attributes neutral for opt-out
        "D2D": 0, "D2E": 0, "O": 0, "CD": 0, "CP": 0,
        "CTD_green": 0, "CTD_yellow": 0, "CTD_red": 0,
        "D": 0, "T2DR": 0, "T2DS": 0, "TS": 0,
        "TR_doorchange": 0, "TR_nochange": 0, 
        "avail_A": 1, "avail_B": 1, "avail_None": 1,
    })

long = pd.DataFrame(rows)

# ------------------------------------------------------------
# 5) Keep only valid choice situations
# ------------------------------------------------------------
grp = long.groupby(["participant_number","choice_set"])["chosen"].sum()
bad = grp[grp != 1].index
if len(bad):
    bad_idx = pd.MultiIndex.from_tuples(bad, names=["participant_number","choice_set"])
    mask = long.set_index(["participant_number","choice_set"]).index.isin(bad_idx)
    long = long[~mask].copy()

# CHOICE variable
choice_map = long[long["chosen"]==1].set_index(["participant_number","choice_set"])["alt_id"]
long["CHOICE"] = long.set_index(["participant_number","choice_set"]).index.map(choice_map)

# ------------------------------------------------------------
# 6) Save to CSV for Biogeme
# ------------------------------------------------------------
long.to_csv("dce_long_format_with_TRnone.csv", index=False)
print("Saved: dce_long_format_with_TRnone.csv")
print(long.head())


Saved: dce_long_format_with_TRnone.csv
   participant_number  choice_set  alt_id  chosen  D2D  D2E  O  CD  CP  \
0                   0           1       1       0   30   40  0   0   4   
1                   0           1       2       1   70    0  1   0   4   
2                   0           1       3       0    0    0  0   0   0   
3                   0           2       1       1   30   80  0   5   1   
4                   0           2       2       0   10    0  0  10   4   

   CTD_green  ...   D  T2DR  T2DS  TS  TR_doorchange  TR_nochange  avail_A  \
0          1  ...   0     4     0   0              1            0        1   
1          0  ...  10     4     0   0              0            1        1   
2          0  ...   0     0     0   0              0            0        1   
3          0  ...  25     4    10   1              1            0        1   
4          1  ...  10     4    10   1              1            0        1   

   avail_B  avail_None  CHOICE  
0        1    

In [5]:
import pandas as pd
from biogeme.database import Database
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta, Variable, log

# ------------------------------------------------------------
# 1) Load the long-format dataset
# ------------------------------------------------------------
df = pd.read_csv("dce_long_format_with_TRnone.csv")
database = Database('door_choice', df)

# ------------------------------------------------------------
# 2) Variables
# ------------------------------------------------------------
CHOICE     = Variable('CHOICE')
alt_id     = Variable('alt_id')

# Core attributes
D2D        = Variable('D2D')
D2E        = Variable('D2E')
O          = Variable('O')
CD         = Variable('CD')
CP         = Variable('CP')
CTD_green  = Variable('CTD_green')
CTD_yellow = Variable('CTD_yellow')
CTD_red    = Variable('CTD_red')

# New attributes
D           = Variable('D')
T2DR        = Variable('T2DR')
T2DS        = Variable('T2DS')
TS          = Variable('TS')
TR_doorchange = Variable('TR_doorchange')
TR_nochange   = Variable('TR_nochange')

# Availability
avail_A    = Variable('avail_A')
avail_B    = Variable('avail_B')
avail_None = Variable('avail_None')

# ------------------------------------------------------------
# 3) Parameters
# ------------------------------------------------------------

# ----- Alternative-specific constants -----
ASC_A    = Beta('ASC_A',    0.0,  None, None, 1)   # base, fixed
ASC_B    = Beta('ASC_B',    0.0,  None, None, 0)
ASC_None = Beta('ASC_None', 0.0,  None, None, 0)

# ====== Core attributes (near-zero starts, sign only) ======
b_D2D        = Beta('b_D2D',       -1e-3, None, None, 0)  # farther door → worse
b_D2E        = Beta('b_D2E',       -1e-3, None, None, 0)  # farther exit → worse
b_O          = Beta('b_O',         -1e-3, None, None, 0)  # obstacle → worse
b_CD         = Beta('b_CD',        -1e-3, None, None, 0)  # more people at door → worse
b_CP         = Beta('b_CP',        -1e-3, None, None, 0)  # denser platform → worse

# CTD informational cues (relative to None)
b_CTD_green  = Beta('b_CTD_green',  +1e-3, None, None, 0) # green = good
b_CTD_yellow = Beta('b_CTD_yellow', +1e-3, None, None, 0) # weakly positive / neutral
b_CTD_red    = Beta('b_CTD_red',    -1e-3, None, None, 0) # red = bad

# ====== New attributes (near-zero starts, sign only) ======
# Door-level discount (more discount → better)
b_D            = Beta('b_D',             +1e-3, None, None, 0)

# Timing: more time to next/current departure typically reduces urgency → (weakly) positive
# If your prior logic was “more wait is worse”, flip these to -1e-3.
b_T2DR         = Beta('b_T2DR',          +1e-3, None, None, 0)  # time to depart (this train)
b_T2DS         = Beta('b_T2DS',          +1e-3, None, None, 0)  # time until next
b_TS           = Beta('b_TS',            -1e-3, None, None, 0)  # next-train indicator; often less attractive than current

# Transfer history (reference = no transfer):
# If both TR dummies are 0 → "no transfer" (baseline). Positive means people didn’t mind it.
b_TR_doorchange = Beta('b_TR_doorchange', +1e-3, None, None, 0)
b_TR_nochange   = Beta('b_TR_nochange',   +1e-3, None, None, 0)

# ------------------------------------------------------------
# 4) Utility functions
# ------------------------------------------------------------
VA = (alt_id == 1) * (ASC_A
    + b_D2D*D2D + b_D2E*D2E + b_O*O + b_CD*CD + b_CP*CP
    + b_CTD_green*CTD_green + b_CTD_yellow*CTD_yellow + b_CTD_red*CTD_red
    + b_D*D + b_T2DR*T2DR + b_T2DS*T2DS + b_TS*TS
    + b_TR_doorchange*TR_doorchange + b_TR_nochange*TR_nochange 
)

VB = (alt_id == 2) * (ASC_B
    + b_D2D*D2D + b_D2E*D2E + b_O*O + b_CD*CD + b_CP*CP
    + b_CTD_green*CTD_green + b_CTD_yellow*CTD_yellow + b_CTD_red*CTD_red
    + b_D*D + b_T2DR*T2DR + b_T2DS*T2DS + b_TS*TS
    + b_TR_doorchange*TR_doorchange + b_TR_nochange*TR_nochange 
)

VNone = (alt_id == 3) * (ASC_None)

V  = {1: VA, 2: VB, 3: VNone}
AV = {1: avail_A, 2: avail_B, 3: avail_None}

# ------------------------------------------------------------
# 5) Model
# ------------------------------------------------------------
prob    = models.logit(V, AV, CHOICE)
loglike = log(prob)

bgm = bio.BIOGEME(database, loglike)
bgm.model_name = 'door_choice_mnl_extended_with_TRnone'

# ------------------------------------------------------------
# 6) Estimate
# ------------------------------------------------------------
results = bgm.estimate()


print("\n--- Estimated parameters (pandas) ---")
try:
    # New API (preferred)
    params_df = results.get_pandas_estimated_parameters()
except Exception:
    # Fallback if running an older Biogeme
    params_df = results.get_estimated_parameters()
print(params_df)

print("\n--- Fit statistics ---")
try:
    stats = results.get_general_statistics()
except Exception:
    # Fallback: minimal stats from available attributes
    stats = {
        "Final log likelihood": results.data.logLike if hasattr(results, "data") else None,
        "Null log likelihood":  getattr(results, "nullLogLike", None),
        "Rho-square":           getattr(results, "rhoSquare", None),
        "Adj. rho-square":      getattr(results, "rhoSquareBar", None),
        "Number of obs":        getattr(results, "numberOfObservations", None),
        "Number of params":     getattr(results, "numberOfFreeParameters", None),
    }
for k, v in stats.items():
    print(f"{k}: {v}")


--- Estimated parameters (pandas) ---
               Name     Value  Robust std err.  Robust t-stat.  Robust p-value
0             b_D2D -0.040251         0.008734       -4.608805    4.049907e-06
1             b_D2E -0.017101         0.003915       -4.367733    1.255430e-05
2               b_O -0.965951         0.251391       -3.842421    1.218265e-04
3              b_CD -0.214648         0.030710       -6.989443    2.759792e-12
4              b_CP  0.093685         0.082574        1.134559    2.565603e-01
5       b_CTD_green  1.555353         0.331650        4.689737    2.735561e-06
6      b_CTD_yellow  1.138224         0.332027        3.428108    6.078031e-04
7         b_CTD_red  1.654396         0.313638        5.274849    1.328658e-07
8               b_D  0.056054         0.007464        7.509587    5.928591e-14
9            b_T2DR  0.539176         0.111038        4.855779    1.199145e-06
10           b_T2DS  0.161018         0.056346        2.857662    4.267741e-03
11           

  params_df = results.get_estimated_parameters()
