In [12]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

# Load data
data = pd.read_csv('./data/swissmetro.dat', sep='\t')

In [13]:
data.head()

Unnamed: 0,ID,SURVEY,PURPOSE,FIRST,TICKET,WHO,LUGGAGE,S_O_P,AGE,MALE,...,TRAIN_TT,TRAIN_CO,TRAIN_FR,SM_TT,SM_CO,SM_FR,SM_SEATS,CAR_TT,CAR_CO,CHOICE
0,1,0,1,0,1,1,0,2,3,0,...,112,48,120,63,52,20,0,117,65,2
1,1,0,1,0,1,1,0,2,3,0,...,103,48,30,60,49,10,0,117,84,2
2,1,0,1,0,1,1,0,2,3,0,...,130,48,60,67,58,30,0,117,52,2
3,1,0,1,0,1,1,0,2,3,0,...,103,40,30,63,52,20,0,72,52,2
4,1,0,1,0,1,1,0,2,3,0,...,130,36,60,63,42,20,0,90,84,2


In [9]:

# Apply exclusions if needed
# Example:
#data = data[data['PURPOSE'] == 1]

# Determine number of individuals in the data
N = len(data['ID'].unique())

# Define model parameters
beta = np.array([0, 0, 0, 0, 1])
beta_names = ["ASC_CAR", "ASC_SM", "BETA_TT", "BETA_TC", "lambda_CAR_TRAIN"]
fixed_params = {}  # No fixed parameters in this example

# Define log-likelihood function
def log_likelihood(beta):
    # Define utility functions
    data['U_TRAIN'] = beta[2] * data['TRAIN_TT'] + beta[3] * (data['GA'] == 0) * data['TRAIN_CO']
    data['U_SM'] = beta[1] + beta[2] * data['SM_TT'] + beta[3] * (data['GA'] == 0) * data['SM_CO']
    data['U_CAR'] = beta[0] + beta[2] * data['CAR_TT'] + beta[3] * data['CAR_CO']
    
    # Avoid numerical issues
    data.loc[data['U_TRAIN'] > 700, 'U_TRAIN'] = 700
    data.loc[data['U_SM'] > 700, 'U_SM'] = 700
    data.loc[data['U_CAR'] > 700, 'U_CAR'] = 700
    data.loc[data['U_TRAIN'] < -700, 'U_TRAIN'] = -700
    data.loc[data['U_SM'] < -700, 'U_SM'] = -700
    data.loc[data['U_CAR'] < -700, 'U_CAR'] = -700
    
    # Calculate logsum for nests with > 1 alt
    data['logsum_CAR_TRAIN'] = np.log(data['TRAIN_AV'] * np.exp(data['U_TRAIN'] / beta[4])
                                      + data['CAR_AV'] * np.exp(data['U_CAR'] / beta[4])
                                      + (1 - data['TRAIN_AV']) * (1 - data['CAR_AV']))
    
    # Nest probabilities
    data['P_nest_CAR_TRAIN'] = np.exp(beta[4] * data['logsum_CAR_TRAIN']) / \
                               (np.exp(beta[4] * data['logsum_CAR_TRAIN']) + data['SM_AV'] * np.exp(data['U_SM']))
    data['P_nest_SM'] = 1 - data['P_nest_CAR_TRAIN']
    
    # Within nest probabilities for nests with > 1 alt
    data['P_CAR_in_CAR_TRAIN'] = (data['CAR_AV'] * np.exp(data['U_CAR'] / beta[4])) / \
                                  (data['TRAIN_AV'] * np.exp(data['U_TRAIN'] / beta[4]) +
                                   data['CAR_AV'] * np.exp(data['U_CAR'] / beta[4]))
    data['P_TRAIN_in_CAR_TRAIN'] = 1 - data['P_CAR_in_CAR_TRAIN']
    
    # Full probabilities
    data['P_SM'] = data['P_nest_SM']
    data['P_TRAIN'] = data['P_nest_CAR_TRAIN'] * data['P_TRAIN_in_CAR_TRAIN']
    data['P_CAR'] = data['P_nest_CAR_TRAIN'] * data['P_CAR_in_CAR_TRAIN']
    
    # Calculate probability for chosen alternative for each row
    data['P'] = (data['CHOICE'] == 1) * data['P_TRAIN'] + \
                (data['CHOICE'] == 2) * data['P_SM'] + \
                (data['CHOICE'] == 3) * data['P_CAR']
    
    # Calculate log-likelihood
    LL = data.groupby('ID')['P'].prod().apply(np.log).sum()
    
    return -LL  # We minimize negative log-likelihood

# Run the model
result = minimize(log_likelihood, beta, method='BFGS')

# Print results
print("Optimized Parameters:")
for i, name in enumerate(beta_names):
    print(f"{name}: {result.x[i]}")


  df = fun(x) - f0
  df = fun(x) - f0
  df = fun(x) - f0
  df = fun(x) - f0
  df = fun(x) - f0


Optimized Parameters:
ASC_CAR: 0.3711811541383965
ASC_SM: 0.37613607649390896
BETA_TT: -0.009540856740380382
BETA_TC: -0.006312636195557501
lambda_CAR_TRAIN: 0.4871959478254065


In [10]:
data.head()

Unnamed: 0,ID,SURVEY,PURPOSE,FIRST,TICKET,WHO,LUGGAGE,S_O_P,AGE,MALE,...,U_CAR,logsum_CAR_TRAIN,P_nest_CAR_TRAIN,P_nest_SM,P_CAR_in_CAR_TRAIN,P_TRAIN_in_CAR_TRAIN,P_SM,P_TRAIN,P_CAR,P
0,1,0,1,0,1,1,0,2,3,0,...,-1.15542,-1.87586,0.410786,0.589214,0.609137,0.390863,0.589214,0.160561,0.250225,0.589214
1,1,0,1,0,1,1,0,2,3,0,...,-1.275361,-1.935179,0.392414,0.607586,0.505313,0.494687,0.607586,0.194122,0.198292,0.607586
2,1,0,1,0,1,1,0,2,3,0,...,-1.073356,-1.880231,0.428786,0.571214,0.724047,0.275953,0.571214,0.118325,0.310461,0.571214
3,1,0,1,0,1,1,0,2,3,0,...,-0.644018,-1.061705,0.508983,0.491017,0.770912,0.229088,0.491017,0.116602,0.392381,0.491017
4,1,0,1,0,1,1,0,2,3,0,...,-1.017757,-1.754525,0.409815,0.590185,0.715706,0.284294,0.590185,0.116508,0.293307,0.590185
