In [5]:
import pandas as pd
import numpy as np

In [6]:
from functions_NLM import estimate_nested_logit

In [7]:
# Read the tab-separated Swissmetro data file into a DataFrame
data = pd.read_csv(
    './data/swissmetro.dat',
    delimiter='\t',
)

In [8]:
# Determine number of individuals (distinct ID values) in the data
N = len(data['ID'].unique())

# Define model parameters.
# Starting values, in the same order as `beta_names`. They are written as
# floats on purpose: an integer-typed start vector would silently truncate
# any in-place floating-point update applied to it (e.g. a finite-difference
# perturbation). lambda_CAR_TRAIN starts at 1, the value at which the nest
# terms in the likelihood reduce to a plain multinomial logit.
beta = np.array([0.0, 0.0, 0.0, 0.0, 1.0])
beta_names = ["ASC_CAR", "ASC_SM", "BETA_TT", "BETA_TC", "lambda_CAR_TRAIN"]
fixed_params = {}  # No fixed parameters in this example

# Define log-likelihood function
def log_likelihood_SM(beta, data):
    """Return the negative log-likelihood of the Swissmetro nested logit model.

    CAR and TRAIN share one nest whose scale parameter is ``beta[4]``; SM is
    the only alternative in its own nest.

    Parameters
    ----------
    beta : sequence of 5 numbers
        ``[ASC_CAR, ASC_SM, BETA_TT, BETA_TC, lambda_CAR_TRAIN]``.
    data : pandas.DataFrame
        One row per choice observation. Columns read: ``ID``, ``CHOICE``
        (1 = TRAIN, 2 = SM, 3 = CAR), ``GA``, ``TRAIN_TT``, ``TRAIN_CO``,
        ``SM_TT``, ``SM_CO``, ``CAR_TT``, ``CAR_CO`` and the availability
        flags ``TRAIN_AV``, ``SM_AV``, ``CAR_AV`` (assumed to be 0/1).

    Returns
    -------
    float
        Minus the sum over all observations of log P(chosen alternative),
        suitable for a minimiser.

    Notes
    -----
    Side effect: the intermediate columns ``U_*``, ``logsum_CAR_TRAIN``,
    ``P_nest_*``, ``P_*_in_CAR_TRAIN``, ``P_SM``, ``P_TRAIN``, ``P_CAR`` and
    ``P`` are (over)written on ``data`` on every call. This is kept so that
    code inspecting those columns after a call continues to work.

    Rows whose probability is NaN are skipped by pandas' default NaN handling
    in the group-wise aggregation.
    """
    # Every argument handed to np.exp is kept within +/- exp_limit; float64
    # exp() overflows just above 709.
    exp_limit = 700

    asc_car, asc_sm, beta_tt, beta_tc, lam = beta[0], beta[1], beta[2], beta[3], beta[4]
    train_av, sm_av, car_av = data['TRAIN_AV'], data['SM_AV'], data['CAR_AV']

    # Utilities. For TRAIN and SM the cost term only applies where GA == 0.
    no_ga = (data['GA'] == 0)
    u_train = (beta_tt * data['TRAIN_TT']
               + beta_tc * no_ga * data['TRAIN_CO']).clip(-exp_limit, exp_limit)
    u_sm = (asc_sm + beta_tt * data['SM_TT']
            + beta_tc * no_ga * data['SM_CO']).clip(-exp_limit, exp_limit)
    u_car = (asc_car + beta_tt * data['CAR_TT']
             + beta_tc * data['CAR_CO']).clip(-exp_limit, exp_limit)

    # The exponentials inside the nest are taken of U / lambda, so the guard
    # must be applied AFTER scaling: with lambda < 1 a utility inside
    # [-700, 700] can still overflow exp() and turn the probabilities into NaN.
    exp_train = train_av * np.exp((u_train / lam).clip(-exp_limit, exp_limit))
    exp_car = car_av * np.exp((u_car / lam).clip(-exp_limit, exp_limit))

    # 1 on rows where neither CAR nor TRAIN is available, else 0. Adding it
    # keeps the log (and the within-nest division below) well defined there.
    nest_empty = (1 - train_av) * (1 - car_av)
    nest_available = 1 - nest_empty
    nest_sum = exp_train + exp_car + nest_empty
    logsum = np.log(nest_sum)

    # Nest probabilities. An empty nest contributes nothing, so SM gets
    # probability 1 on such rows instead of sharing mass with the empty nest.
    nest_term = nest_available * np.exp((lam * logsum).clip(-exp_limit, exp_limit))
    sm_term = sm_av * np.exp(u_sm)
    p_nest_car_train = nest_term / (nest_term + sm_term)
    p_nest_sm = 1 - p_nest_car_train

    # Within-nest probabilities. Dividing by nest_sum (rather than by
    # exp_train + exp_car) gives 0 instead of 0/0 for an empty nest and is
    # identical on every other row.
    p_car_in_nest = exp_car / nest_sum
    p_train_in_nest = nest_available - p_car_in_nest

    # Store the intermediate results on `data` (see Notes in the docstring).
    data['U_TRAIN'] = u_train
    data['U_SM'] = u_sm
    data['U_CAR'] = u_car
    data['logsum_CAR_TRAIN'] = logsum
    data['P_nest_CAR_TRAIN'] = p_nest_car_train
    data['P_nest_SM'] = p_nest_sm
    data['P_CAR_in_CAR_TRAIN'] = p_car_in_nest
    data['P_TRAIN_in_CAR_TRAIN'] = p_train_in_nest

    # Full probabilities
    data['P_SM'] = p_nest_sm
    data['P_TRAIN'] = p_nest_car_train * p_train_in_nest
    data['P_CAR'] = p_nest_car_train * p_car_in_nest

    # Probability of the chosen alternative for each row
    data['P'] = (data['CHOICE'] == 1) * data['P_TRAIN'] + \
                (data['CHOICE'] == 2) * data['P_SM'] + \
                (data['CHOICE'] == 3) * data['P_CAR']

    # Log-likelihood: sum the logs per individual instead of taking the log of
    # the per-individual product, which underflows to 0 (log -> -inf) when an
    # individual has many observations or small probabilities.
    LL = np.log(data['P']).groupby(data['ID']).sum().sum()

    return -LL  # We minimize negative log-likelihood


In [9]:
# Fit the model: hand the data, the starting values, the parameter labels and
# the negative log-likelihood function to the estimation helper, then unpack
# the four objects it returns.
(result, se, t_stat, p_value) = estimate_nested_logit(
    data,
    beta,
    beta_names,
    log_likelihood_SM,
)

  df = fun(x) - f0
  df = fun(x) - f0
  df = fun(x) - f0
  df = fun(x) - f0
  df = fun(x) - f0


Optimization Results:
          Parameter  Estimate  Robust Asymptotic SE  t-statistic  p-value
0           ASC_CAR  0.371181              0.003690   100.580885      0.0
1            ASC_SM  0.376136              0.027474    13.690681      0.0
2           BETA_TT -0.009541              0.000419   -22.744084      0.0
3           BETA_TC -0.006313              0.000306   -20.633566      0.0
4  lambda_CAR_TRAIN  0.487196              0.018970    25.682932      0.0
