## Install and import `xlogit` package

Install `xlogit` using `pip` as shown below. In addition, import the package and check if GPU processing is available.

In [None]:
!pip install xlogit
from xlogit import MixedLogit
MixedLogit.check_if_gpu_available()

Collecting xlogit
  Downloading xlogit-0.2.7-py3-none-any.whl (36 kB)
Installing collected packages: xlogit
Successfully installed xlogit-0.2.7
1 GPU device(s) available. xlogit will use GPU processing


True

### Read data

In [None]:
import numpy as np
import pandas as pd

# Location of the tab-separated airline data file (hard-coded path).
airline_path = "/content/airline.dat"
df_wide = pd.read_table(airline_path, sep='\t')

# Sequential row id (0..n-1); used later as the id of each choice situation
# when reshaping to long format and when fitting the models.
n_rows = len(df_wide)
df_wide['custom_id'] = np.arange(n_rows)

In [None]:
# Display the wide-format table as loaded, including the added custom_id column.
df_wide

Unnamed: 0,SubjectId,OriginGMT,DestinationGMT,Direction,q02_TripPurpose,q03_WhoPays,q11_DepartureOrArrivalIsImportant,q12_IdealDepTime,q13_IdealArrTime,q14_PartySize,...,Legroom_2,Legroom_3,Fare_1,Fare_2,Fare_3,BestAlternative_1,BestAlternative_2,BestAlternative_3,TripPurpose,custom_id
0,1,300,360,0,2,1,2,-1,480,2,...,3,4,835,835,730,0,0,1,2,0
1,2,480,480,1,2,1,2,-1,540,1,...,2,3,315,315,350,0,1,0,2,1
2,3,480,480,1,2,1,1,-1,-1,2,...,1,4,195,160,170,0,1,0,2,2
3,4,480,480,1,2,3,1,630,-1,2,...,3,1,135,125,140,0,0,1,2,3
4,5,300,480,0,2,1,0,-1,-1,3,...,2,3,270,300,300,1,0,0,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3604,3609,480,300,2,3,1,2,-1,660,3,...,3,4,650,480,650,0,1,0,3,3604
3605,3610,480,300,2,2,1,2,-1,480,1,...,3,4,760,690,590,0,0,1,2,3605
3606,3611,300,360,0,1,1,2,-1,870,1,...,1,4,290,290,300,0,0,1,1,3606
3607,3612,300,480,0,2,1,2,-1,1320,1,...,4,3,480,360,420,1,0,0,2,3607


In [None]:
# Collapse the three BestAlternative_* indicator columns into one integer
# `choice` column (1, 2 or 3). Rows where no indicator equals 1 keep the
# initial 0; if more than one indicator is set, the highest-numbered one wins
# because later assignments overwrite earlier ones.
best_alt_cols = ['BestAlternative_1', 'BestAlternative_2', 'BestAlternative_3']
df_wide['choice'] = 0
for alt_number, indicator_col in enumerate(best_alt_cols, start=1):
    df_wide.loc[df_wide[indicator_col] == 1, 'choice'] = alt_number

# The indicators are redundant once `choice` exists, so drop them.
df_wide = df_wide.drop(best_alt_cols, axis=1)
df_wide

Unnamed: 0,SubjectId,OriginGMT,DestinationGMT,Direction,q02_TripPurpose,q03_WhoPays,q11_DepartureOrArrivalIsImportant,q12_IdealDepTime,q13_IdealArrTime,q14_PartySize,...,TripTimeHours_3,Legroom_1,Legroom_2,Legroom_3,Fare_1,Fare_2,Fare_3,TripPurpose,custom_id,choice
0,1,300,360,0,2,1,2,-1,480,2,...,5.61667,2,3,4,835,835,730,2,0,3
1,2,480,480,1,2,1,2,-1,540,1,...,4.53333,2,2,3,315,315,350,2,1,2
2,3,480,480,1,2,1,1,-1,-1,2,...,5.03333,3,1,4,195,160,170,2,2,2
3,4,480,480,1,2,3,1,630,-1,2,...,1.96667,3,3,1,135,125,140,2,3,3
4,5,300,480,0,2,1,0,-1,-1,3,...,5.80000,3,2,3,270,300,300,2,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3604,3609,480,300,2,3,1,2,-1,660,3,...,6.26667,3,3,4,650,480,650,3,3604,2
3605,3610,480,300,2,2,1,2,-1,480,1,...,6.33333,2,3,4,760,690,590,2,3605,3
3606,3611,300,360,0,1,1,2,-1,870,1,...,3.30000,2,1,4,290,290,300,1,3606,3
3607,3612,300,480,0,2,1,2,-1,1320,1,...,7.65000,3,4,3,480,360,420,2,3607,1


In [None]:
# Recode the numeric choice (1/2/3) to the text labels 'one'/'two'/'three'.
# Any value that is not a key of the mapping becomes NaN.
choice_labels = {1: 'one', 2: 'two', 3: 'three'}
df_wide['choice'] = df_wide['choice'].map(choice_labels)

In [None]:
# Display the table after `choice` was recoded to 'one'/'two'/'three'.
df_wide

Unnamed: 0,SubjectId,OriginGMT,DestinationGMT,Direction,q02_TripPurpose,q03_WhoPays,q11_DepartureOrArrivalIsImportant,q12_IdealDepTime,q13_IdealArrTime,q14_PartySize,...,TripTimeHours_3,Legroom_1,Legroom_2,Legroom_3,Fare_1,Fare_2,Fare_3,TripPurpose,custom_id,choice
0,1,300,360,0,2,1,2,-1,480,2,...,5.61667,2,3,4,835,835,730,2,0,three
1,2,480,480,1,2,1,2,-1,540,1,...,4.53333,2,2,3,315,315,350,2,1,two
2,3,480,480,1,2,1,1,-1,-1,2,...,5.03333,3,1,4,195,160,170,2,2,two
3,4,480,480,1,2,3,1,630,-1,2,...,1.96667,3,3,1,135,125,140,2,3,three
4,5,300,480,0,2,1,0,-1,-1,3,...,5.80000,3,2,3,270,300,300,2,4,one
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3604,3609,480,300,2,3,1,2,-1,660,3,...,6.26667,3,3,4,650,480,650,3,3604,two
3605,3610,480,300,2,2,1,2,-1,480,1,...,6.33333,2,3,4,760,690,590,2,3605,three
3606,3611,300,360,0,1,1,2,-1,870,1,...,3.30000,2,1,4,290,290,300,1,3606,three
3607,3612,300,480,0,2,1,2,-1,1320,1,...,7.65000,3,4,3,480,360,420,2,3607,one


In [None]:
# Map every per-alternative column from its numeric suffix (_1, _2, _3) to the
# matching alternative label (_one, _two, _three), e.g. Fare_1 -> Fare_one.
suffix_to_label = {'1': 'one', '2': 'two', '3': 'three'}
per_alt_attributes = ['DepartureTimeHours', 'ArrivalTimeHours', 'FlyingTimeHours',
                      'TripTimeHours', 'Legroom', 'Fare']
rename_dict = {
    f'{attribute}_{suffix}': f'{attribute}_{label}'
    for suffix, label in suffix_to_label.items()
    for attribute in per_alt_attributes
}
# Apply the suffix mapping to the column labels (rows are untouched).
df_wide = df_wide.rename(rename_dict, axis='columns')

In [None]:
# Display the table after the per-alternative columns were renamed to the
# _one/_two/_three suffixes.
df_wide

Unnamed: 0,SubjectId,OriginGMT,DestinationGMT,Direction,q02_TripPurpose,q03_WhoPays,q11_DepartureOrArrivalIsImportant,q12_IdealDepTime,q13_IdealArrTime,q14_PartySize,...,TripTimeHours_three,Legroom_one,Legroom_two,Legroom_three,Fare_one,Fare_two,Fare_three,TripPurpose,custom_id,choice
0,1,300,360,0,2,1,2,-1,480,2,...,5.61667,2,3,4,835,835,730,2,0,three
1,2,480,480,1,2,1,2,-1,540,1,...,4.53333,2,2,3,315,315,350,2,1,two
2,3,480,480,1,2,1,1,-1,-1,2,...,5.03333,3,1,4,195,160,170,2,2,two
3,4,480,480,1,2,3,1,630,-1,2,...,1.96667,3,3,1,135,125,140,2,3,three
4,5,300,480,0,2,1,0,-1,-1,3,...,5.80000,3,2,3,270,300,300,2,4,one
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3604,3609,480,300,2,3,1,2,-1,660,3,...,6.26667,3,3,4,650,480,650,3,3604,two
3605,3610,480,300,2,2,1,2,-1,480,1,...,6.33333,2,3,4,760,690,590,2,3605,three
3606,3611,300,360,0,1,1,2,-1,870,1,...,3.30000,2,1,4,290,290,300,1,3606,three
3607,3612,300,480,0,2,1,2,-1,1320,1,...,7.65000,3,4,3,480,360,420,2,3607,one


In [None]:
from xlogit.utils import wide_to_long

# Attributes stored as one column per alternative (<attribute>_<alt>); each
# set is stacked into a single column in the long frame.
alt_specific_cols = ['DepartureTimeHours', 'ArrivalTimeHours', 'FlyingTimeHours',
                     'TripTimeHours', 'Legroom', 'Fare']

# Reshape to long format: one row per (custom_id, alternative) pair, with the
# alternative label stored in the new `alt` column.
df = wide_to_long(
    df_wide,
    id_col='custom_id',
    alt_name='alt',
    alt_list=['one', 'two', 'three'],
    varying=alt_specific_cols,
    sep='_',
    alt_is_prefix=False,  # the alternative label is a suffix of the column name
    empty_val=0,          # fill value when an alternative lacks a variable
)
df

Unnamed: 0,custom_id,alt,DepartureTimeHours,ArrivalTimeHours,FlyingTimeHours,TripTimeHours,Legroom,Fare,SubjectId,OriginGMT,...,AirplaneSecondFlight_2,AirplaneSecondFlight_3,DepartureTimeMins_1,DepartureTimeMins_2,DepartureTimeMins_3,ArrivalTimeMins_1,ArrivalTimeMins_2,ArrivalTimeMins_3,TripPurpose,choice
0,0,one,7.0,10.6167,4.61667,4.61667,2,835,1,300,...,8,7,420,540,540,637,847,817,2,three
1,0,two,9.0,14.1167,5.11667,6.11667,3,835,1,300,...,8,7,420,540,540,637,847,817,2,three
2,0,three,9.0,13.6167,5.11667,5.61667,4,730,1,300,...,8,7,420,540,540,637,847,817,2,three
3,1,one,7.5,10.0333,2.53333,2.53333,2,315,2,480,...,8,1,450,900,720,602,1112,992,2,two
4,1,two,15.0,18.5333,3.03333,3.53333,2,315,2,480,...,8,1,450,900,720,602,1112,992,2,two
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10822,3607,two,11.0,16.6500,6.65000,8.65000,4,360,3612,300,...,5,5,600,660,660,789,999,939,2,one
10823,3607,three,11.0,15.6500,6.65000,7.65000,3,420,3612,300,...,5,5,600,660,660,789,999,939,2,one
10824,3608,one,10.5,12.2500,1.75000,1.75000,1,360,3613,360,...,4,7,630,630,1080,735,855,1305,2,three
10825,3608,two,10.5,14.2500,2.25000,3.75000,1,375,3613,360,...,4,7,630,630,1080,735,855,1305,2,three


### Reshape data

The imported dataframe is in wide format, and it needs to be reshaped to long format for processing by `xlogit`, which offers the convenient `wide_to_long` utility for this reshaping process. The user needs to specify the column that uniquely identifies each sample, the names of the alternatives, the columns that vary across alternatives, and whether the alternative names are a prefix or suffix of the column names. Additionally, the user can specify a value (`empty_val`) to be used by default when an alternative is not available for a certain variable. Additional usage examples for the `wide_to_long` function are available in xlogit's documentation at https://xlogit.readthedocs.io/en/latest/notebooks/convert_data_wide_to_long.html. Also, details about the function parameters are available in the [API reference](https://xlogit.readthedocs.io/en/latest/api/utils.html#xlogit.utils.wide_to_long).

### Create model specification

In [None]:
# Alternative-specific constants: 1.0 on rows of the named alternative and 0.0
# on all other rows. No constant is created for alternative 'three'.
n_long_rows = len(df)
is_alt_one = df['alt'] == 'one'
is_alt_two = df['alt'] == 'two'
df['ASC_1'] = np.ones(n_long_rows) * is_alt_one
df['ASC_2'] = np.ones(n_long_rows) * is_alt_two

### Estimate model parameters

The `fit` method estimates the model by taking as input the data from the previous step along with additional specification criteria, such as the distribution of the random parameters (`randvars`), the number of random draws (`n_draws`), and the availability of alternatives for the choice situations (`avail`). The `randvars` and `n_draws` options apply to the mixed logit models only. For those models, we set the optimization method to `L-BFGS-B`, as this is a robust routine that usually helps solve convergence issues. Once the estimation routine is completed, the `summary` method can be used to display the estimation results.

In [None]:
from xlogit import MultinomialLogit

# Baseline multinomial logit: two alternative-specific constants plus one
# coefficient each for trip time and fare, shared by all alternatives.
varnames = ['ASC_1', 'ASC_2', 'TripTimeHours', 'Fare']

model = MultinomialLogit()
model.fit(
    X=df[varnames],
    y=df['choice'],
    varnames=varnames,
    alts=df['alt'],
    ids=df['custom_id'],
)
model.summary()

Optimization terminated successfully.
    Message: The gradients are close to zero
    Iterations: 9
    Function evaluations: 10
Estimation time= 0.1 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
ASC_1                   1.4434138     0.1247570    11.5698029         2e-30 ***
ASC_2                   0.2234718     0.0657035     3.4012141      0.000678 ***
TripTimeHours          -0.3173177     0.0657406    -4.8268179      1.44e-06 ***
Fare                   -0.0188392     0.0006736   -27.9698216      5.5e-156 ***
---------------------------------------------------------------------------
Significance:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log-Likelihood= -2425.219
AIC= 4858.437
BIC= 4883.202


In [None]:
from xlogit import MixedLogit

# Same explanatory variables as the baseline MNL, but the TripTimeHours
# coefficient is random ('n'); the summary reports its spread as
# sd.TripTimeHours.
varnames = ['ASC_1', 'ASC_2', 'Fare', 'TripTimeHours']
random_coefficients = {'TripTimeHours': 'n'}

model = MixedLogit()
model.fit(
    X=df[varnames],
    y=df['choice'],
    varnames=varnames,
    alts=df['alt'],
    ids=df['custom_id'],
    # presumably groups choice situations by respondent - confirm that
    # SubjectId actually repeats across rows.
    panels=df["SubjectId"],
    randvars=random_coefficients,
    n_draws=1500,
    optim_method='L-BFGS-B',
)
model.summary()

GPU processing enabled.
Optimization terminated successfully.
    Message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
    Iterations: 38
    Function evaluations: 49
Estimation time= 26.7 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
ASC_1                   1.2256740     0.1664268     7.3646424      2.19e-13 ***
ASC_2                   0.2364305     0.0689051     3.4312483      0.000608 ***
Fare                   -0.0211866     0.0012590   -16.8279086      3.11e-61 ***
TripTimeHours          -0.5941886     0.1461566    -4.0654249       4.9e-05 ***
sd.TripTimeHours        0.5864625     0.1550072     3.7834542      0.000157 ***
---------------------------------------------------------------------------
Significance:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log-Likelihood= -2421.624
AIC= 4

In [None]:
# Create model specification
# Boolean row masks, one per alternative, shared by every interaction below.
is_one = df['alt'] == 'one'
is_two = df['alt'] == 'two'
is_three = df['alt'] == 'three'
is_one_or_two = is_one | is_two

# Alternative Specific Constants (none for alternative 'three')
df['asc_one'] = np.ones(len(df))*is_one
df['asc_two'] = np.ones(len(df))*is_two

# Fare (one coefficient per alternative)
df['cost_one'] = df['Fare']*is_one
df['cost_two'] = df['Fare']*is_two
df['cost_three'] = df['Fare']*is_three

# Flying time (one coefficient shared by alternatives one and two, another for three)
df['flytime_one_two'] = df['FlyingTimeHours']*is_one_or_two
df['flytime_three'] = df['FlyingTimeHours']*is_three

# Trip time (same grouping as flying time)
df['triptime_one_two'] = df['TripTimeHours']*is_one_or_two
df['triptime_three'] = df['TripTimeHours']*is_three

# Legroom (one coefficient per alternative)
df['legroom_one'] = df['Legroom']*is_one
df['legroom_two'] = df['Legroom']*is_two
df['legroom_three'] = df['Legroom']*is_three

# Arrival time (one coefficient per alternative)
df['arrival_one'] = df['ArrivalTimeHours']*is_one
df['arrival_two'] = df['ArrivalTimeHours']*is_two
df['arrival_three'] = df['ArrivalTimeHours']*is_three

# Departure time (one coefficient per alternative)
df['dep_one'] = df['DepartureTimeHours']*is_one
df['dep_two'] = df['DepartureTimeHours']*is_two
df['dep_three'] = df['DepartureTimeHours']*is_three



In [None]:
from xlogit import MultinomialLogit

# Extended multinomial logit using the alternative-specific terms built in the
# specification cell.
# NOTE(review): the recorded summary shows very large standard errors for
# asc_one and both flytime_* terms - possibly a collinearity/identification
# problem in this specification; worth checking.
varnames = [
    'asc_one', 'asc_two',
    'flytime_one_two', 'flytime_three',
    'cost_one', 'cost_two', 'cost_three',
    'legroom_one', 'legroom_two', 'legroom_three',
    'arrival_one', 'arrival_two', 'arrival_three',
    'dep_one', 'dep_two', 'dep_three',
    'triptime_one_two', 'triptime_three',
]

model = MultinomialLogit()
model.fit(
    X=df[varnames],
    y=df['choice'],
    varnames=varnames,
    alts=df['alt'],
    ids=df['custom_id'],
)
model.summary()

Optimization terminated successfully.
    Message: The gradients are close to zero
    Iterations: 12
    Function evaluations: 13
Estimation time= 0.3 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
asc_one                 1.3218455    84.0294376     0.0157307         0.987    
asc_two                 1.1642401     0.4391041     2.6513989       0.00805 ** 
flytime_one_two        -0.4340280   168.0579238    -0.0025826         0.998    
flytime_three          -0.2268955   168.0579192    -0.0013501         0.999    
cost_one               -0.0194305     0.0007036   -27.6139512     1.92e-152 ***
cost_two               -0.0208069     0.0008095   -25.7045882     5.96e-134 ***
cost_three             -0.0210308     0.0008426   -24.9587569     5.66e-127 ***
legroom_one             0.2425505     0.0366831     

In [None]:
from xlogit import MixedLogit

# Extended specification with both flying-time coefficients random ('n').
varnames = [
    'asc_one', 'asc_two',
    'flytime_one_two', 'flytime_three',
    'cost_one', 'cost_two', 'cost_three',
    'legroom_one', 'legroom_two', 'legroom_three',
    'arrival_one', 'arrival_two', 'arrival_three',
    'dep_one', 'dep_two', 'dep_three',
    'triptime_one_two', 'triptime_three',
]
random_coefficients = {'flytime_one_two': 'n', 'flytime_three': 'n'}

model = MixedLogit()
model.fit(
    X=df[varnames],
    y=df['choice'],
    varnames=varnames,
    alts=df['alt'],
    ids=df['custom_id'],
    panels=df["SubjectId"],
    randvars=random_coefficients,
    n_draws=1500,
    optim_method='L-BFGS-B',
)
model.summary()

GPU processing enabled.


  loglik = np.log(lik) if weights is None else np.log(lik)*weights


In [None]:
from xlogit import MixedLogit

# Extended specification with the flying-time and arrival-time coefficients
# random ('n').
# NOTE(review): the recorded output for this cell shows a warning on the
# np.log(lik) line, only 1 iteration, NaN standard errors, and estimates equal
# to the extended MNL ones - the optimizer may not have moved from its
# starting point. Treat these results as unreliable until verified.
varnames = [
    'asc_one', 'asc_two',
    'flytime_one_two', 'flytime_three',
    'cost_one', 'cost_two', 'cost_three',
    'legroom_one', 'legroom_two', 'legroom_three',
    'arrival_one', 'arrival_two', 'arrival_three',
    'dep_one', 'dep_two', 'dep_three',
    'triptime_one_two', 'triptime_three',
]
random_coefficients = {
    'flytime_one_two': 'n',
    'flytime_three': 'n',
    'arrival_one': 'n',
    'arrival_two': 'n',
    'arrival_three': 'n',
}

model = MixedLogit()
model.fit(
    X=df[varnames],
    y=df['choice'],
    varnames=varnames,
    alts=df['alt'],
    ids=df['custom_id'],
    panels=df["SubjectId"],
    randvars=random_coefficients,
    n_draws=1500,
    optim_method='L-BFGS-B',
)
model.summary()

GPU processing enabled.


  loglik = np.log(lik) if weights is None else np.log(lik)*weights


Optimization terminated successfully.
    Message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
    Iterations: 1
    Function evaluations: 3
Estimation time= 178.5 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
asc_one                 1.3218455           nan           nan           nan    
asc_two                 1.1642401     0.5665768     2.0548674          0.04 *  
flytime_one_two        -0.4340280           nan           nan           nan    
flytime_three          -0.2268955           nan           nan           nan    
cost_one               -0.0194305           nan           nan           nan    
cost_two               -0.0208069           nan           nan           nan    
cost_three             -0.0210308           nan           nan           nan    
legroom_one             0.2425505   

  self.stderr = np.sqrt(np.diag(self.covariance))
