<a href="https://colab.research.google.com/github/arteagac/xlogit/blob/master/examples/mixed_logit_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install `xlogit` package

In [1]:
# Install xlogit straight from GitHub. `%pip` (rather than `!pip`) makes sure
# the package lands in the environment of the running kernel.
%pip install git+https://github.com/arteagac/xlogit

Collecting git+https://github.com/arteagac/xlogit
  Cloning https://github.com/arteagac/xlogit to /tmp/pip-req-build-72ts81rm
  Running command git clone -q https://github.com/arteagac/xlogit /tmp/pip-req-build-72ts81rm
Building wheels for collected packages: xlogit
  Building wheel for xlogit (setup.py) ... [?25l[?25hdone
  Created wheel for xlogit: filename=xlogit-0.0.1-cp36-none-any.whl size=11837 sha256=7369463827fa71a5b9f0ab869a5231b4f3a07642232759def111638f41e6f178
  Stored in directory: /tmp/pip-ephem-wheel-cache-avk_5yal/wheels/64/50/8d/a97e0500aac20b521a2896234d6598045323a7d0daca37648a
Successfully built xlogit
Installing collected packages: xlogit
Successfully installed xlogit-0.0.1


# Electricity Dataset

## Read data

In [3]:
import numpy as np
import pandas as pd

# Electricity example data, downloaded from the xlogit repository.
data_url = "https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/electricity_long.csv"
df = pd.read_csv(data_url)
df

Unnamed: 0,choice,id,alt,pf,cl,loc,wk,tod,seas,chid
0,0,1,1,7,5,0,1,0,0,1
1,0,1,2,9,1,1,0,0,0,1
2,0,1,3,0,0,0,0,0,1,1
3,1,1,4,0,5,0,1,1,0,1
4,0,1,1,7,0,0,1,0,0,2
...,...,...,...,...,...,...,...,...,...,...
17227,0,361,4,0,1,1,0,0,1,4307
17228,1,361,1,9,0,0,1,0,0,4308
17229,0,361,2,7,0,0,0,0,0,4308
17230,0,361,3,0,1,0,1,0,1,4308


## Fit the model

In [5]:
from xlogit import MixedLogit

# Explanatory variables; each one is also listed in `randvars` below.
varnames = ["pf", "cl", "loc", "wk", "tod", "seas"]
X = df[varnames].to_numpy()
y = df["choice"].to_numpy()

model = MixedLogit()
model.fit(
    X,
    y,
    varnames=varnames,
    alt=df["alt"],
    # Maps every variable name to 'n' (presumably a normal mixing
    # distribution -- confirm against the xlogit documentation).
    randvars=dict.fromkeys(varnames, "n"),
    # The `id` column is passed as the panel identifier.
    panel=df["id"].to_numpy(),
    n_draws=600,
)
model.summary()

Estimation with GPU processing enabled.
Optimization terminated successfully.
         Current function value: 3887.712137
         Iterations: 45
         Function evaluations: 57
         Gradient evaluations: 57
Estimation time= 6.0 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
pf                     -0.9906084     0.0351627   -28.1720993       3.2e-92 ***
cl                     -0.2211027     0.0226081    -9.7797819      2.67e-19 ***
loc                     2.3065648     0.1163005    19.8328064      9.35e-59 ***
wk                      1.6616047     0.0889379    18.6827449      5.34e-54 ***
tod                    -9.5584034     0.3058567   -31.2512515     8.77e-104 ***
seas                   -9.6547711     0.2968670   -32.5222090     2.08e-108 ***
sd.pf                   0.2076202     0.0166995 

# Fishing Dataset

## Read data

In [6]:
import pandas as pd

# Fishing example data, downloaded from the xlogit repository.
data_url = "https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/fishing_long.csv"
df = pd.read_csv(data_url)
df

Unnamed: 0,id,alt,choice,income,price,catch
0,1,beach,0,7083.33170,157.930,0.0678
1,1,boat,0,7083.33170,157.930,0.2601
2,1,charter,1,7083.33170,182.930,0.5391
3,1,pier,0,7083.33170,157.930,0.0503
4,2,beach,0,1249.99980,15.114,0.1049
...,...,...,...,...,...,...
4723,1181,pier,0,416.66668,36.636,0.4522
4724,1182,beach,0,6250.00130,339.890,0.2537
4725,1182,boat,1,6250.00130,235.436,0.6817
4726,1182,charter,0,6250.00130,260.436,2.3014


## Fit the model

In [7]:
from xlogit import MixedLogit

# Both explanatory variables are also listed in `randvars` below.
varnames = ["price", "catch"]
X = df[varnames].to_numpy()
y = df["choice"].to_numpy()

model = MixedLogit()
model.fit(
    X,
    y,
    varnames=varnames,
    alt=["beach", "boat", "charter", "pier"],
    # Same mapping as {'price': 'n', 'catch': 'n'}.
    randvars={name: "n" for name in varnames},
)
model.summary()

Estimation with GPU processing enabled.
Optimization terminated successfully.
         Current function value: 1300.226850
         Iterations: 21
         Function evaluations: 29
         Gradient evaluations: 29
Estimation time= 1.0 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
price                  -0.0274061     0.0022827   -12.0062499       2.2e-30 ***
catch                   1.3345446     0.1735364     7.6902874      2.29e-13 ***
sd.price                0.0104608     0.0020466     5.1113049      1.93e-06 ***
sd.catch                1.5857201     0.3746104     4.2329844      0.000109 ***
---------------------------------------------------------------------------
Significance:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log-Likelihood= -1300.227
AIC= 2608.454
BIC= 2628.754


# Car Dataset

## Read data

In [9]:
import numpy as np
import pandas as pd

data_url = "https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/car100_long.csv"

# Load the car data, then flip the sign of both cost columns; price is
# additionally divided by 10000.
df = pd.read_csv(data_url).assign(
    price=lambda raw: -raw["price"] / 10000,
    operating_cost=lambda raw: -raw["operating_cost"],
)
df

Unnamed: 0,person_id,choice_situation,choice,price,operating_cost,range,electric,gas,hybrid,high_performance,medium_performance
0,1,1,0,-4.6763,-47.43,0.0,0,0,1,0,0
1,1,1,1,-5.7209,-27.43,1.3,1,0,0,1,1
2,1,1,0,-8.7960,-32.41,1.2,1,0,0,0,1
3,1,2,1,-3.3768,-4.89,1.3,1,0,0,1,1
4,1,2,0,-9.0336,-30.19,0.0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...
4447,100,1483,0,-2.8036,-14.45,1.6,1,0,0,0,0
4448,100,1483,0,-1.9360,-54.76,0.0,0,1,0,1,1
4449,100,1484,1,-2.4054,-50.57,0.0,0,1,0,0,0
4450,100,1484,0,-5.2795,-21.25,0.0,0,0,1,0,1


## Fit the model

In [10]:
from xlogit import MixedLogit

varnames = [
    "high_performance",
    "medium_performance",
    "price",
    "operating_cost",
    "range",
    "electric",
    "hybrid",
]
X = df[varnames].to_numpy()
y = df["choice"].to_numpy()

model = MixedLogit()
model.fit(
    X,
    y,
    varnames=varnames,
    alt=["car", "bus", "bike"],
    # Only these five variables are listed as random; the two performance
    # dummies are not. ('ln' vs 'n' presumably select lognormal vs normal
    # mixing distributions -- confirm against the xlogit documentation.)
    randvars={
        "price": "ln",
        "operating_cost": "n",
        "range": "ln",
        "electric": "n",
        "hybrid": "n",
    },
    # Panel column
    panel=df["person_id"].to_numpy(),
    n_draws=100,
)
model.summary()

Estimation with GPU processing enabled.
Optimization terminated successfully.
         Current function value: 1296.872299
         Iterations: 47
         Function evaluations: 60
         Gradient evaluations: 60
Estimation time= 1.3 seconds
---------------------------------------------------------------------------
Coefficient              Estimate      Std.Err.         z-val         P>|z|
---------------------------------------------------------------------------
high_performance        0.0981498     0.0971076     1.0107323         0.476    
medium_performance      0.5730667     0.1007914     5.6856682      5.72e-07 ***
price                  -0.7671921     0.1347869    -5.6918906      5.57e-07 ***
operating_cost          0.0126422     0.0053612     2.3580818        0.0518 .  
range                  -0.5746895     0.3466185    -1.6579886         0.202    
electric               -1.6610387     0.3236984    -5.1314391      5.95e-06 ***
hybrid                  0.7271779     0.1602652 