<a href="https://colab.research.google.com/github/arteagac/mixedlogit/blob/master/examples/mixed_logit_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install the `mixedlogit` library

In [1]:
!pip install mixedlogit

Collecting mixedlogit
  Downloading https://files.pythonhosted.org/packages/a4/50/4a32ecd08f1d0eccbb6c17661404075f29b6b06ea0177a752622e189b2fa/mixedlogit-0.0.2-py3-none-any.whl
Installing collected packages: mixedlogit
Successfully installed mixedlogit-0.0.2


# Electricity Dataset

## Read data

In [2]:
import pandas as pd
import numpy as np

# Electricity data; the file name indicates it is stored in long format.
df = pd.read_csv("https://raw.githubusercontent.com/timothyb0912/pylogit/master/examples/data/electricity_r_data_long.csv")

# Remove unbalanced panels (future versions will handle unbalanced panels):
# only ids that contribute exactly 48 rows are kept.
count_mix_by_id = np.unique(df.id.values, return_counts=True)
# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not raise pandas' SettingWithCopyWarning.
df = df[~df.id.isin(count_mix_by_id[0][count_mix_by_id[1] != 48])].copy()
df["choice"] = df["choice"].astype(int)  # The choice variable has to be a number

varnames = ["pf", "cl", "loc", "wk", "tod", "seas"]
X = df[varnames].values
y = df['choice'].values
isvars = []          # no individual-specific variables in this example
asvars = varnames    # every explanatory variable is alternative-specific
# Four alternatives per choice situation. The previous labels
# ('beach', 'boat', 'charter', 'pier') were the fishing modes from the Fishing
# example; numeric labels are used instead.
# NOTE(review): assumes the alternatives are coded 1-4 in the data - confirm.
alternatives = [1, 2, 3, 4]

## Fit the model

In [3]:
import numpy as np
from mixedlogit import MixedLogit

# Seed NumPy's global generator before fitting, as the Car example in this
# notebook does, so that repeated runs can reproduce the same estimates.
np.random.seed(0)

model = MixedLogit()
model.fit(X, y,
          varnames=varnames,
          alternatives=alternatives,
          asvars=varnames,
          # Every variable gets a random coefficient with distribution code 'n'
          # (presumably normal - confirm against the library documentation).
          randvars={name: 'n' for name in varnames},
          mixby=df.id.values,  # Panel column
          n_draws=600)
model.summary()

**** GPU Processing Enabled ****
Estimation succesfully completed after 54 iterations. Use .summary() to see the estimated values
--------------------------------------------------------------------------------
Coefficient          	Estimate 	Std.Err. 	z-val   	P>|z|   
--------------------------------------------------------------------------------
pf                   	-0.9881581 	0.0282180 	-35.018653 	0.000000 ***  
cl                   	-0.2214258 	0.0232726 	-9.514449 	0.000000 ***  
loc                  	2.3441905 	0.1285696 	18.232846 	0.000000 ***  
wk                   	1.6755598 	0.0907727 	18.458848 	0.000000 ***  
tod                  	-9.4701927 	0.2522352 	-37.545091 	0.000000 ***  
seas                 	-9.6527236 	0.2337246 	-41.299570 	0.000000 ***  
sd.pf                	-0.2092002 	0.0145773 	-14.351118 	0.000000 ***  
sd.cl                	-0.3965299 	0.0205752 	-19.272252 	0.000000 ***  
sd.loc               	-1.8385343 	0.1221602 	-15.050190 	0.000000 ***  
sd.wk

# Fishing Dataset

## Read data

In [4]:
import pandas as pd

# Fishing data; the file name indicates it is stored in long format.
fishing_url = "https://raw.githubusercontent.com/arteagac/mixedlogit/master/examples/data/fishing_long.csv"
df = pd.read_csv(fishing_url)

# Explanatory variables used by the model, then the response / design arrays.
varnames = ['price', 'catch']
y = df['choice'].values
X = df[varnames].values

## Fit the model

In [5]:
import numpy as np
from mixedlogit import MixedLogit

# Seed NumPy's global generator before fitting, as the Car example in this
# notebook does, so that repeated runs can reproduce the same estimates.
np.random.seed(0)

model = MixedLogit()
model.fit(X, y, varnames=varnames,
          asvars=varnames,  # both variables ('price', 'catch') are alternative-specific
          alternatives=['beach', 'boat', 'charter', 'pier'],
          # Distribution code 'n' for both random coefficients
          # (presumably normal - confirm against the library documentation).
          randvars={'price': 'n', 'catch': 'n'})
model.summary()

**** GPU Processing Enabled ****
Estimation succesfully completed after 27 iterations. Use .summary() to see the estimated values
--------------------------------------------------------------------------------
Coefficient          	Estimate 	Std.Err. 	z-val   	P>|z|   
--------------------------------------------------------------------------------
price                	-0.0274061 	0.0024847 	-11.029837 	0.000000 ***  
catch                	1.3345446 	0.1726896 	7.727997 	0.000000 **   
sd.price             	0.0104608 	0.0021156 	4.944513 	0.000004 **   
sd.catch             	1.5857199 	0.5797202 	2.735319 	0.019095 .    
--------------------------------------------------------------------------------
Significance:  *** 0    ** 0.001    * 0.01    . 0.05

Log-Likelihood= -1300.227


# Car Dataset

## Read data

In [6]:
import pandas as pd
import numpy as np
from mixedlogit import MixedLogit

df = pd.read_csv("https://raw.githubusercontent.com/arteagac/mixedlogit/master/examples/data/car100_long.csv")

# Remove unbalanced panels (future versions will handle unbalanced panels):
# only person_ids that contribute exactly 45 rows are kept.
count_mix_by_id = np.unique(df.person_id.values, return_counts=True)
# .copy() makes the filtered frame independent of the original, so the column
# assignments below do not raise pandas' SettingWithCopyWarning.
df = df[~df.person_id.isin(count_mix_by_id[0][count_mix_by_id[1] != 45])].copy()

# Flip the sign of both cost variables; price is also rescaled by 1/10000.
# NOTE(review): presumably the sign flip is needed because these variables are
# later given sign-restricted ('ln') coefficients - confirm.
df["price"] = -1*df["price"]/10000
df["operating_cost"] = -1*df["operating_cost"]

varnames = ['high_performance', 'medium_performance', 'price', 'operating_cost',
            'range', 'electric', 'hybrid']

X = df[varnames].values
y = df['choice'].values

## Fit the model

In [7]:
# Fixed seed so repeated runs of this cell start from the same random state.
np.random.seed(0)

# Distribution code per random coefficient; variables not listed here
# (the two performance dummies) are passed without a random distribution.
random_coefficients = {
    'price': 'ln',
    'operating_cost': 'n',
    'range': 'ln',
    'electric': 'n',
    'hybrid': 'n',
}
panel_ids = df.person_id.values  # Panel column

model = MixedLogit()
model.fit(
    X,
    y,
    alternatives=['car', 'bus', 'bike'],
    varnames=varnames,
    asvars=varnames,
    randvars=random_coefficients,
    mixby=panel_ids,
    n_draws=600,
)
model.summary()

**** GPU Processing Enabled ****
Estimation succesfully completed after 42 iterations. Use .summary() to see the estimated values
--------------------------------------------------------------------------------
Coefficient          	Estimate 	Std.Err. 	z-val   	P>|z|   
--------------------------------------------------------------------------------
high_performance     	0.0616266 	0.0957860 	0.643378 	0.645867      
medium_performance   	0.5658329 	0.0995584 	5.683427 	0.000001 **   
price                	-0.7487492 	0.1473184 	-5.082522 	0.000008 **   
operating_cost       	0.0106943 	0.0067425 	1.586101 	0.226961      
range                	-0.7585654 	0.5148421 	-1.473394 	0.269021      
electric             	-1.4404238 	0.3305194 	-4.358061 	0.000127 **   
hybrid               	0.7649744 	0.1651258 	4.632678 	0.000046 **   
sd.price             	0.9499825 	0.1623691 	5.850760 	0.000000 **   
sd.operating_cost    	0.0349346 	0.0059965 	5.825881 	0.000000 **   
sd.range             