<a href="https://colab.research.google.com/github/arteagac/xlogit/blob/master/examples/benchmark/google_colab_benchmark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import required libraries and read input data

In [None]:
#import cupy as cp
import sys, io
from time import perf_counter, time

import numpy as np
import pandas as pd

# Base URL of the example datasets read by the cells below
data_folder = "https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data"

# Benchmark results; every estimation run appends one row holding the
# library label, the number of draws and the measured estimation time
rdf = pd.DataFrame(columns=["library", "draws", "time"])

## Part 1: Run benchmark

### Execute xlogit

In [None]:
# Install and import xlogit
!pip install xlogit==0.1.0
from xlogit import MixedLogit
MixedLogit.check_if_gpu_available()

In [None]:
# Prepare input data: read the long-format artificial dataset and build the
# arrays and settings that the xlogit benchmark cells pass to MixedLogit.fit
csv_url = data_folder+"/artificial_long.csv"
df = pd.read_csv(csv_url)

# Explanatory variables, in the column order used for X
varnames = ['price', 'time', 'conven', 'comfort', 'meals', 'petfr',
            'emipp', 'nonsig1', 'nonsig2', 'nonsig3']
X, y = df[varnames].values, df['choice'].values

# Variables with random coefficients
# NOTE(review): 'n' presumably selects a normal mixing distribution -- confirm
randvars = dict.fromkeys(['meals', 'petfr', 'emipp'], 'n')
alts = [1, 2, 3]  # passed as `alts` to MixedLogit.fit

#### Run estimation using GPU

In [None]:
# Benchmark xlogit; results are recorded under the 'xlogit_gpu' label.
# Drop rows left by a previous execution of this cell so that re-running it
# replaces the measurements instead of duplicating them (duplicates make the
# pivot in the comparison table fail).
rdf = rdf[rdf.library != 'xlogit_gpu'].reset_index(drop=True)

print("Ndraws Time(s) Log-Likeli.")
for n_draws in [100, 200, 300]:
    np.random.seed(0)  # same seed for every run; set outside the timed region
    # perf_counter() is a monotonic high-resolution clock meant for measuring
    # intervals; time() follows the wall clock and can jump if it is adjusted
    start_time = perf_counter()
    model = MixedLogit()
    model.fit(X, y, varnames, alts=alts, n_draws=n_draws, verbose=0, randvars=randvars)
    elapsed = perf_counter() - start_time
    print("{:6} {:7.2f} {:11.2f}".format(n_draws, elapsed, model.loglikelihood))
    rdf.loc[len(rdf)] = ['xlogit_gpu', n_draws, elapsed]

#### Run estimation without using GPU

In [None]:
from xlogit import device
# Switch GPU acceleration off so the following runs measure the CPU code path
device.disable_gpu_acceleration()

# Drop rows left by a previous execution of this cell so that re-running it
# replaces the measurements instead of duplicating them (duplicates make the
# pivot in the comparison table fail).
rdf = rdf[rdf.library != 'xlogit'].reset_index(drop=True)

print("Ndraws Time(s) Log-Likeli.")
for n_draws in [100, 200, 300]:
    np.random.seed(0)  # same seed for every run; set outside the timed region
    # perf_counter() is a monotonic high-resolution clock meant for measuring
    # intervals; time() follows the wall clock and can jump if it is adjusted
    start_time = perf_counter()
    model = MixedLogit()
    model.fit(X, y, varnames, alts=alts, n_draws=n_draws, verbose=0, randvars=randvars)
    elapsed = perf_counter() - start_time
    print("{:6} {:7.2f} {:11.2f}".format(n_draws, elapsed, model.loglikelihood))
    rdf.loc[len(rdf)] = ['xlogit', n_draws, elapsed]

### Execute pylogit

In [None]:
# Install and import pylogit
!pip install pylogit==0.2.2
import pylogit as pl
from collections import OrderedDict
import warnings
# Disable warnings shown by pylogit to better visualize the bechmkar outputs
warnings.filterwarnings('ignore') 

In [None]:
# Prepare input data: long-format artificial dataset plus the column names
# and specification dictionaries handed to pylogit
df = pd.read_csv(data_folder+"/artificial_long.csv")
varnames = ['price', 'time', 'conven', 'comfort', 'meals', 'petfr',
            'emipp', 'nonsig1', 'nonsig2', 'nonsig3']
alt_id_col, obs_id_col, choice_col = "alt", "id", "choice"
mixing_id_col = "id"
mixing_vars = ["meals", "petfr", "emipp"]

# All explanatory variables are cast to float before estimation
df[varnames] = df[varnames].astype(float)

# Every variable gets the entry [[1, 2, 3]] in the specification and is
# named after its own column
spec = OrderedDict((name, [[1, 2, 3]]) for name in varnames)
spec_names = OrderedDict((name, [name]) for name in varnames)

# Run estimation with pylogit; results are recorded under the 'pylogit' label.
# Drop rows left by a previous execution of this cell so that re-running it
# replaces the measurements instead of duplicating them (duplicates make the
# pivot in the comparison table fail).
rdf = rdf[rdf.library != 'pylogit'].reset_index(drop=True)

print("Ndraws Time(s) Log-Likeli.")
for n_draws in [100, 200, 300]:
    np.random.seed(0)  # same seed for every run; set outside the timed region
    # perf_counter() is a monotonic high-resolution clock meant for measuring
    # intervals; time() follows the wall clock and can jump if it is adjusted
    start_time = perf_counter()
    model = pl.create_choice_model(data=df, alt_id_col=alt_id_col,
                                obs_id_col=obs_id_col, choice_col=choice_col,
                                specification=spec, mixing_vars=mixing_vars,
                                model_type="Mixed Logit", names=spec_names,
                                mixing_id_col=mixing_id_col)
    # One starting value per coefficient plus one per mixing variable
    model.fit_mle(init_vals=np.zeros(len(varnames)+len(mixing_vars)),
                num_draws=n_draws, seed=123, print_res=False)
    elapsed = perf_counter() - start_time
    print("{:6} {:7.2f} {:11.2f}".format(n_draws, elapsed, model.log_likelihood))
    rdf.loc[len(rdf)] = ['pylogit', n_draws, elapsed]

### Execute biogeme

In [None]:
# Install and import biogeme
!pip install biogeme==3.2.6
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.messaging as msg
from biogeme.expressions import Beta, bioDraws, log, MonteCarlo

In [None]:
# Prepare input data: read the wide-format artificial dataset and wrap it in
# a biogeme Database
df = pd.read_csv(data_folder+"/artificial_wide.csv")

# The choice column is converted to text and then mapped back to the integer
# alternative ids below
mapping = {'1': 1, '2': 2, '3': 3}
df['choice'] = df['choice'].astype(str)

# One availability column per alternative, filled with ones
for alt_label in mapping:
    df["aval_"+alt_label] = np.ones(len(df))
df = df.replace({'choice': mapping})

database = db.Database('artificial', df)

# Expose the database variables as module-level names so the utility
# expressions can refer to them directly (price_1, time_1, choice, ...)
globals().update(database.variables)

# Specification of the mixed logit model as biogeme expressions.
# NOTE(review): the Beta arguments are presumably (name, starting value,
# lower bound, upper bound, status) with status 0 meaning "estimate this
# parameter" -- confirm against the biogeme documentation.

# Fixed params: one coefficient for each variable without random variation
b_price = Beta('b_price', 0, None, None, 0)
b_time = Beta('b_time', 0, None, None, 0)
b_conven = Beta('b_conven', 0, None, None, 0)
b_comfort = Beta('b_comfort', 0, None, None, 0)
b_nonsig1 = Beta('b_nonsig1', 0, None, None, 0)
b_nonsig2 = Beta('b_nonsig2', 0, None, None, 0)
b_nonsig3 = Beta('b_nonsig3', 0, None, None, 0)

# Random params: a u_* and an sd_* parameter for each random coefficient
# (meals, petfr, emipp), combined below as u + sd * draw
u_meals = Beta('u_meals', 0, None, None, 0)
u_petfr = Beta('u_petfr', 0, None, None, 0)
u_emipp = Beta('u_emipp', 0, None, None, 0)
sd_meals = Beta('sd_meals', 0, None, None, 0)
sd_petfr = Beta('sd_petfr', 0, None, None, 0)
sd_emipp = Beta('sd_emipp', 0, None, None, 0)

# Each random coefficient is built once from a 'NORMAL' bioDraws term and the
# same expression object is reused in V1, V2 and V3
b_meals = u_meals + sd_meals*bioDraws('b_meals', 'NORMAL')
b_petfr = u_petfr + sd_petfr*bioDraws('b_petfr', 'NORMAL')
b_emipp = u_emipp + sd_emipp*bioDraws('b_emipp', 'NORMAL')

# Utility of each alternative: linear combination of that alternative's
# columns (suffix _1, _2, _3), which are available as global names because the
# database variables were copied into globals() beforehand
V1 = price_1*b_price+time_1*b_time+conven_1*b_conven+comfort_1*b_comfort+\
    meals_1*b_meals+petfr_1*b_petfr+emipp_1*b_emipp+nonsig1_1*b_nonsig1+\
        nonsig2_1*b_nonsig2+nonsig3_1*b_nonsig3
V2 = price_2*b_price+time_2*b_time+conven_2*b_conven+comfort_2*b_comfort+\
    meals_2*b_meals+petfr_2*b_petfr+emipp_2*b_emipp+nonsig1_2*b_nonsig1+\
        nonsig2_2*b_nonsig2+nonsig3_2*b_nonsig3
V3 = price_3*b_price+time_3*b_time+conven_3*b_conven+comfort_3*b_comfort+\
    meals_3*b_meals+petfr_3*b_petfr+emipp_3*b_emipp+nonsig1_3*b_nonsig1+\
        nonsig2_3*b_nonsig2+nonsig3_3*b_nonsig3

# Alternative id -> utility expression, and alternative id -> availability
# column (the aval_* columns are filled with ones when the data is prepared)
V = {1: V1, 2: V2, 3: V3}
av = {1: aval_1, 2: aval_2, 3: aval_3}

# NOTE(review): presumably the logit probability of the chosen alternative
# for given draws, then the log of its Monte Carlo average over the draws --
# confirm against the biogeme documentation.
prob = models.logit(V, av, choice)
logprob = log(MonteCarlo(prob))

# Define level of verbosity: silence biogeme's messages during the benchmark
logger = msg.bioMessage()
logger.setSilent()

# Run estimation with biogeme; results are recorded under the 'biogeme' label.
# Drop rows left by a previous execution of this cell so that re-running it
# replaces the measurements instead of duplicating them (duplicates make the
# pivot in the comparison table fail).
rdf = rdf[rdf.library != 'biogeme'].reset_index(drop=True)

print("Ndraws Time(s) Log-Likeli.")
for n_draws in [100, 200, 300]:
    # Seed before every run, as the xlogit and pylogit benchmarks do.
    # NOTE(review): presumably biogeme generates its draws from numpy's global
    # random state -- confirm; the call is harmless otherwise.
    np.random.seed(0)
    # perf_counter() is a monotonic high-resolution clock meant for measuring
    # intervals; time() follows the wall clock and can jump if it is adjusted
    start_time = perf_counter()
    biogeme = bio.BIOGEME(database, logprob, numberOfDraws=n_draws, numberOfThreads=2)
    biogeme.modelName = 'MixedLogitArtificial'
    # Skip the HTML and pickle outputs
    biogeme.generateHtml = False
    biogeme.generatePickle = False
    results = biogeme.estimate()
    elapsed = perf_counter() - start_time
    print("{:6} {:7.2f} {:11.2f}".format(n_draws, elapsed, results.data.logLike))
    rdf.loc[len(rdf)] = ['biogeme', n_draws, elapsed]

## Part 2: Plot results

In [None]:
import matplotlib.pyplot as plt
import matplotlib
# Figure style shared by the plots below: larger font, no top/right spines
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['axes.spines.right'] = False
matplotlib.rcParams['axes.spines.top'] = False

# Libraries to plot; markers and colors are matched to them by position, so
# only the first len(libs) entries of each list are used
libs = ['biogeme', 'pylogit', 'xlogit', 'xlogit_gpu']
markers = ['^', 'd', 'x', 'o', '^', '|']
colors = ["#984ea3", "#377eb8", "#ff7f00", "#e41a1c", "#984ea3", "#a4c500"]

In [None]:
# Plot time benchmark: one line per library, estimation time against the
# number of random draws
plt.figure()
for lib, marker, color in zip(libs, markers, colors):
    lib_rows = rdf[rdf.library == lib]
    n_draws_axis, time_axis = lib_rows[["draws", "time"]].values.T
    plt.plot(n_draws_axis, time_axis, marker=marker, c=color, label=lib)
plt.legend()
plt.title("Estimation time (artificial dataset)")
plt.xlabel("Random draws")
plt.ylabel("Time (Seconds)")
plt.show()

## Part 3: Show comparison table

In [None]:
# Build the comparison table: one row per library, one column per number of
# draws, plus each time relative to the baseline library.
draws_list = [100, 200, 300]
baseline = "xlogit_gpu"

# rdf starts as an empty frame without numeric dtypes and is filled row by
# row; cast explicitly so the arithmetic and round() below work on numbers
times = rdf.astype({"draws": int, "time": float})
# A benchmark cell that was executed more than once leaves several rows for
# the same (library, draws) pair, which pivot() rejects; keep the latest one
times = times.drop_duplicates(subset=["library", "draws"], keep="last")
dfc = times.pivot(index='library', columns='draws', values='time')

if baseline not in dfc.index:
    raise ValueError(
        "No '{}' timings found in rdf; run that benchmark cell first".format(baseline))

# Compute estimation time compared to xlogit_gpu
for draws in draws_list:
    dfc['c'+str(draws)] = dfc[draws] / dfc.loc[baseline, draws]
ratio_cols = ['c'+str(draws) for draws in draws_list]
# skipna=False: a library with a missing run gets NaN rather than a partial mean
dfc['cavg'] = dfc[ratio_cols].mean(axis=1, skipna=False)
dfc = dfc.round(1)

# Print in a table format
print("\n\n********* TABLE COMPARISON ESTIMATION TIME *********")
print("{:12} {:^23} {:^23}".format("", "Estimation time", "Compared to xlogit_gpu"))
header = dfc.columns.values
print("{:12} {:6} {:6} {:6} {:>6} {:>6} {:>6} {:>6}".format(
    "draws-->", *header[:6], "c_avg"))
for library, row in dfc.iterrows():
    print("{:12} {:6} {:6} {:6} {:6} {:6} {:6} {:6}".format(
        library, *row.values[:7]))

## Part 4: Show `xlogit` estimates

### Artificial dataset

In [None]:
# Turn GPU acceleration back on (it is disabled for the CPU benchmark) and
# estimate the mixed logit model on the artificial dataset
device.enable_gpu_acceleration()

varnames = ['price', 'time', 'conven', 'comfort', 'meals', 'petfr',
            'emipp', 'nonsig1', 'nonsig2', 'nonsig3']
df = pd.read_csv(data_folder+"/artificial_long.csv")

np.random.seed(0)
model = MixedLogit()
model.fit(
    X=df[varnames],
    y=df['choice'],
    varnames=varnames,
    alts=df['alt'],
    ids=df['id'],
    randvars=dict.fromkeys(['meals', 'petfr', 'emipp'], 'n'),
    n_draws=400,
)
model.summary()

### Electricity dataset

In [None]:
# Estimate a mixed logit model on the electricity dataset, passing the
# 'chid' column as panels; every explanatory variable gets a random coefficient
varnames = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']
df = pd.read_csv(data_folder+"/electricity_long.csv")

np.random.seed(0)
model = MixedLogit()
model.fit(
    X=df[varnames],
    y=df['choice'],
    varnames=varnames,
    alts=df['alt'],
    ids=df['id'],
    panels=df['chid'],
    randvars={name: 'n' for name in varnames},
    n_draws=600,
)
model.summary()

### Fishing dataset

In [None]:
# Estimate a mixed logit model on the fishing dataset with random
# coefficients for both explanatory variables
df = pd.read_csv(data_folder+"/fishing_long.csv")
X, y = df[['price', 'catch']], df['choice']

np.random.seed(0)
model = MixedLogit()
model.fit(
    X,
    y,
    varnames=['price', 'catch'],
    alts=df['alt'],
    ids=df['id'],
    randvars={'price': 'n', 'catch': 'n'},
    n_draws=1000,
)
model.summary()