In [None]:
# default_exp MNL

# MNL

> API details.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
import math
import pickle
import numpy as np
import pandas as pd
import torch

### Load data set

In [None]:
# Load the pickled data set. The file is opened in a `with` block so the
# handle is closed as soon as the object has been read.
# NOTE(review): pickle can execute arbitrary code while loading; only open
# files that come from a trusted source.
with open('./data/swissmetro_clean.pkl', "rb") as f:
    data = pickle.load(f)
data.shape, type(data)

((10692, 28), pandas.core.frame.DataFrame)

In [None]:
# Count how many rows carry each CHOICE code
data['CHOICE'].value_counts()

2    6199
3    3080
1    1413
Name: CHOICE, dtype: int64

In [None]:
# Preprocessing

# Zero the train / car availability flags on rows where SP == 0
data['CAR_AV'] = data['CAR_AV'].mul(data['SP'].ne(0))
data['TRAIN_AV'] = data['TRAIN_AV'].mul(data['SP'].ne(0))

# Zero the train / Swissmetro cost on rows where GA != 0
data['TRAIN_CO'] = data['TRAIN_CO'].mul(data['GA'].eq(0))
data['SM_CO'] = data['SM_CO'].mul(data['GA'].eq(0))

# Rescale every travel-time and cost column by 1/100
cols = ['TRAIN_TT','TRAIN_CO', 'SM_TT', 'SM_CO', 'CAR_TT', 'CAR_CO']
data[cols] = data[cols].div(100.0)

#### Utility function:

* V1 = ASC_TRAIN + B_TIME * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
* V2 = ASC_SM    + B_TIME * SM_TT_SCALED    + B_COST * SM_COST_SCALED
* V3 = ASC_CAR   + B_TIME * CAR_TT_SCALED   + B_COST * CAR_COST_SCALED

av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

CHOICE: 1: Train, 2: SM, 3: Car


### MNL test with MSELoss

In [None]:
class MNL(torch.nn.Module):
    """Three-alternative multinomial logit (Train, SM, Car) fitted with an MSE loss.

    Each alternative has a linear utility ``ASC + B_TIME * TT + B_COST * CO``.
    ``forward`` does not return the choice probabilities themselves but the
    probability-weighted alternative code ``1*P_train + 2*P_sm + 3*P_car``,
    so that it can be regressed directly on the CHOICE column.
    """

    # Numeric code attached to each alternative, in the order Train, SM, Car
    _ALT_CODES = (1.0, 2.0, 3.0)

    def __init__(self):
        super().__init__()
        # Scalar trainable coefficients with their starting values
        self.ASC_TRAIN = torch.nn.Parameter(torch.full((), 2.0))
        self.ASC_SM    = torch.nn.Parameter(torch.full((), 1.0))
        self.ASC_CAR   = torch.nn.Parameter(torch.full((), 1.0))
        self.B_TIME    = torch.nn.Parameter(torch.full((), -1.0))
        self.B_COST    = torch.nn.Parameter(torch.full((), -0.5))

    @staticmethod
    def _col(frame, name):
        """Return column ``name`` of ``frame`` as a float32 tensor."""
        return torch.tensor(frame[name].values, dtype=torch.float)

    def forward(self, x):
        """Compute the expected alternative code for every row of ``x``.

        Parameters
        ----------
        x : column-indexable table (e.g. a pandas DataFrame) exposing the
            columns TRAIN_TT, TRAIN_CO, SM_TT, SM_CO, CAR_TT and CAR_CO.

        Returns
        -------
        1-D float tensor with one value per row: ``sum_k code_k * P_k``.
        """
        # Systematic utilities
        V1 = self.ASC_TRAIN + self.B_TIME * self._col(x, 'TRAIN_TT') + self.B_COST * self._col(x, 'TRAIN_CO')
        V2 = self.ASC_SM    + self.B_TIME * self._col(x, 'SM_TT')    + self.B_COST * self._col(x, 'SM_CO')
        V3 = self.ASC_CAR   + self.B_TIME * self._col(x, 'CAR_TT')   + self.B_COST * self._col(x, 'CAR_CO')

        # softmax is the same quantity as exp(V) / sum(exp(V)) but does not
        # overflow to inf/inf = NaN when a utility is large.
        probs = torch.softmax(torch.stack((V1, V2, V3), dim=-1), dim=-1)

        codes = torch.tensor(self._ALT_CODES, dtype=probs.dtype)
        return (probs * codes).sum(dim=-1)

    def string(self):
        """Return the current coefficient values as one human-readable line."""
        # .item() extracts the Python float; interpolating the Parameter
        # itself would print its multi-line "Parameter containing:" repr.
        return (
            f'ASC_TRAIN={self.ASC_TRAIN.item()}, ASC_SM={self.ASC_SM.item()}, '
            f'ASC_CAR={self.ASC_CAR.item()}, B_TIME={self.B_TIME.item()}, '
            f'B_COST={self.B_COST.item()}'
        )

In [None]:
# Data preparation: attribute columns stay a DataFrame, the observed choice
# code becomes a float tensor (the MSE loss needs a float target).
x = data[cols]
y = torch.tensor(data['CHOICE'].values, dtype=torch.float)
x.shape, y.shape

((10692, 6), torch.Size([10692]))

In [None]:
# Fit the MNL coefficients by plain gradient descent on the summed squared error
model = MNL()
lr = 1e-9
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(),lr=lr)

for t in range(2000):
    # Forward
    y_pred = model(x)
    # Compute loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss)
    # Backward: clear the gradients left over from the previous step first,
    # otherwise they would accumulate across iterations.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

99 tensor(8372.7666, grad_fn=<MseLossBackward>)
199 tensor(8215.6748, grad_fn=<MseLossBackward>)
299 tensor(7981.7471, grad_fn=<MseLossBackward>)
399 tensor(7708.0303, grad_fn=<MseLossBackward>)
499 tensor(7435.6187, grad_fn=<MseLossBackward>)
599 tensor(7200.6538, grad_fn=<MseLossBackward>)
699 tensor(7027.1870, grad_fn=<MseLossBackward>)
799 tensor(6924.5605, grad_fn=<MseLossBackward>)
899 tensor(6889.4072, grad_fn=<MseLossBackward>)
999 tensor(6910.1782, grad_fn=<MseLossBackward>)
1099 tensor(6971.7646, grad_fn=<MseLossBackward>)
1199 tensor(7058.8242, grad_fn=<MseLossBackward>)
1299 tensor(7157.6372, grad_fn=<MseLossBackward>)
1399 tensor(7256.8555, grad_fn=<MseLossBackward>)
1499 tensor(7347.6250, grad_fn=<MseLossBackward>)
1599 tensor(7423.4067, grad_fn=<MseLossBackward>)
1699 tensor(7479.6465, grad_fn=<MseLossBackward>)
1799 tensor(7513.3608, grad_fn=<MseLossBackward>)
1899 tensor(7522.3906, grad_fn=<MseLossBackward>)
1999 tensor(7503.1631, grad_fn=<MseLossBackward>)


In [None]:
# Show the coefficients of the model after the MSE training loop
model.string()

'ASC_TRAIN=-0.07177126407623291, ASC_SM=1.6048898696899414, ASC_CAR=2.466876268386841, B_TIME=-1.4506571292877197, B_COST=-0.15838482975959778'

### MNL test with Negative Loglikelihood

In [None]:
class MNL(torch.nn.Module):
    """Three-alternative multinomial logit (Train, SM, Car) returning choice probabilities.

    Utilities are ``ASC + B_TIME * TT + B_COST * CO``. ASC_SM is a fixed
    (non-trainable) zero, so the other two constants are measured relative
    to it. Alternatives flagged as unavailable get probability exactly 0.
    """

    def __init__(self):
        super().__init__()
        self.ASC_TRAIN = torch.nn.Parameter(torch.full((), 0.1))
        # Held fixed at 0 and never trained (plain tensor, not a Parameter);
        # the original note refers to Biogeme for this normalisation.
        self.ASC_SM    = torch.tensor([0])
        self.ASC_CAR   = torch.nn.Parameter(torch.full((), 0.1))
        self.B_TIME    = torch.nn.Parameter(torch.full((), 0.1))
        self.B_COST    = torch.nn.Parameter(torch.full((), 0.1))

    @staticmethod
    def _col(frame, name):
        """Return column ``name`` of ``frame`` as a float32 tensor."""
        return torch.tensor(frame[name].values, dtype=torch.float)

    def forward(self, x, av=None):
        """Return the choice probabilities for every row.

        Parameters
        ----------
        x : column-indexable table (e.g. a pandas DataFrame) exposing
            TRAIN_TT, TRAIN_CO, SM_TT, SM_CO, CAR_TT and CAR_CO.
        av : optional table exposing TRAIN_AV, SM_AV and CAR_AV, where a
            non-zero entry means the alternative is available on that row.
            ``None`` treats every alternative as available.

        Returns
        -------
        Float tensor of shape (rows, 3) with columns ordered Train, SM, Car.
        A row on which no alternative is available yields NaN.
        """
        # Systematic utilities
        V1 = self.ASC_TRAIN + self.B_TIME * self._col(x, 'TRAIN_TT') + self.B_COST * self._col(x, 'TRAIN_CO')
        V2 = self.ASC_SM    + self.B_TIME * self._col(x, 'SM_TT')    + self.B_COST * self._col(x, 'SM_CO')
        V3 = self.ASC_CAR   + self.B_TIME * self._col(x, 'CAR_TT')   + self.B_COST * self._col(x, 'CAR_CO')
        V = torch.stack((V1, V2, V3), dim=1)

        if av is not None:
            available = torch.stack(
                [self._col(av, name) for name in ('TRAIN_AV', 'SM_AV', 'CAR_AV')],
                dim=1,
            ) != 0
            # An unavailable alternative must drop out of the choice set.
            # Multiplying its utility by 0 would leave exp(0) = 1 in the
            # denominator; a utility of -inf gives it probability 0.
            V = V.masked_fill(~available, float('-inf'))

        # Numerically stable equivalent of exp(V) / sum(exp(V))
        return torch.softmax(V, dim=-1)

    def string(self):
        """Return the current coefficient values as one human-readable line."""
        # .item() prints the scalar value instead of the Parameter repr;
        # ASC_SM is a plain one-element tensor and is shown as such.
        return (
            f'ASC_TRAIN={self.ASC_TRAIN.item()}, ASC_SM={self.ASC_SM}, '
            f'ASC_CAR={self.ASC_CAR.item()}, B_TIME={self.B_TIME.item()}, '
            f'B_COST={self.B_COST.item()}'
        )

In [None]:
# Data preparation
# Alternative attributes
x = data[cols]

# Observed choice as an integer tensor, shifted from codes 1..3 to indices 0..2
y = torch.tensor(data['CHOICE'].values, dtype=torch.long) - 1

# Availability flags (looked up by column name later on)
av = data.loc[:, ['TRAIN_AV','CAR_AV','SM_AV']]

x.shape, y.shape

((10692, 6), torch.Size([10692]))

In [None]:
def nll(probs, target):
    """Summed negative log-likelihood of the chosen alternatives.

    ``probs`` is a (rows, alternatives) tensor of probabilities and
    ``target`` holds, per row, the index of the chosen alternative.
    """
    row_ids = range(target.shape[0])
    chosen = probs[row_ids, target]
    return -chosen.log().sum()

# Fit the MNL coefficients by gradient descent on the summed negative log-likelihood
model = MNL()
lr = 1e-6
optimizer = torch.optim.SGD(model.parameters(),lr=lr)

for t in range(2000):
    # Forward
    probs = model(x, av)
    # Negative loglikelihood
    loss = nll(probs,y)
    if t % 100 == 99:
        print(t, loss.item())
    # Backward: clear the gradients left over from the previous step first,
    # otherwise they would accumulate across iterations.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

99 10703.1953125
199 11286.107421875
299 11835.771484375
399 10371.9091796875
499 12089.0126953125
599 11036.29296875
699 10918.490234375
799 12167.994140625
899 10488.15234375
999 11623.80859375
1099 11591.677734375
1199 10428.90234375
1299 12215.85546875
1399 10625.486328125
1499 11304.4375
1599 11964.42578125
1699 10421.9794921875
1799 11967.2001953125
1899 11292.4248046875
1999 10732.845703125


In [None]:
import torch.nn.functional as F

def loss_func(out, yb):
    """Mean negative log-likelihood of the targets ``yb`` under probabilities ``out``."""
    # The model already returns probabilities. F.cross_entropy expects raw
    # logits and would apply a second softmax, so take the log and use
    # nll_loss instead.
    return F.nll_loss(torch.log(out), yb)

def accuracy(out, yb):
    """Share of rows whose most probable alternative equals the target."""
    return (torch.argmax(out, dim=1)==yb).float().mean()

# Evaluate once, without building an autograd graph
with torch.no_grad():
    eval_probs = model(x,av)
loss_func(eval_probs, y), accuracy(eval_probs, y)

In [None]:
# Show the coefficients of the model after the negative log-likelihood training loop
model.string()

'ASC_TRAIN=-0.31197574734687805, ASC_SM=tensor([0]), ASC_CAR=-0.2863524258136749, B_TIME=-0.38315775990486145, B_COST=-0.394696980714798'

### TasteMNL with Negative Loglikelihood

In [None]:
class TasteMNL(torch.nn.Module):
    """Multinomial logit whose coefficients are a linear function of personal attributes.

    A single ``Linear(5, 4)`` layer maps the 5 personal attributes of each
    row to that row's own ASC_TRAIN, ASC_CAR, B_TIME and B_COST. ASC_SM is a
    fixed zero. Alternatives flagged as unavailable get probability exactly 0.
    """

    def __init__(self, N):
        """``N`` is the number of rows; it only sizes the placeholder coefficient vectors."""
        super().__init__()
        self.linear = torch.nn.Linear(5,4)
        self.ASC_SM    = torch.tensor([0])
        # Placeholders, overwritten with the per-row coefficients on every forward pass
        self.ASC_TRAIN = torch.zeros(N)
        self.ASC_CAR = torch.zeros(N)
        self.B_TIME = torch.zeros(N)
        self.B_COST = torch.zeros(N)

    @staticmethod
    def _col(frame, name):
        """Return column ``name`` of ``frame`` as a float32 tensor."""
        return torch.tensor(frame[name].values, dtype=torch.float)

    def forward(self, z, x, av=None):
        """Return the choice probabilities for every row.

        Parameters
        ----------
        z : float tensor of shape (rows, 5) fed to the linear layer.
        x : column-indexable table (e.g. a pandas DataFrame) exposing
            TRAIN_TT, TRAIN_CO, SM_TT, SM_CO, CAR_TT and CAR_CO.
        av : optional table exposing TRAIN_AV, SM_AV and CAR_AV, where a
            non-zero entry means the alternative is available on that row.
            ``None`` treats every alternative as available.

        Returns
        -------
        Float tensor of shape (rows, 3) with columns ordered Train, SM, Car.
        A row on which no alternative is available yields NaN.
        """
        # Per-row coefficients; kept on the instance so string() can report them
        Beta = self.linear(z)
        self.ASC_TRAIN = Beta[:,0]
        self.ASC_CAR = Beta[:,1]
        self.B_TIME = Beta[:,2]
        self.B_COST = Beta[:,3]

        # Systematic utilities
        V1 = self.ASC_TRAIN + self.B_TIME * self._col(x, 'TRAIN_TT') + self.B_COST * self._col(x, 'TRAIN_CO')
        V2 = self.ASC_SM    + self.B_TIME * self._col(x, 'SM_TT')    + self.B_COST * self._col(x, 'SM_CO')
        V3 = self.ASC_CAR   + self.B_TIME * self._col(x, 'CAR_TT')   + self.B_COST * self._col(x, 'CAR_CO')
        V = torch.stack((V1, V2, V3), dim=1)

        if av is not None:
            available = torch.stack(
                [self._col(av, name) for name in ('TRAIN_AV', 'SM_AV', 'CAR_AV')],
                dim=1,
            ) != 0
            # An unavailable alternative must drop out of the choice set.
            # Multiplying its utility by 0 would leave exp(0) = 1 in the
            # denominator; a utility of -inf gives it probability 0.
            V = V.masked_fill(~available, float('-inf'))

        # Numerically stable equivalent of exp(V) / sum(exp(V))
        return torch.softmax(V, dim=-1)

    def string(self):
        """Return the coefficients from the latest forward pass as one line of text."""
        return f'ASC_TRAIN={self.ASC_TRAIN}, ASC_SM={self.ASC_SM}, ASC_CAR={self.ASC_CAR}, B_TIME={self.B_TIME}, B_COST={self.B_COST}'

In [None]:
# Data preparation
# Personal attributes, as a float tensor
z = torch.tensor(
    data[['AGE','MALE','INCOME','FIRST','PURPOSE']].copy().values,
    dtype=torch.float,
)
# Alternative attributes
x = data[cols]
# Observed choice (Train:0, SM: 1, Car: 2)
y = torch.tensor(data['CHOICE'].values, dtype=torch.long) - 1
# Availability
av = data.loc[:, ['TRAIN_AV','SM_AV', 'CAR_AV']]

z.shape, x.shape, y.shape, av.shape

(torch.Size([10692, 5]), (10692, 6), torch.Size([10692]), (10692, 3))

In [None]:
# Fit the TasteMNL linear layer with Adam on the summed negative log-likelihood
N = z.shape[0]
model = TasteMNL(N)
lr = 1e-3
optimizer =  torch.optim.Adam(model.parameters(), lr = lr, weight_decay = 0.0)
for t in range(2000):
    # Forward
    probs = model(z, x, av)
    # Negative loglikelihood
    loss = nll(probs,y)
    if t % 100 == 99:
        print(t, loss.item())
    # Backward: clear the gradients left over from the previous step first,
    # otherwise they would accumulate across iterations.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

99 11442.4404296875
199 10894.7841796875
299 11327.0634765625
399 10889.67578125
499 11191.970703125
599 11697.79296875
699 10631.19921875
799 12638.5673828125
899 11262.587890625
999 11491.1162109375
1099 11567.58203125
1199 12260.7451171875
1299 11954.2138671875
1399 13745.0087890625
1499 11210.6728515625
1599 13940.130859375
1699 14963.1513671875
1799 11559.541015625
1899 15724.818359375
1999 10556.94921875


In [None]:
import torch.nn.functional as F

def loss_func(out, yb):
    """Mean negative log-likelihood of the targets ``yb`` under probabilities ``out``."""
    # The model already returns probabilities. F.cross_entropy expects raw
    # logits and would apply a second softmax, so take the log and use
    # nll_loss instead.
    return F.nll_loss(torch.log(out), yb)

def accuracy(out, yb):
    """Share of rows whose most probable alternative equals the target."""
    return (torch.argmax(out, dim=1)==yb).float().mean()

# Evaluate once, without building an autograd graph
with torch.no_grad():
    eval_probs = model(z,x,av)
loss_func(eval_probs, y), accuracy(eval_probs, y)

(tensor(0.9804, grad_fn=<NllLossBackward>), tensor(0.5614))