In [None]:
# default_exp MNL

# MNL

> API details.

In [8]:
#hide
from nbdev.showdoc import *

In [9]:
import math
import pickle
import numpy as np
import pandas as pd
import torch

### Load data set

In [19]:
data = pd.read_csv('./data/swissmetro_clean.csv')
data.shape, type(data)

((10692, 28), pandas.core.frame.DataFrame)

In [20]:
data['CHOICE'].value_counts()

2    6199
3    3080
1    1413
Name: CHOICE, dtype: int64

In [21]:
# Preprocessing
data['TRAIN_AV'] = data['TRAIN_AV'] * (data['SP'] != 0)
data['CAR_AV'] = data['CAR_AV'] * (data['SP'] != 0)

data['SM_CO'] = data['SM_CO'] * (data['GA'] == 0)
data['TRAIN_CO'] = data['TRAIN_CO'] * (data['GA'] == 0)

cols = ['TRAIN_TT','TRAIN_CO', 'SM_TT', 'SM_CO', 'CAR_TT', 'CAR_CO']
data[cols] = data[cols]/100.0

In [22]:
data.head()

Unnamed: 0,GROUP,SURVEY,SP,ID,PURPOSE,FIRST,TICKET,WHO,LUGGAGE,AGE,...,TRAIN_TT,TRAIN_CO,TRAIN_HE,SM_TT,SM_CO,SM_HE,SM_SEATS,CAR_TT,CAR_CO,CHOICE
0,2,0,1,1,1,0,1,1,0,3,...,1.12,0.48,120,0.63,0.52,20,0,1.17,0.65,2
1,2,0,1,1,1,0,1,1,0,3,...,1.03,0.48,30,0.6,0.49,10,0,1.17,0.84,2
2,2,0,1,1,1,0,1,1,0,3,...,1.3,0.48,60,0.67,0.58,30,0,1.17,0.52,2
3,2,0,1,1,1,0,1,1,0,3,...,1.03,0.4,30,0.63,0.52,20,0,0.72,0.52,2
4,2,0,1,1,1,0,1,1,0,3,...,1.3,0.36,60,0.63,0.42,20,0,0.9,0.84,2


#### Utility function:

* V1 = ASC_TRAIN + B_TIME * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
* V2 = ASC_SM    + B_TIME * SM_TT_SCALED    + B_COST * SM_COST_SCALED
* V3 = ASC_CAR   + B_TIME * CAR_TT_SCALED   + B_COST * CAR_CO_SCALED

av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

CHOICE: 1: Train, 2: SM, 3: Car


### MNL test with MSELoss

In [23]:
class MNL(torch.nn.Module):
    
    def __init__(self):
        super().__init__()
        self.ASC_TRAIN = torch.nn.Parameter(torch.full((),2.0))
        self.ASC_SM    = torch.nn.Parameter(torch.full((),1.0))
        self.ASC_CAR   = torch.nn.Parameter(torch.full((),1.0))
        self.B_TIME    = torch.nn.Parameter(torch.full((),-1.0))
        self.B_COST    = torch.nn.Parameter(torch.full((),-0.5))        
        
    def forward(self, x):
        # Calculate V
        V1 = self.ASC_TRAIN + self.B_TIME * torch.tensor(x['TRAIN_TT'].values, dtype=torch.float) + self.B_COST * torch.tensor(x['TRAIN_CO'].values, dtype=torch.float)
        V2 = self.ASC_SM    + self.B_TIME * torch.tensor(x['SM_TT'].values, dtype=torch.float)    + self.B_COST * torch.tensor(x['SM_CO'].values, dtype=torch.float)
        V3 = self.ASC_CAR   + self.B_TIME * torch.tensor(x['CAR_TT'].values, dtype=torch.float)   + self.B_COST * torch.tensor(x['CAR_CO'].values, dtype=torch.float)
        
        SUM = torch.exp(V1) + torch.exp(V2) + torch.exp(V3)
        P1 = torch.exp(V1)/SUM
        P2 = torch.exp(V2)/SUM
        P3 = torch.exp(V3)/SUM
        
        ALTS = {'TRAIN': 1, 'SM': 2, 'CAR':3}
        output = P1 * ALTS['TRAIN'] + P2 * ALTS['SM'] + P3 * ALTS['CAR']
        return output
    
    def string(self):
        return f'ASC_TRAIN={self.ASC_TRAIN}, ASC_SM={self.ASC_SM}, ASC_CAR={self.ASC_CAR}, B_TIME={self.B_TIME}, B_COST={self.B_COST}'

In [24]:
# Data prepraration
x = data[cols]
y = data['CHOICE']
y = torch.tensor(y.values,dtype=torch.float)
x.shape, y.shape

((10692, 6), torch.Size([10692]))

In [25]:
model = MNL()
lr = 1e-9
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(),lr=lr)

for t in range(2000):
    # Forward
    y_pred = model(x)
    # Compute loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss)
    # Backward
    model.zero_grad()
    loss.backward()
    optimizer.step()    

99 tensor(5836.6802, grad_fn=<MseLossBackward>)
199 tensor(5835.8643, grad_fn=<MseLossBackward>)
299 tensor(5835.0493, grad_fn=<MseLossBackward>)
399 tensor(5834.2344, grad_fn=<MseLossBackward>)
499 tensor(5833.4204, grad_fn=<MseLossBackward>)
599 tensor(5832.6055, grad_fn=<MseLossBackward>)
699 tensor(5831.7910, grad_fn=<MseLossBackward>)
799 tensor(5830.9780, grad_fn=<MseLossBackward>)
899 tensor(5830.1646, grad_fn=<MseLossBackward>)
999 tensor(5829.3516, grad_fn=<MseLossBackward>)
1099 tensor(5828.5391, grad_fn=<MseLossBackward>)
1199 tensor(5827.7271, grad_fn=<MseLossBackward>)
1299 tensor(5826.9141, grad_fn=<MseLossBackward>)
1399 tensor(5826.1035, grad_fn=<MseLossBackward>)
1499 tensor(5825.2920, grad_fn=<MseLossBackward>)
1599 tensor(5824.4819, grad_fn=<MseLossBackward>)
1699 tensor(5823.6714, grad_fn=<MseLossBackward>)
1799 tensor(5822.8613, grad_fn=<MseLossBackward>)
1899 tensor(5822.0518, grad_fn=<MseLossBackward>)
1999 tensor(5821.2427, grad_fn=<MseLossBackward>)


In [26]:
model.string()

'ASC_TRAIN=1.9954700469970703, ASC_SM=1.0011920928955078, ASC_CAR=1.0030994415283203, B_TIME=-1.0009536743164062, B_COST=-0.49970197677612305'

### MNL test with Negative Loglikelihood

In [27]:
class MNL(torch.nn.Module):
    
    def __init__(self):
        super().__init__()
        self.ASC_TRAIN = torch.nn.Parameter(torch.full((),0.1))
        self.ASC_SM    = torch.tensor([0]) # Fixed ASC_SM = 1 like in biogeme
        self.ASC_CAR   = torch.nn.Parameter(torch.full((),0.1))
        self.B_TIME    = torch.nn.Parameter(torch.full((),0.1))
        self.B_COST    = torch.nn.Parameter(torch.full((),0.1))        
        
    def forward(self, x, av=None):
        # Calculate V
        V1 = self.ASC_TRAIN + self.B_TIME * torch.tensor(x['TRAIN_TT'].values, dtype=torch.float) + self.B_COST * torch.tensor(x['TRAIN_CO'].values, dtype=torch.float)
        V2 = self.ASC_SM    + self.B_TIME * torch.tensor(x['SM_TT'].values, dtype=torch.float)    + self.B_COST * torch.tensor(x['SM_CO'].values, dtype=torch.float)
        V3 = self.ASC_CAR   + self.B_TIME * torch.tensor(x['CAR_TT'].values, dtype=torch.float)   + self.B_COST * torch.tensor(x['CAR_CO'].values, dtype=torch.float)
        # Join with availability
        V1 = V1 * torch.tensor(av['TRAIN_AV'].values, dtype=torch.float)
        V2 = V2 * torch.tensor(av['SM_AV'].values, dtype=torch.float)
        V3 = V3 * torch.tensor(av['CAR_AV'].values, dtype=torch.float)
        # Concat into one matrix
        V = torch.cat((V1.unsqueeze(-1),V2.unsqueeze(-1),V3.unsqueeze(-1)),1)
        # Get probality and loglikelihood
        probs = V.exp()/(V.exp().sum(-1,keepdim=True))
        return probs
    
  
    def string(self):
        return f'ASC_TRAIN={self.ASC_TRAIN}, ASC_SM={self.ASC_SM}, ASC_CAR={self.ASC_CAR}, B_TIME={self.B_TIME}, B_COST={self.B_COST}'

In [28]:
# Data prepraration
x = data[cols]

y = data['CHOICE']
y = torch.tensor(y.values,dtype=torch.long)
y = y - 1

av = data[['TRAIN_AV','CAR_AV','SM_AV']]

x.shape, y.shape

((10692, 6), torch.Size([10692]))

In [37]:
def nll(probs, target): return -probs[range(target.shape[0]), target].log().sum()

model = MNL()
lr = 1e-6
optimizer = torch.optim.SGD(model.parameters(),lr=lr)

for t in range(2000):
    # Forward
    probs = model(x, av)
    # Negative loglikelihood
    loss = nll(probs,y)
    if t % 100 == 99:
        print(t, loss.item())
    # Backward
    model.zero_grad()
    loss.backward()
    optimizer.step()

99 11319.28515625
199 10877.2060546875
299 10651.65625
399 10529.3466796875
499 10459.466796875
599 10417.56640625
699 10391.2197265625
799 10373.8291015625
899 10361.771484375
999 10352.9892578125
1099 10346.2900390625
1199 10340.962890625
1299 10336.5732421875
1399 10332.8525390625
1499 10329.625
1599 10326.77734375
1699 10324.2353515625
1799 10321.943359375
1899 10319.8603515625
1999 10317.9619140625


In [40]:
import torch.nn.functional as F
loss_func = F.cross_entropy
def accuracy(out, yb): return (torch.argmax(out, dim=1)==yb).float().mean()

loss_func(model(x,av), y), accuracy(model(x,av),y)

(tensor(1.0285, grad_fn=<NllLossBackward>), tensor(0.5190))

In [41]:
model.string()

'ASC_TRAIN=-0.8027235269546509, ASC_SM=tensor([0]), ASC_CAR=-0.10149882733821869, B_TIME=-0.5671002268791199, B_COST=-0.1523554027080536'

### TasteMNL with Negative Loglikelihood

In [46]:
class TasteMNL(torch.nn.Module):
    
    def __init__(self, N):
        super().__init__()
        self.linear = torch.nn.Linear(5,4)
        self.ASC_SM    = torch.tensor([0])
        self.ASC_TRAIN = torch.zeros(N)   
        self.ASC_CAR = torch.zeros(N)   
        self.B_TIME = torch.zeros(N)   
        self.B_COST = torch.zeros(N)   
        
    def forward(self, z, x, av=None):        
        
        Beta = self.linear(z)
        self.ASC_TRAIN = Beta[:,0]
        self.ASC_CAR = Beta[:,1]
        self.B_TIME = Beta[:,2]
        self.B_COST = Beta[:,3]
        
        # Calculate V
        V1 = self.ASC_TRAIN + self.B_TIME * torch.tensor(x['TRAIN_TT'].values, dtype=torch.float) + \
            self.B_COST * torch.tensor(x['TRAIN_CO'].values, dtype=torch.float)
        V2 = self.ASC_SM    + self.B_TIME * torch.tensor(x['SM_TT'].values, dtype=torch.float)    + \
            self.B_COST * torch.tensor(x['SM_CO'].values, dtype=torch.float)
        V3 = self.ASC_CAR   + self.B_TIME * torch.tensor(x['CAR_TT'].values, dtype=torch.float)   + \
            self.B_COST * torch.tensor(x['CAR_CO'].values, dtype=torch.float)
        # Join with availability
        V1 = V1 * torch.tensor(av['TRAIN_AV'].values, dtype=torch.float)
        V2 = V2 * torch.tensor(av['SM_AV'].values, dtype=torch.float)
        V3 = V3 * torch.tensor(av['CAR_AV'].values, dtype=torch.float)
        # Concat into one matrix
        V = torch.cat((V1.unsqueeze(-1),V2.unsqueeze(-1),V3.unsqueeze(-1)),1)
        # Get probality and loglikelihood
        probs = V.exp()/(V.exp().sum(-1,keepdim=True))
        return probs
    
  
    def string(self):
        return f'ASC_TRAIN={self.ASC_TRAIN}, ASC_SM={self.ASC_SM}, ASC_CAR={self.ASC_CAR}, B_TIME={self.B_TIME}, B_COST={self.B_COST}'

In [47]:
# Data prepraration
# Personal attributes
z = data[['AGE','MALE','INCOME','FIRST','PURPOSE']].copy()
z = torch.tensor(z.values, dtype=torch.float)
# Alternative attributes
x = data[cols]
# Observed choice (Train:0, SM: 1, Car: 2)
y = data['CHOICE']
y = torch.tensor(y.values,dtype=torch.long)
y = y - 1
# Availability
av = data[['TRAIN_AV','SM_AV', 'CAR_AV']]

z.shape, x.shape, y.shape, av.shape

(torch.Size([10692, 5]), (10692, 6), torch.Size([10692]), (10692, 3))

In [48]:
N = z.shape[0]
model = TasteMNL(N)
lr = 1e-3
# optimizer = torch.optim.SGD(model.parameters(),lr=lr)
optimizer =  torch.optim.Adam(model.parameters(), lr = lr, weight_decay = 0.0)
for t in range(2000):
    # Forward
    probs = model(z, x, av)
    # Negative loglikelihood
    loss = nll(probs,y)
    if t % 100 == 99:
        print(t, loss.item())
    # Backward
    model.zero_grad()
    loss.backward()
    optimizer.step()

99 11177.083984375
199 10593.2978515625
299 10391.990234375
399 10254.3349609375
499 10151.583984375
599 10069.798828125
699 10002.2939453125
799 9945.92578125
899 9898.90234375
999 9859.8759765625
1099 9827.6494140625
1199 9801.1435546875
1299 9779.37890625
1399 9761.494140625
1499 9746.7451171875
1599 9734.505859375
1699 9724.2587890625
1799 9715.5859375
1899 9708.1611328125
1999 9701.7265625


In [49]:
import torch.nn.functional as F
loss_func = F.cross_entropy
def accuracy(out, yb): return (torch.argmax(out, dim=1)==yb).float().mean()
loss_func(model(z,x,av), y), accuracy(model(z,x,av),y)

(tensor(0.9994, grad_fn=<NllLossBackward>), tensor(0.5521))