In [None]:
# default_exp MNL

# MNL

> API details.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
import math
import pickle
import numpy as np
import pandas as pd
import torch

### Load data set

In [None]:
# data = pd.read_csv('./data/swissmetro_clean.csv')
data = pd.read_csv('./data/swissmetro_train.csv')
data.shape, type(data)

((7484, 28), pandas.core.frame.DataFrame)

In [None]:
data['CHOICE'].value_counts()

2    4339
3    2156
1     989
Name: CHOICE, dtype: int64

In [None]:
# Preprocessing
def cleaning(data): 
    data['TRAIN_AV'] = data['TRAIN_AV'] * (data['SP'] != 0)
    data['CAR_AV'] = data['CAR_AV'] * (data['SP'] != 0)

    data['SM_CO'] = data['SM_CO'] * (data['GA'] == 0)
    data['TRAIN_CO'] = data['TRAIN_CO'] * (data['GA'] == 0)

    cols = ['TRAIN_TT','TRAIN_CO', 'SM_TT', 'SM_CO', 'CAR_TT', 'CAR_CO']
    data[cols] = data[cols]/100.0
    return data

In [None]:
data = cleaning(data)
data.head()

Unnamed: 0,GROUP,SURVEY,SP,ID,PURPOSE,FIRST,TICKET,WHO,LUGGAGE,AGE,...,TRAIN_TT,TRAIN_CO,TRAIN_HE,SM_TT,SM_CO,SM_HE,SM_SEATS,CAR_TT,CAR_CO,CHOICE
0,3,1,1,651,3,0,3,3,0,4,...,1.82,1.11,30,0.81,1.32,30,0,2.1,1.55,3
1,2,0,1,17,1,0,2,1,0,1,...,1.32,0.21,30,0.54,0.28,20,0,1.17,0.25,2
2,3,1,1,916,3,1,3,2,1,4,...,2.2,2.21,60,1.62,2.25,20,0,1.44,0.84,3
3,3,1,1,965,4,0,1,1,0,5,...,2.06,0.6,30,1.58,0.72,20,0,1.6,1.1,2
4,3,1,1,1127,4,1,1,1,1,4,...,2.64,1.09,120,0.93,1.5,10,0,3.38,2.79,2


#### Utility function:

* V1 = ASC_TRAIN + B_TIME * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
* V2 = ASC_SM    + B_TIME * SM_TT_SCALED    + B_COST * SM_COST_SCALED
* V3 = ASC_CAR   + B_TIME * CAR_TT_SCALED   + B_COST * CAR_CO_SCALED

av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

CHOICE: 1: Train, 2: SM, 3: Car


### MNL test with MSELoss

In [None]:
class MNL(torch.nn.Module):
    
    def __init__(self):
        super().__init__()
        self.ASC_TRAIN = torch.nn.Parameter(torch.full((),2.0))
        self.ASC_SM    = torch.nn.Parameter(torch.full((),1.0))
        self.ASC_CAR   = torch.nn.Parameter(torch.full((),1.0))
        self.B_TIME    = torch.nn.Parameter(torch.full((),-1.0))
        self.B_COST    = torch.nn.Parameter(torch.full((),-0.5))        
        
    def forward(self, x):
        # Calculate V
        V1 = self.ASC_TRAIN + self.B_TIME * torch.tensor(x['TRAIN_TT'].values, dtype=torch.float) + self.B_COST * torch.tensor(x['TRAIN_CO'].values, dtype=torch.float)
        V2 = self.ASC_SM    + self.B_TIME * torch.tensor(x['SM_TT'].values, dtype=torch.float)    + self.B_COST * torch.tensor(x['SM_CO'].values, dtype=torch.float)
        V3 = self.ASC_CAR   + self.B_TIME * torch.tensor(x['CAR_TT'].values, dtype=torch.float)   + self.B_COST * torch.tensor(x['CAR_CO'].values, dtype=torch.float)
        
        SUM = torch.exp(V1) + torch.exp(V2) + torch.exp(V3)
        P1 = torch.exp(V1)/SUM
        P2 = torch.exp(V2)/SUM
        P3 = torch.exp(V3)/SUM
        
        ALTS = {'TRAIN': 1, 'SM': 2, 'CAR':3}
        output = P1 * ALTS['TRAIN'] + P2 * ALTS['SM'] + P3 * ALTS['CAR']
        return output
    
    def string(self):
        return f'ASC_TRAIN={self.ASC_TRAIN}, ASC_SM={self.ASC_SM}, ASC_CAR={self.ASC_CAR}, B_TIME={self.B_TIME}, B_COST={self.B_COST}'

In [None]:
# Data prepraration
x = data[cols]
y = data['CHOICE']
y = torch.tensor(y.values,dtype=torch.float)
x.shape, y.shape

((7484, 6), torch.Size([7484]))

In [None]:
model = MNL()
lr = 1e-9
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(),lr=lr)

for t in range(2000):
    # Forward
    y_pred = model(x)
    # Compute loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss)
    # Backward
    model.zero_grad()
    loss.backward()
    optimizer.step()    

99 tensor(4071.1445, grad_fn=<MseLossBackward>)
199 tensor(4070.7488, grad_fn=<MseLossBackward>)
299 tensor(4070.3530, grad_fn=<MseLossBackward>)
399 tensor(4069.9578, grad_fn=<MseLossBackward>)
499 tensor(4069.5625, grad_fn=<MseLossBackward>)
599 tensor(4069.1670, grad_fn=<MseLossBackward>)
699 tensor(4068.7720, grad_fn=<MseLossBackward>)
799 tensor(4068.3770, grad_fn=<MseLossBackward>)
899 tensor(4067.9822, grad_fn=<MseLossBackward>)
999 tensor(4067.5874, grad_fn=<MseLossBackward>)
1099 tensor(4067.1929, grad_fn=<MseLossBackward>)
1199 tensor(4066.7983, grad_fn=<MseLossBackward>)
1299 tensor(4066.4043, grad_fn=<MseLossBackward>)
1399 tensor(4066.0100, grad_fn=<MseLossBackward>)
1499 tensor(4065.6157, grad_fn=<MseLossBackward>)
1599 tensor(4065.2217, grad_fn=<MseLossBackward>)
1699 tensor(4064.8276, grad_fn=<MseLossBackward>)
1799 tensor(4064.4341, grad_fn=<MseLossBackward>)
1899 tensor(4064.0405, grad_fn=<MseLossBackward>)
1999 tensor(4063.6470, grad_fn=<MseLossBackward>)


In [None]:
model.string()

'ASC_TRAIN=1.9969005584716797, ASC_SM=1.0009536743164062, ASC_CAR=1.002145767211914, B_TIME=-1.0007152557373047, B_COST=-0.49982118606567383'

### MNL test with Negative Loglikelihood

In [None]:
class MNL(torch.nn.Module):
    
    def __init__(self):
        super().__init__()
        self.ASC_TRAIN = torch.nn.Parameter(torch.full((),0.1))
        self.ASC_SM    = torch.tensor([0]) # Fixed ASC_SM = 1 like in biogeme
        self.ASC_CAR   = torch.nn.Parameter(torch.full((),0.1))
        self.B_TIME    = torch.nn.Parameter(torch.full((),0.1))
        self.B_COST    = torch.nn.Parameter(torch.full((),0.1))        
        
    def forward(self, x, av=None):
        # Calculate V
        V1 = self.ASC_TRAIN + self.B_TIME * torch.tensor(x['TRAIN_TT'].values, dtype=torch.float) + self.B_COST * torch.tensor(x['TRAIN_CO'].values, dtype=torch.float)
        V2 = self.ASC_SM    + self.B_TIME * torch.tensor(x['SM_TT'].values, dtype=torch.float)    + self.B_COST * torch.tensor(x['SM_CO'].values, dtype=torch.float)
        V3 = self.ASC_CAR   + self.B_TIME * torch.tensor(x['CAR_TT'].values, dtype=torch.float)   + self.B_COST * torch.tensor(x['CAR_CO'].values, dtype=torch.float)
        # Join with availability
        V1 = V1 * torch.tensor(av['TRAIN_AV'].values, dtype=torch.float)
        V2 = V2 * torch.tensor(av['SM_AV'].values, dtype=torch.float)
        V3 = V3 * torch.tensor(av['CAR_AV'].values, dtype=torch.float)
        # Concat into one matrix
        V = torch.cat((V1.unsqueeze(-1),V2.unsqueeze(-1),V3.unsqueeze(-1)),1)
        # Get probality and loglikelihood
        probs = V.exp()/(V.exp().sum(-1,keepdim=True))
        return probs
    
  
    def string(self):
        return f'ASC_TRAIN={self.ASC_TRAIN}, ASC_SM={self.ASC_SM}, ASC_CAR={self.ASC_CAR}, B_TIME={self.B_TIME}, B_COST={self.B_COST}'

In [None]:
# Data prepraration
x = data[cols]

y = data['CHOICE']
y = torch.tensor(y.values,dtype=torch.long)
y = y - 1

av = data[['TRAIN_AV','CAR_AV','SM_AV']]

x.shape, y.shape

((7484, 6), torch.Size([7484]))

In [None]:
def nll(probs, target): return -probs[range(target.shape[0]), target].log().sum()

model = MNL()
lr = 1e-6
optimizer = torch.optim.SGD(model.parameters(),lr=lr)

for t in range(2000):
    # Forward
    probs = model(x, av)
    # Negative loglikelihood
    loss = nll(probs,y)
    if t % 100 == 99:
        print(t, loss.item())
    # Backward
    model.zero_grad()
    loss.backward()
    optimizer.step()

99 8071.12451171875
199 7776.70166015625
299 7597.798828125
399 7485.1640625
499 7412.060546875
599 7363.3046875
699 7329.9541015625
799 7306.57568359375
899 7289.78564453125
999 7277.43017578125
1099 7268.1083984375
1199 7260.8984375
1299 7255.181640625
1399 7250.541015625
1499 7246.685546875
1599 7243.41650390625
1699 7240.5927734375
1799 7238.11474609375
1899 7235.90771484375
1999 7233.9208984375


In [None]:
import torch.nn.functional as F
loss_func = F.cross_entropy
def accuracy(out, yb): return (torch.argmax(out, dim=1)==yb).float().mean()

loss_func(model(x,av), y), accuracy(model(x,av),y)

(tensor(1.0302, grad_fn=<NllLossBackward>), tensor(0.5239))

In [None]:
model.string()

'ASC_TRAIN=-0.7364017963409424, ASC_SM=tensor([0]), ASC_CAR=-0.06861856579780579, B_TIME=-0.5858228206634521, B_COST=-0.1433737725019455'

### TasteMNL with Negative Loglikelihood

In [None]:
import torch.nn.functional as F

In [None]:
class TasteMNL(torch.nn.Module):
    
    def __init__(self, N):
        super().__init__()
        self.linear1 = torch.nn.Linear(8,50)
        self.linear2 = torch.nn.Linear(50,12)
        self.linear3 = torch.nn.Linear(12,4)
        self.ASC_SM    = torch.tensor([0])
        self.ASC_TRAIN = torch.zeros(N)   
        self.ASC_CAR = torch.zeros(N)   
        self.B_TIME = torch.zeros(N)   
        self.B_COST = torch.zeros(N)   
        
    def forward(self, z, x, av=None):        
        
        Beta = F.relu(self.linear1(z))
        Beta = F.relu(self.linear2(Beta))
        Beta = self.linear3(Beta)
        self.ASC_TRAIN = Beta[:,0]
        self.ASC_CAR = Beta[:,1]
        self.B_TIME = Beta[:,2]
        self.B_COST = Beta[:,3]
        
        # Calculate V
        V1 = self.ASC_TRAIN + self.B_TIME * torch.tensor(x['TRAIN_TT'].values, dtype=torch.float) + \
            self.B_COST * torch.tensor(x['TRAIN_CO'].values, dtype=torch.float)
        V2 = self.ASC_SM    + self.B_TIME * torch.tensor(x['SM_TT'].values, dtype=torch.float)    + \
            self.B_COST * torch.tensor(x['SM_CO'].values, dtype=torch.float)
        V3 = self.ASC_CAR   + self.B_TIME * torch.tensor(x['CAR_TT'].values, dtype=torch.float)   + \
            self.B_COST * torch.tensor(x['CAR_CO'].values, dtype=torch.float)
        # Join with availability
        V1 = V1 * torch.tensor(av['TRAIN_AV'].values, dtype=torch.float)
        V2 = V2 * torch.tensor(av['SM_AV'].values, dtype=torch.float)
        V3 = V3 * torch.tensor(av['CAR_AV'].values, dtype=torch.float)
        # Concat into one matrix
        V = torch.cat((V1.unsqueeze(-1),V2.unsqueeze(-1),V3.unsqueeze(-1)),1)
        # Get probality and loglikelihood
        probs = V.exp()/(V.exp().sum(-1,keepdim=True))
        return probs
    
  
    def string(self):
        return f'ASC_TRAIN={self.ASC_TRAIN}, ASC_SM={self.ASC_SM}, ASC_CAR={self.ASC_CAR}, B_TIME={self.B_TIME}, B_COST={self.B_COST}'

In [None]:
# Data prepraration
# Personal attributes
z = data[['AGE','MALE','INCOME','FIRST','PURPOSE','GA','WHO', 'LUGGAGE']].copy()
z = torch.tensor(z.values, dtype=torch.float)
# Alternative attributesnll
x = data[cols]
# Observed choice (Train:0, SM: 1, Car: 2)
y = data['CHOICE']
y = torch.tensor(y.values,dtype=torch.long)
y = y - 1
# Availability
av = data[['TRAIN_AV','SM_AV', 'CAR_AV']]

z.shape, x.shape, y.shape, av.shape

(torch.Size([7484, 8]), (7484, 6), torch.Size([7484]), (7484, 3))

In [None]:
# N = z.shape[0]
# model = TasteMNL(N)
# lrs = [3e-3, 1e-3, 5e-4, 1e-4]
# for i in range(len(lrs)):
#     optimizer =  torch.optim.Adam(model.parameters(), lr = lrs[i], weight_decay = 0.0)
#     for t in range(12000):
#         # Forward
#         probs = model(z, x, av)
#         # Negative loglikelihood
#         loss = nll(probs,y)
#         if t % 2000 == 0:
#             print(i, t, loss.item())
#         # Backward
#         model.zero_grad()
#         loss.backward()
#         optimizer.step()

0.003
0 0 8695.7451171875
0 2000 4906.64013671875
0 4000 4712.39013671875
0 6000 4615.0185546875
0 8000 4557.451171875
0 10000 4513.49853515625
0.001
1 0 4491.91748046875
1 2000 4472.71875
1 4000 4454.26904296875
1 6000 4438.056640625
1 8000 4422.37646484375
1 10000 4412.49658203125
0.0005
2 0 4404.4248046875
2 2000 4393.63330078125
2 4000 4384.96923828125
2 6000 4378.716796875
2 8000 4372.66357421875
2 10000 4367.78564453125
0.0001
3 0 4362.2783203125
3 2000 4359.9150390625
3 4000 4357.2451171875
3 6000 4354.8203125
3 8000 4352.54443359375
3 10000 4350.1650390625


In [None]:
N = z.shape[0]
model = TasteMNL(N)
lr = 3e-3
# optimizer = torch.optim.SGD(model.parameters(),lr=lr)
optimizer =  torch.optim.Adam(model.parameters(), lr = lr, weight_decay = 0.0)
for t in range(12000):
    # Forward
    probs = model(z, x, av)
    # Negative loglikelihood
    loss = nll(probs,y)
    if t % 1000 == 0:
        print(t, loss.item())
    # Backward
    model.zero_grad()
    loss.backward()
    optimizer.step()

0 7624.8876953125
1000 4736.88427734375
2000 4543.5029296875
3000 4460.92138671875
4000 4413.04248046875
5000 4370.56494140625
6000 4332.6181640625
7000 4301.42236328125
8000 4268.51123046875
9000 4249.654296875
10000 4228.23046875
11000 4216.103515625


In [None]:
lr = 1e-3
optimizer =  torch.optim.Adam(model.parameters(), lr = lr, weight_decay = 0.0)
for t in range(12000):
    # Forward
    probs = model(z, x, av)
    # Negative loglikelihood
    loss = nll(probs,y)
    if t % 1000 == 0:
        print(t, loss.item())
    # Backward
    model.zero_grad()
    loss.backward()
    optimizer.step()

0 4199.02685546875
1000 4191.92431640625
2000 4182.482421875
3000 4174.251953125
4000 4165.7646484375
5000 4158.6015625
6000 4148.3466796875
7000 4137.77001953125
8000 4128.26513671875
9000 4122.544921875
10000 4111.57470703125
11000 4103.73681640625


In [None]:
lr = 5e-4
optimizer =  torch.optim.Adam(model.parameters(), lr = lr, weight_decay = 0.0)
for t in range(12000):
    # Forward
    probs = model(z, x, av)
    # Negative loglikelihood
    loss = nll(probs,y)
    if t % 1000 == 0:
        print(t, loss.item())
    # Backward
    model.zero_grad()
    loss.backward()
    optimizer.step()

0 4098.0341796875
1000 4089.334228515625
2000 4082.322265625
3000 4076.16259765625
4000 4070.8017578125
5000 4064.81640625
6000 4059.5166015625
7000 4055.72607421875
8000 4050.66357421875
9000 4046.42724609375
10000 4044.3515625
11000 4036.862060546875


In [None]:
lr = 1e-4
optimizer =  torch.optim.Adam(model.parameters(), lr = lr, weight_decay = 0.0)
for t in range(12000):
    # Forward
    probs = model(z, x, av)
    # Negative loglikelihood
    loss = nll(probs,y)
    if t % 1000 == 0:
        print(t, loss.item())
    # Backward
    model.zero_grad()
    loss.backward()
    optimizer.step()

0 4033.870361328125
1000 4030.62255859375
2000 4028.701171875
3000 4026.5771484375
4000 4024.5498046875
5000 4022.677978515625
6000 4021.061279296875
7000 4019.315673828125
8000 4017.79931640625
9000 4016.43359375
10000 4015.12255859375
11000 4013.778076171875


In [None]:
import torch.nn.functional as F
loss_func = F.cross_entropy
def accuracy(out, yb): return (torch.argmax(out, dim=1)==yb).float().mean()
loss_func(model(z,x,av), y), accuracy(model(z,x,av),y)

(tensor(0.8182, grad_fn=<NllLossBackward>), tensor(0.7636))

In [None]:
model.ASC_TRAIN.mean(), model.ASC_CAR.mean()

(tensor(-2.2415, grad_fn=<MeanBackward0>),
 tensor(-2.3657, grad_fn=<MeanBackward0>))

In [None]:
 model.B_TIME.mean(), model.B_COST.mean()

(tensor(-1.4012, grad_fn=<MeanBackward0>),
 tensor(-1.2147, grad_fn=<MeanBackward0>))

In [None]:
 model.B_TIME.shape, model.B_COST.shape

(torch.Size([7484]), torch.Size([7484]))

In [None]:
## VALIDATION
df_valid = pd.read_csv('./data/swissmetro_valid.csv')
df_valid = cleaning(df_valid)
# Personal attributes
z_valid = df_valid[['AGE','MALE','INCOME','FIRST','PURPOSE','GA','WHO', 'LUGGAGE']].copy()
z_valid = torch.tensor(z_valid.values, dtype=torch.float)
# Alternative attributesnll
x_valid = df_valid[cols]
# Observed choice (Train:0, SM: 1, Car: 2)
y_valid = df_valid['CHOICE']
y_valid = torch.tensor(y_valid.values,dtype=torch.long)
y_valid = y_valid - 1
# Availability
av_valid = df_valid[['TRAIN_AV','SM_AV', 'CAR_AV']]

z_valid.shape, x_valid.shape, y_valid.shape, av_valid.shape

(torch.Size([1604, 8]), (1604, 6), torch.Size([1604]), (1604, 3))

In [None]:
loss_func(model(z_valid,x_valid,av_valid), y_valid), 

(tensor(0.8466, grad_fn=<NllLossBackward>),)

In [None]:
accuracy(model(z_valid,x_valid,av_valid),y_valid)

tensor(0.7251)