In [None]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

In [None]:
# Load the training and validation CSV splits, then display both frame shapes
df_train, df_valid = map(
    pd.read_csv,
    ("./data/swissmetro_train.csv", "./data/swissmetro_valid.csv"),
)
(df_train.shape, df_valid.shape)

((7484, 28), (1604, 28))

In [None]:
class ChoiceDataset:
    """Map-style dataset exposing alternative attributes and availabilities as tensors."""

    def __init__(self, df, atts_cols, avai_cols, choice_col=None):
        """
        :param df: the choice data frame
        :param atts_cols: attribute columns of alternatives (like travel time, cost)
        :param avai_cols: availability columns of alternatives
        :param choice_col: optional column holding the chosen alternative. When given,
            every item also carries it under the key ``"y"`` as an int64 tensor.
            NOTE(review): values are passed through unchanged, so they are assumed to
            already be 0-based alternative indices -- confirm against the data.
        """
        self.df = df
        self.atts_cols = atts_cols
        self.avai_cols = avai_cols
        self.choice_col = choice_col
        # Store numpy arrays of alternative attributes, availability and (optionally) choices
        self.atts = self.df[atts_cols].values
        self.avai = self.df[avai_cols].values
        self.choice = None if choice_col is None else self.df[choice_col].values
        # Map each column name to its position inside the arrays above
        self.atts_idxs = {att: idx for idx, att in enumerate(atts_cols)}
        self.avai_idxs = {av: idx for idx, av in enumerate(avai_cols)}

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        # Must use this instance's frame; a module-level `df` may not exist or may be
        # a different split.
        return self.df.shape[0]

    def __getitem__(self, index):
        """Return a dict of tensors for ``index`` (an int or a slice).

        Keys: ``"x"`` (attributes, float32), ``"av"`` (availability, float32) and,
        only when ``choice_col`` was supplied, ``"y"`` (choice, int64).
        """
        item = {"x": torch.tensor(self.atts[index], dtype=torch.float32),
                "av": torch.tensor(self.avai[index], dtype=torch.float32)}
        if self.choice is not None:
            item["y"] = torch.tensor(self.choice[index], dtype=torch.long)
        return item

class MNL(nn.Module):
    """Multinomial logit over three alternatives (train, SM, car).

    Utilities are linear in travel time and cost with shared coefficients
    ``B_TIME`` / ``B_COST`` and alternative-specific constants for train and car
    (SM has no constant).
    """

    def __init__(self, atts_idxs, avai_idxs):
        """
        :param atts_idxs: mapping from attribute column name to its column position in ``x``
        :param avai_idxs: mapping from availability column name to its column position in ``av``
        """
        super(MNL, self).__init__()
        self.atts_idxs = atts_idxs
        self.avai_idxs = avai_idxs
        # Initiate parameters
        self.ASC_TRAIN = nn.Parameter(torch.full((), 0.1))
        self.ASC_CAR = nn.Parameter(torch.full((), 0.1))
        self.B_TIME = nn.Parameter(torch.full((), 0.1))
        self.B_COST = nn.Parameter(torch.full((), 0.1))

    def forward(self, x, av):
        """Return choice probabilities, one row per observation, columns (train, SM, car).

        :param x: 2-D tensor of attributes, columns laid out as in ``atts_idxs``
        :param av: 2-D tensor of availability flags, columns laid out as in ``avai_idxs``;
            a value <= 0 marks the alternative as unavailable
        :return: tensor of shape (rows, 3). Unavailable alternatives get probability 0.
            A row in which no alternative is available yields NaN.
        """
        # Calculate V
        V1 = (self.ASC_TRAIN +
              self.B_TIME * x[:, self.atts_idxs["TRAIN_TT"]] +
              self.B_COST * x[:, self.atts_idxs["TRAIN_CO"]])
        V2 = (self.B_TIME * x[:, self.atts_idxs["SM_TT"]] +
              self.B_COST * x[:, self.atts_idxs["SM_CO"]])
        V3 = (self.ASC_CAR +
              self.B_TIME * x[:, self.atts_idxs["CAR_TT"]] +
              self.B_COST * x[:, self.atts_idxs["CAR_CO"]])
        # Concat utilities and availabilities into matching (rows, 3) matrices.
        # Availability is looked up through this instance's mapping, not a global.
        V = torch.cat((V1.unsqueeze(-1), V2.unsqueeze(-1), V3.unsqueeze(-1)), 1)
        avail = torch.cat((av[:, self.avai_idxs["TRAIN_AV"]].unsqueeze(-1),
                           av[:, self.avai_idxs["SM_AV"]].unsqueeze(-1),
                           av[:, self.avai_idxs["CAR_AV"]].unsqueeze(-1)), 1)
        # Exclude unavailable alternatives from the choice set. Scaling V by the flag
        # would leave them with V = 0 (weight exp(0) = 1) instead of weight 0.
        V = V.masked_fill(avail <= 0, float("-inf"))
        # softmax is evaluated in a numerically stable way, so large utilities do not
        # overflow to inf/inf = NaN.
        probs = torch.softmax(V, dim=-1)
        return probs

    def string(self):
        """Return a one-line summary of the current parameter values."""
        return f'ASC_TRAIN={self.ASC_TRAIN}, ASC_CAR={self.ASC_CAR}, B_TIME={self.B_TIME}, B_COST={self.B_COST}'

In [None]:
# Column names: per-alternative attributes (travel time / cost) and availability flags
atts_cols = ['TRAIN_TT', 'TRAIN_CO', 'SM_TT', 'SM_CO', 'CAR_TT', 'CAR_CO']
avai_cols = ['TRAIN_AV', 'CAR_AV', 'SM_AV']

# Wrap each split in a dataset built from the same column lists
ds_train, ds_valid = (
    ChoiceDataset(frame, atts_cols, avai_cols) for frame in (df_train, df_valid)
)

# Column name -> column position lookups handed to the model
atts_idxs = dict(zip(atts_cols, np.arange(len(atts_cols))))
avai_idxs = dict(zip(avai_cols, np.arange(len(avai_cols))))

In [None]:
# Number of rows used for the model sanity check in the next cell
batch = 16

In [None]:
# Build the model and sanity-check it on the first `batch` training rows.
model = MNL(atts_idxs, avai_idxs)
first_rows = ds_train[0:batch]  # slice once and reuse both tensors
# Call the module itself rather than .forward() so registered hooks are honoured.
model(first_rows["x"], first_rows["av"])

tensor([[7.4603e-04, 2.2645e-07, 9.9925e-01],
        [7.4984e-01, 5.5982e-04, 2.4960e-01],
        [9.9593e-01, 4.0701e-03, 5.5944e-10],
        [3.9737e-01, 9.8244e-03, 5.9281e-01],
        [2.5306e-11, 5.1756e-17, 1.0000e+00],
        [9.9997e-01, 4.5516e-06, 2.2544e-05],
        [5.9152e-01, 1.1973e-02, 3.9651e-01],
        [9.9424e-01, 5.4848e-03, 2.7307e-04],
        [9.9044e-01, 5.4780e-04, 9.0084e-03],
        [7.3033e-01, 2.6867e-01, 9.9352e-04],
        [9.8523e-01, 1.4774e-02, 6.7074e-07],
        [6.6922e-03, 9.0804e-05, 9.9322e-01],
        [9.8560e-01, 5.4371e-03, 8.9643e-03],
        [       nan,        nan, 0.0000e+00],
        [       nan,        nan, 0.0000e+00],
        [4.2539e-01, 3.8791e-04, 5.7422e-01]], grad_fn=<DivBackward0>)

In [None]:
# Batch both splits with the shared `batch` size instead of a repeated literal.
# Only the training split is reshuffled; validation keeps its row order so that
# evaluation output is repeatable from run to run.
dl_train = DataLoader(ds_train, batch_size=batch, shuffle=True, num_workers=4)
dl_valid = DataLoader(ds_valid, batch_size=batch, shuffle=False, num_workers=4)

In [None]:
# Inspect the attribute name -> column index mapping stored on the training dataset
ds_train.atts_idxs

{'TRAIN_TT': 0,
 'TRAIN_CO': 1,
 'SM_TT': 2,
 'SM_CO': 3,
 'CAR_TT': 4,
 'CAR_CO': 5}

In [None]:
# Rebuild the same name -> index mapping by hand for comparison
{att:idx for att, idx in zip(atts_cols, np.arange(len(atts_cols)))}

{'TRAIN_TT': 0,
 'TRAIN_CO': 1,
 'SM_TT': 2,
 'SM_CO': 3,
 'CAR_TT': 4,
 'CAR_CO': 5}

In [None]:
# Column order of the attribute sub-frame
df_train[atts_cols].columns

Index(['TRAIN_TT', 'TRAIN_CO', 'SM_TT', 'SM_CO', 'CAR_TT', 'CAR_CO'], dtype='object')

In [None]:
# Attribute columns as a raw numpy array; display its shape
train_arr = df_train[atts_cols].values
train_arr.shape

(7484, 6)

In [None]:
# First row of attribute values
train_arr[0]

array([182, 111,  81, 132, 210, 155])

In [None]:
# float32 tensor copy of the attribute array
train_ts = torch.tensor(train_arr, dtype=torch.float32)

In [None]:
# Element-wise check: the array column found via atts_idxs equals the TRAIN_TT frame column
train_arr[:, atts_idxs["TRAIN_TT"]] == df_train["TRAIN_TT"].values

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# TRAIN_TT column of the numpy array
train_arr[:, atts_idxs["TRAIN_TT"]]

array([182, 132, 220, ..., 283, 148, 170])

In [None]:
# Same TRAIN_TT column, read from the float32 tensor
train_ts[:, atts_idxs["TRAIN_TT"]]

tensor([182., 132., 220.,  ..., 283., 148., 170.])

In [None]:
# For every attribute, confirm the array column found via atts_idxs matches the frame column
for col, position in atts_idxs.items():
    matches = np.array_equal(train_arr[:, position], df_train[col].values)
    print(col, matches)

TRAIN_TT True
TRAIN_CO True
SM_TT True
SM_CO True
CAR_TT True
CAR_CO True


In [None]:
def nll(probs, target):
    """Return the summed negative log-likelihood of the chosen alternatives.

    :param probs: 2-D tensor; row i holds the probabilities of each alternative for observation i
    :param target: 1-D integer tensor; entry i is the column of ``probs`` chosen in observation i
    :return: scalar tensor, minus the sum of the logs of the selected probabilities
    """
    rows = range(target.shape[0])
    chosen = probs[rows, target]  # pick probs[i, target[i]] for every row i
    return -chosen.log().sum()

def train(data_loader, model, optimizer, device):
    """Run one optimisation pass over ``data_loader``.

    :param data_loader: iterable of batches; each batch is a mapping with the keys
        ``'x'``, ``'av'`` and ``'y'``
    :param model: module called as ``model(x, av)``; its output is scored with ``nll``
    :param optimizer: optimizer whose gradients are reset and stepped once per batch
    :param device: currently unused -- batches are consumed on whatever device the
        loader yields them
    """
    model.train()
    for data in data_loader:
        x = data['x']
        av = data['av']
        targets = data['y']

        optimizer.zero_grad()
        outputs = model(x, av)
        loss = nll(outputs, targets)
        loss.backward()
        optimizer.step()
        
        
def evaluate(data_loader, model, device):
    model.eval()
    final_targets = []
    final_outputs = []
    with torch.no_grad():        
        for data in data_loader:
            x = data['x']
            av = data['av']
#             x = x.to(device, dtype=torch.float32)
#             av = av.to(device, dtype=torch.float32)
            targets = data['y'].detach().cpu().numpy().tolist()
            outputs = model(x, av)        
            final_targets.extend(targets)
            final_outputs.extend(outputs)