In [None]:
import os

# Mount Google Drive
# (Colab-only: `google.colab` is imported here, so this cell needs a Colab runtime.)
from google.colab import drive
drive.mount('/content/drive')

# Create the results folder on the mounted Drive.
# `mkdir -p` does not fail when the folder already exists, so the cell can be re-run.
!mkdir -p "/content/drive/My Drive/HOTEL_log_Sep26"



Mounted at /content/drive


In [None]:
# Output folder for the result files written further down with os.path.join(root_dir, ...).
# It is the same Drive path that the `mkdir -p` cell creates.
root_dir = "/content/drive/My Drive/HOTEL_log_Sep26/"

In [None]:

import torch.nn as nn
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

import numpy as np
import time
import math
from scipy import stats
import copy

import matplotlib.pyplot as plt
from IPython.display import HTML
# import matplotlib.tri as tri


In [None]:
# upload berbeglia's generated data
!git clone https://github.com/Layneww/choice-models-sushi-hotel.git

Cloning into 'choice-models-sushi-hotel'...
remote: Enumerating objects: 346, done.[K
remote: Counting objects: 100% (143/143), done.[K
remote: Compressing objects: 100% (134/134), done.[K
remote: Total 346 (delta 100), reused 21 (delta 7), pack-reused 203 (from 1)[K
Receiving objects: 100% (346/346), 97.79 MiB | 30.28 MiB/s, done.
Resolving deltas: 100% (176/176), done.


In [None]:
cd choice-models-sushi-hotel/

/content/choice-models-sushi-hotel


In [None]:
!python setup.py install

running install
!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()
!!

        ********************************************************************************
        Please avoid running ``setup.py`` and ``easy_install``.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://github.com/pypa/setuptools/issues/917 for details.
        ********************************************************************************

!!
  self.initialize_options()
running bdist_egg
running egg_info
creating python_choice_models.egg-info
writing python_choice_mode

In [None]:
!unzip -o data_sets/hotel/hotel.zip -d data_sets/hotel/

Archive:  data_sets/hotel/hotel.zip
   creating: data_sets/hotel/hotel/
  inflating: data_sets/hotel/hotel/instance_1.json  
  inflating: data_sets/hotel/hotel/instance_2.json  
  inflating: data_sets/hotel/hotel/instance_3.json  
  inflating: data_sets/hotel/hotel/instance_4.json  
  inflating: data_sets/hotel/hotel/instance_5.json  


In [None]:
import json

def read_hotel_instance(file_name):
    """Load one hotel instance from a JSON file.

    Args:
        file_name: path of the JSON file to read.

    Returns:
        A tuple ``(product_labels, in_sample, out_of_sample)``:
        the raw ``'product_labels'`` entry of the file, and the results of
        ``Transaction.from_json`` on the ``'in_sample_transactions'`` and
        ``'out_of_sample_transactions'`` entries under ``'transactions'``.

    The ``'ground_model'`` entry, if any, is not read.
    """
    with open(file_name, 'r') as handle:
        payload = json.load(handle)
    labels = payload['product_labels']
    groups = payload['transactions']
    in_sample = Transaction.from_json(groups['in_sample_transactions'])
    out_of_sample = Transaction.from_json(groups['out_of_sample_transactions'])
    return labels, in_sample, out_of_sample


def read_sushi_instance(file_name):
  """Load one sushi instance from a JSON file.

  Args:
    file_name: path of the JSON file to read.

  Returns:
    A tuple ``(ground_truth, in_sample, out_of_sample)``: the result of
    ``Model.from_data`` on the ``'ground_model'`` entry, followed by
    ``Transaction.from_json`` applied to the in-sample and out-of-sample
    transaction lists.

  NOTE(review): ``Model`` is not imported in the visible part of this
  notebook -- confirm it is in scope before calling this function.
  """
  with open(file_name, 'r') as handle:
    payload = json.load(handle)
  ground_truth = Model.from_data(payload['ground_model'])
  groups = payload['transactions']
  in_sample = Transaction.from_json(groups['in_sample_transactions'])
  out_of_sample = Transaction.from_json(groups['out_of_sample_transactions'])
  return ground_truth, in_sample, out_of_sample


def read_synthetic_instance(file_name):
    """Load one synthetic instance from a JSON file.

    Args:
        file_name: path of the JSON file to read.

    Returns:
        A tuple ``(ground_truth, transactions)``: ``Model.from_data`` applied
        to the ``'ground_model'`` entry and ``Transaction.from_json`` applied
        to the in-sample transactions. Out-of-sample transactions are not read.

    NOTE(review): ``Model`` is not imported in the visible part of this
    notebook -- confirm it is in scope before calling this function.
    """
    with open(file_name, 'r') as handle:
        payload = json.load(handle)
    ground_truth = Model.from_data(payload['ground_model'])
    in_sample = Transaction.from_json(payload['transactions']['in_sample_transactions'])
    return ground_truth, in_sample

In [None]:
# utils
def choice2target(choice,offer_set,padding=True, padding_size=6):
  """Encode the chosen product as an indicator vector over the offer set.

  Args:
    choice: the chosen product.
    offer_set: sequence of offered products, compared element-wise to ``choice``.
    padding: when true, the result is zero-padded to ``padding_size`` entries.
    padding_size: length of the padded result.

  Returns:
    Without padding, an integer array with 1 where ``offer_set == choice``.
    With padding, a float array of length ``padding_size`` whose first
    ``len(offer_set)`` entries hold that indicator and the rest are 0.
  """
  indicator = np.equal(offer_set, choice).astype(int)
  if padding:
    padded = np.zeros(padding_size)
    padded[:len(offer_set)] = indicator
    return padded
  return indicator

def offer_set_to_one_hot_features_with_padding(offered_products, total_products,
                                               padding=True, padding_size=6):
  """Build the one-hot feature matrix of an offer set, one row per offered product.

  Args:
    offered_products: sequence of integer product ids; each id is used
      directly as a column index.
    total_products: collection of all products; only its length is used
      (number of feature columns).
    padding: when true, the matrix has ``padding_size`` rows instead of
      ``len(offered_products)``.
    padding_size: number of rows of the padded matrix.

  Returns:
    A float array of shape ``(rows, len(total_products))``. Row ``i`` has a 1
    in column ``offered_products[i]``, except that product 0 gets an all-zero
    row wherever it appears in the offer set. Padding rows carry ``-1e6`` in
    column 0 and zeros elsewhere.

  Raises:
    IndexError: if the offer set has more entries than rows, or a product id
      is not a valid column index.
  """
  offer_set_size = len(offered_products)
  n_rows = padding_size if padding else offer_set_size
  features = np.zeros((n_rows, len(total_products)))
  if padding:
    # padding rows are marked with (-1e6, 0, ..., 0)
    features[offer_set_size:padding_size, 0] = -1e6

  offered = np.asarray(offered_products, dtype=int)
  rows = np.arange(offer_set_size)
  features[rows, offered] = 1

  # Product 0 must have an all-zero feature row. The previous code only reset
  # features[0][0], which is wrong whenever product 0 is not listed first.
  features[rows[offered == 0], 0] = 0

  return features

# apply softmax to a vector
def softmax(u):
  """Return ``exp(u) / sum(exp(u))``, normalised over all elements of ``u``.

  The maximum of ``u`` is subtracted before exponentiating. This leaves the
  result unchanged mathematically but keeps ``np.exp`` from overflowing to
  ``inf`` (and the ratio from becoming NaN) for large utilities.

  Args:
    u: array-like of utilities.

  Returns:
    Array with the same shape as ``u`` whose entries sum to 1.
    An empty input yields an empty array.
  """
  u = np.asarray(u)
  if u.size == 0:
    return np.exp(u)
  weights = np.exp(u - np.max(u))
  return weights / np.sum(weights)

def prob2target(x):
  """Sample one index according to probabilities ``x`` and one-hot encode it.

  Args:
    x: 1-D sequence of probabilities, passed as ``p`` to ``np.random.choice``.

  Returns:
    A float array of ``len(x)`` zeros with a single 1 at the sampled index.
    The draw uses NumPy's global random state.
  """
  n_options = len(x)
  sampled = np.random.choice(np.arange(n_options), size=1, p=x)
  one_hot = np.zeros(n_options)
  one_hot[sampled] = 1
  return one_hot


In [None]:
ls data_sets/hotel

[0m[01;34mhotel[0m/  hotel.zip


## Data Generator for General Json data (Berbeglia)
Preprocess Json data
use one-hot feature
only run this part for NN implementation

In [None]:
def transaction2feature(products, transaction, padding=True, padding_size=6):
  """One-hot encode a transaction's offer set as a batch of one feature matrix.

  Args:
    products: collection of all products (forwarded as ``total_products``).
    transaction: object exposing ``offered_products``.
    padding, padding_size: forwarded to
      ``offer_set_to_one_hot_features_with_padding``.

  Returns:
    A NumPy array holding the feature matrix under a new leading axis of
    length 1.
  """
  one_hot = offer_set_to_one_hot_features_with_padding(
      transaction.offered_products, products,
      padding=padding, padding_size=padding_size)
  return np.array([one_hot])

class JSONChoiceHotelDataset(Dataset):
  """Torch dataset over the in-sample transactions of one hotel JSON instance.

  Attributes set by the constructor:
    products: ``[0]`` followed by the sorted integer keys of the product labels.
    data: in-sample transactions (what ``__getitem__``/``__len__`` index).
    test_data: out-of-sample transactions.
    padding, padding_size: padding settings used when encoding items.
    feature_dim: ``len(products)``.
    nchoices: ``padding_size`` when padding, otherwise the offer-set size of
      the first in-sample transaction.
  """

  def __init__(self, input_file, padding=True, padding_size=6):
    labels, in_sample, out_of_sample = read_hotel_instance(input_file)

    self.products = [0] + sorted(int(key) for key in labels.keys())
    self.data = in_sample
    self.test_data = out_of_sample
    self.padding = padding
    self.padding_size = padding_size
    self.feature_dim = len(self.products)
    # The first transaction is inspected even when padding overrides its size.
    first_offer_size = len(in_sample[0].offered_products)
    self.nchoices = padding_size if padding else first_offer_size

  def __getitem__(self, index):
    """Return ``(features, choices)`` tensors for the transaction at ``index``."""
    record = self.data[index]

    encoded = transaction2feature(self.products, record,
                                  padding=self.padding, padding_size=self.padding_size)
    # drop the leading batch axis added by transaction2feature
    features = torch.from_numpy(encoded).float()[0]
    target = choice2target(record.product, record.offered_products,
                           padding=self.padding, padding_size=self.padding_size)
    return features, torch.from_numpy(target)

  def __len__(self):
    """Number of in-sample transactions."""
    return len(self.data)


## Model
This part builds the model, using the data generated above as training data.


### NN

In [None]:
# NN Model
# this model has not adapted to
# - different offer set size at time t
# - different feature lengths

class MixedLogitChoice(nn.Module):
    """Equal-weight mixture of ``K`` multinomial-logit choice models.

    A single bias-free linear layer maps each alternative's ``d`` features to
    ``K`` utilities, one per mixture component. A softmax across dimension 1
    turns every component's utilities into choice probabilities, which are
    then averaged over the ``K`` components.
    """

    def __init__(self, d, nchoices, K, **kwargs):
        """
        Args:
            d: number of features per alternative.
            nchoices: number of alternatives (stored as ``self.n``).
            K: number of mixture components.
            **kwargs: accepted and ignored.
        """
        super().__init__()
        self.d, self.n, self.K = d, nchoices, K

        # one weight row per mixture component
        self.lin = nn.Linear(in_features=d, out_features=K, bias=False)
        self.act = nn.LogSoftmax(dim=1)

    def forward(self,x,**kwargs):
        """Return mixture choice probabilities.

        Args:
            x: tensor whose last dimension is ``d``; dimension 1 is the one
                normalised over and dimension 2 of the linear output is summed
                out (expected layout ``(batch, alternatives, d)`` -- TODO confirm).

        Returns:
            Tensor with the mixture dimension removed.
        """
        log_share = self.act(self.lin(x))
        # exp(log-softmax) gives per-component probabilities; average over K
        return torch.exp(log_share).sum(dim=2) / self.K

In [None]:
def torch_log_ignore_zero(x):
  """Return ``log(x)`` for strictly positive entries; leave other entries as they are.

  The result is a new tensor. The previous implementation wrote the
  logarithms into ``x`` itself, which silently changed the caller's tensor
  and raised on leaf tensors that require grad.

  Args:
    x: tensor of values.

  Returns:
    Tensor of the same shape as ``x``. Gradients flow through ``log`` for
    positive entries and unchanged for the rest.
  """
  positive = x > 0
  # clone + masked assignment avoids evaluating log at non-positive entries
  result = x.clone()
  result[positive] = torch.log(x[positive])
  return result

def NLL_loss(target, out):
  """Summed negative log-likelihood of ``target`` under probabilities ``out``.

  Args:
    target: tensor of weights per entry (1 for the chosen alternative).
    out: tensor of predicted probabilities, same shape as ``target``.

  Returns:
    Scalar tensor ``-sum(target * log(out))``; the logarithm is taken by
    ``torch_log_ignore_zero``. The value is a sum, not a mean.
  """
  log_probs = torch_log_ignore_zero(out)
  total = torch.mul(target, log_probs).sum()
  return -total

#### Train one instance
Run this part first to check that the data loader and the neural network work as expected, then go to **Performance Matrix** for the evaluations.


In [None]:
# Creating data indices for training and validation splits:
def train_test_split(dataset, batch_size=50, validation_split=.2, shuffle_dataset=True, random_seed=42):
  """Split ``dataset`` into training and validation ``DataLoader`` objects.

  Args:
    dataset: any object supporting ``len`` and integer indexing.
    batch_size: batch size of both loaders.
    validation_split: fraction of the samples (rounded down) used for validation.
    shuffle_dataset: when true, indices are shuffled before splitting.
    random_seed: seed for the shuffle. Note that it reseeds NumPy's global
      random state.

  Returns:
    ``(train_loader, validation_loader)``; each draws its indices through a
    ``SubsetRandomSampler``. With ``validation_split=0`` the validation
    loader is empty.
  """
  n_samples = len(dataset)
  order = list(range(n_samples))
  n_val = int(np.floor(validation_split * n_samples))
  if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(order)

  def _loader(subset):
    # loader restricted to `subset`, sampled in random order
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                       sampler=SubsetRandomSampler(subset))

  # the first n_val (shuffled) indices are held out for validation
  return _loader(order[n_val:]), _loader(order[:n_val])


In [None]:
class NNChoiceModel(object):
  """Training wrapper around a ``MixedLogitChoice`` network.

  Holds the network, the dataset it is trained on, the device, and the loss
  history. ``train`` may be called repeatedly; training continues from the
  current weights and ``train_time`` keeps accumulating.

  Attributes:
    model: the ``MixedLogitChoice`` network, placed on ``device``.
    d, nchoices, nmixtures: the constructor's size arguments.
    train_losses, test_losses: per-epoch averages, recorded only when
      ``train`` is called with ``eval=True``.
    train_accurs, test_accurs: never filled by this class.
    train_time: seconds spent in training steps, summed over all calls.
    saved_model_path_by_val, saved_model_path_by_train: always ``None``;
      checkpointing is not implemented.
  """

  def __init__(self, d, nchoices,nmixture,dataset,
               device=torch.device('cuda:0' if torch.cuda.is_available() else "cpu")):
    """
    Args:
      d: number of features per alternative.
      nchoices: number of alternatives per offer set.
      nmixture: number of mixture components (hidden nodes).
      dataset: object exposing ``data``; only ``len(dataset.data)`` is used.
      device: torch device for the model and the batches. The default is
        evaluated once, when the class is defined.
    """
    # Use the `nmixture` argument. The previous body read `nmixtures`, which
    # only worked when a global of that name happened to exist.
    self.model = MixedLogitChoice(d, nchoices, nmixture).to(device)
    self.d = d
    self.nchoices = nchoices
    self.nmixtures = nmixture

    # best model path (unused: checkpointing is not implemented)
    self.saved_model_path_by_val = None
    self.saved_model_path_by_train = None

    # record training losses
    self.train_losses = []
    self.train_accurs = []
    self.test_losses = []
    self.test_accurs = []
    self.train_time = 0

    # record dataset
    self.dataset = dataset

    # record server
    self.device = device
    print('device', device)

  def _make_optimizer(self, opt, lr, weight_decay):
    # Build the optimizer named by `opt` over the model parameters.
    params = self.model.parameters()
    if opt in ('adam', 'noisy-adam'):
      return torch.optim.Adam(params, lr=lr, weight_decay=weight_decay)
    if opt == 'sgd':
      return torch.optim.SGD(params, lr=lr, weight_decay=weight_decay)
    raise ValueError("unknown opt {!r}; expected 'adam', 'noisy-adam' or 'sgd'".format(opt))

  @staticmethod
  def _batch_loss(loss_type, target, out):
    # Per-sample average of the selected loss over one batch.
    batch_len = target.size()[0]
    if loss_type == 'NLL':
      return NLL_loss(target, out) / batch_len
    if loss_type == 'SQ':
      return torch.square(out - target).sum() / batch_len
    raise ValueError("unknown loss_type {!r}; expected 'NLL' or 'SQ'".format(loss_type))

  def train(self, model_name, batch_size, train_loader, validation_loader,
            num_epochs=1000, lr=1e-3, weight_decay = 0,
            loss_type = "NLL",padding=False, padding_size=6,
            upper_bound=np.log(1e6), lower_bound=-np.log(1e6),
            eval=False, opt='adam', noise=False,
            verbose=True,
            learning_rate_decay_step = 100000, gamma=1, stopping_train_loss = 1e-10, use_val=False):
    """Train the model for ``num_epochs`` epochs.

    Args:
      model_name: accepted for compatibility; unused.
      batch_size: batch size of ``train_loader``; used only to count processed
        samples for the learning-rate schedule.
      train_loader: iterable of ``(features, choices)`` batches.
      validation_loader: iterable of ``(features, choices)`` batches, read
        only when ``eval`` is true.
      num_epochs: number of passes over ``train_loader``.
      lr, weight_decay: optimizer settings.
      loss_type: ``'NLL'`` or ``'SQ'`` (squared error).
      padding: when true, column 0 of every parameter is reset to 1 after
        each step.
      padding_size: accepted for compatibility; unused.
      upper_bound, lower_bound: every parameter is clamped to this range after
        each step.
      eval: when true, compute the validation loss each epoch and append the
        epoch's train/validation losses to the history.
      opt: ``'adam'``, ``'noisy-adam'`` (both Adam) or ``'sgd'``.
      noise: when true, add Gaussian noise scaled by the current learning rate
        to every parameter after each step.
      verbose: print a progress line every 100 epochs.
      learning_rate_decay_step: the scheduler steps whenever the processed
        sample count is a multiple of this value.
      gamma: decay factor of the exponential learning-rate scheduler.
      stopping_train_loss, use_val: accepted for compatibility; unused
        (early stopping and best-model selection are not implemented).

    Raises:
      ValueError: for an unknown ``opt`` or ``loss_type``, or when
        ``train_loader`` yields no batches.
    """
    gmodel = self.model
    device = self.device
    # `opt=='adam' or 'noisy-adam'` used to be always true, so SGD was never selected.
    optimizer = self._make_optimizer(opt, lr, weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=gamma,last_epoch=-1)
    dataset_size = len(self.dataset.data)

    default_start = time.time()
    for epoch in range(num_epochs):
      # Train:
      gmodel.train()
      running_tloss = 0.0
      n_train_batches = 0
      for batch_index, (features, choices) in enumerate(train_loader):

        start_time = time.time()

        # move the batch to the model's device (no-op when already there)
        out = gmodel(features.to(device))
        target = choices.to(device)
        tloss = self._batch_loss(loss_type, target, out)

        if epoch==0 and batch_index ==0 and len(self.train_losses)==0:
          print('initial loss', round(tloss.item(),2))

        optimizer.zero_grad()
        tloss.backward()
        optimizer.step()

        if noise:
          lr_ = lr_scheduler.get_last_lr()[0]
          beta = 2*lr_ / ((1*lr_)**2)

          for p in gmodel.parameters():
            unit_noise = p.data.new(p.size()).normal_()
            p.data.add_(unit_noise, alpha=(2*lr_/beta)**0.5)

        with torch.no_grad():
          for param in gmodel.parameters():
            param.clamp_(lower_bound, upper_bound)

            if padding:
              # keep the weight of feature 0 fixed at 1
              param.data[:,0]=1

        if not (epoch*dataset_size + batch_index*batch_size) % learning_rate_decay_step:
          lr_scheduler.step()

        # record training time
        self.train_time +=  time.time() - start_time

        # detach so the running sum does not keep every batch's autograd graph alive
        running_tloss = running_tloss + tloss.detach()
        n_train_batches += 1

      if n_train_batches == 0:
        raise ValueError('train_loader yielded no batches')
      avg_tloss = float(running_tloss) / n_train_batches

      # Test: evaluate after each epoch
      avg_vloss = 0.0
      if eval:
        gmodel.eval()
        running_vloss = 0.0
        n_val_batches = 0
        with torch.no_grad():
          for features, choices in validation_loader:
            out = gmodel(features.to(device))
            target = choices.to(device)
            running_vloss = running_vloss + self._batch_loss(loss_type, target, out)
            n_val_batches += 1
        # an empty validation loader reports 0.0 instead of dividing by a stale counter
        if n_val_batches:
          avg_vloss = float(running_vloss) / n_val_batches

        self.train_losses.append(avg_tloss)
        self.test_losses.append(avg_vloss)

      if (not (epoch+1) % 100) and verbose:
        print('time', round(self.train_time,1),
              'time extra', round(time.time()-default_start, 1),
              'epoch', epoch+1,
              'train loss', round(avg_tloss,5),
              'test loss', round(avg_vloss,5),
                      )

  def getPredictedWeights(self):
    """Return the linear layer's weight matrix as a NumPy array."""
    # .cpu() is required before .numpy() when the model lives on a GPU
    return self.model.lin.weight.detach().cpu().numpy()

  def plotLoss(self, MA=100, se=1):
    """Plot moving averages of the recorded training/validation losses.

    Args:
      MA: moving-average window, in epochs.
      se: first (1-based) moving-average point to show.

    Accuracy curves are drawn only when accuracies were recorded for every
    epoch; this class itself never records any.
    """
    epochs = range(se,len(self.train_losses)+1-MA+1)
    plt.plot(epochs, moving_average(self.train_losses,n=MA)[se-1:], 'grey', label='Training loss')
    plt.plot(epochs, moving_average(self.test_losses,n=MA)[se-1:], 'b', label='validation loss')
    n_epochs = len(self.train_losses)
    if self.train_accurs and len(self.train_accurs) == n_epochs:
      plt.plot(epochs, moving_average(self.train_accurs,n=MA)[se-1:], 'grey',label='Training accur', linestyle='--')
    if self.test_accurs and len(self.test_accurs) == n_epochs:
      plt.plot(epochs, moving_average(self.test_accurs,n=MA)[se-1:], 'b',label='validation accur', linestyle='--')
    # `gtype` is never set by this class; fall back to the model's class name
    label = getattr(self, 'gtype', type(self.model).__name__)
    plt.title('Training and Validation loss/accur of {} with assume K={}'.format(label, self.nmixtures))
    plt.xlabel('Epochs')
    plt.ylabel('Loss/Accur')
    plt.legend()
    plt.show()

def moving_average(a, n=10):
    """Simple moving average of ``a`` over a window of ``n`` values.

    Args:
        a: sequence of numbers.
        n: window length.

    Returns:
        Float array with ``len(a) - n + 1`` entries (empty when ``a`` is
        shorter than the window); entry ``k`` is the mean of ``a[k:k+n]``.
    """
    prefix = np.cumsum(np.array(a), dtype=float)
    # a leading zero lets every window sum be written as a difference of prefix sums
    padded = np.concatenate(([0.0], prefix))
    return (padded[n:] - padded[:-n]) / n

## Performance Matrix

### RMSE - for Json dataset
$$\operatorname{RMSE}_{soft} = \sqrt{ \frac{1}{T} \sum_{t=1}^T \sum_{j\in S_t} \left(\Pr(j\mid S_t,\beta_0) - \Pr(j\mid S_t,\hat\beta)\right)^2}$$

$$\operatorname{RMSE}_{hard} = \sqrt{ \frac{1}{T} \sum_{t=1}^T \sum_{j\in S_t} \left(\mathbf{1}(N_{jt}==1) - \Pr(j\mid S_t,\hat\beta)\right)^2}$$

For synthetic, as we know the ground truth, use the soft RMSE.

To be consistent with the `estimate.py` in the package `python-choice-models`, we show the following version of soft RMSE
$$\operatorname{RMSE}_{soft} = \sqrt{ \frac{1}{T} \sum_{t=1}^T  \left(\Pr(j_t\mid S_t,\beta_0) - \Pr(j_t\mid S_t,\hat\beta)\right)^2}$$
where $j_t$ is the choice made at time $t$ with offered set $S_t$.



### Preliminary test

In [None]:

def prob_of_NN(model, prod, t,products,padding=True, padding_size=10,
               device=torch.device('cuda:0' if torch.cuda.is_available() else "cpu")):
  """Predicted probability that ``prod`` is chosen from transaction ``t``'s offer set.

  Args:
    model: callable torch module mapping a feature batch to probabilities.
      It is switched to eval mode as a side effect.
    prod: product whose probability is requested; must be in
      ``t.offered_products``.
    t: transaction exposing ``offered_products``.
    products: collection of all products, used to size the one-hot features.
    padding: whether the features are padded. This is now forwarded; it was
      previously ignored and padding was always applied.
    padding_size: number of rows of the padded feature matrix.
    device: device the features are moved to before the forward pass. The
      default is evaluated once, when the function is defined.

  Returns:
    The model output at the position of ``prod`` within the offer set.

  Raises:
    ValueError: if ``prod`` is not in ``t.offered_products``.
  """
  features = transaction2feature(products, t, padding=padding, padding_size=padding_size)
  features = torch.from_numpy(features).float().to(device)
  model.eval()
  with torch.no_grad():
    probs = model(features).cpu().numpy()
  # rows of the output follow the order of the offer set
  index = t.offered_products.index(prod)
  return probs[0][index]

from python_choice_models.transactions.base import Transaction
def soft_rmse_known_model(model, dataset, ground_truth_model,padding=True, padding_size=10):
  """RMSE between network and ground-truth probabilities over every offered product.

  For each transaction in ``dataset.data`` and each product in its offer set,
  compares ``prob_of_NN`` with ``ground_truth_model.probability_of`` for a
  transaction that picks that product from the same offer set.

  Args:
    model: trained network passed to ``prob_of_NN``.
    dataset: object exposing ``data`` (the transactions to score).
    ground_truth_model: object exposing ``products`` and ``probability_of``.
    padding, padding_size: forwarded to ``prob_of_NN``.

  Returns:
    Square root of the mean squared difference over all (transaction, product) pairs.
  """
  catalog = ground_truth_model.products
  squared_error = 0.0
  n_terms = 0.0
  for observed in dataset.data:
    for candidate in observed.offered_products:
      estimated = prob_of_NN(model, candidate, observed, catalog,
                             padding=padding, padding_size=padding_size)
      reference = ground_truth_model.probability_of(
          Transaction(candidate, observed.offered_products))
      squared_error += (estimated - reference) ** 2
      n_terms += 1
  return np.sqrt(squared_error / float(n_terms))


def soft_rmse(model, dataset, ground_truth_model,padding=True, padding_size=6):
  """RMSE between network and ground-truth probabilities of the observed choices.

  Only the product actually chosen in each transaction of ``dataset.data`` is
  scored, so there is one term per transaction.

  Args:
    model: trained network passed to ``prob_of_NN``.
    dataset: object exposing ``data`` (the transactions to score).
    ground_truth_model: object exposing ``products`` and ``probability_of``.
    padding, padding_size: forwarded to ``prob_of_NN``.

  Returns:
    Square root of the mean squared difference over all transactions.
  """
  catalog = ground_truth_model.products
  squared_error = 0.0
  n_terms = 0.0
  for observed in dataset.data:
    estimated = prob_of_NN(model, observed.product, observed, catalog,
                           padding=padding, padding_size=padding_size)
    reference = ground_truth_model.probability_of(observed)
    squared_error += (estimated - reference) ** 2
    n_terms += 1
  return np.sqrt(squared_error / float(n_terms))


def hard_rmse(model, out_of_sample_transactions, products, padding=True, padding_size=6):
  """RMSE between predicted probabilities and the 0/1 indicator of the observed choice.

  Args:
    model: trained network passed to ``prob_of_NN``.
    out_of_sample_transactions: iterable of transactions exposing
      ``offered_products`` and ``product``.
    products: collection of all products.
    padding, padding_size: forwarded to ``prob_of_NN``.

  Returns:
    Square root of the mean squared error, averaged over every
    (transaction, offered product) pair rather than per transaction.
  """
  squared_error = 0.0
  n_terms = 0.0
  for observed in out_of_sample_transactions:
    for candidate in observed.offered_products:
      predicted = prob_of_NN(model, candidate, observed, products,
                             padding=padding, padding_size=padding_size)
      was_chosen = int(candidate==observed.product)
      squared_error += (predicted - was_chosen) ** 2
      n_terms += 1
  return np.sqrt(squared_error / float(n_terms))

def hard_nll(model, out_of_sample_transactions, products, padding=True, padding_size=6):
  """Mean log-probability the model assigns to the observed choices.

  Despite the name, the value is NOT negated: it is the average
  log-likelihood (at most 0). Negate it to obtain a negative log-likelihood.

  The model is now evaluated only for the chosen product of each transaction.
  Previously it was run once per offered product and every other result was
  discarded; the returned value is unchanged.

  Args:
    model: trained network passed to ``prob_of_NN``.
    out_of_sample_transactions: iterable of transactions exposing
      ``offered_products`` and ``product``.
    products: collection of all products.
    padding, padding_size: forwarded to ``prob_of_NN``.

  Returns:
    Average of ``log(p)`` over the observed choices.

  Raises:
    ZeroDivisionError: when no transaction's choice appears in its offer set
      (including an empty input).
  """
  nll = 0.0
  amount_terms = 0.0
  for t in out_of_sample_transactions:
    for prod in t.offered_products:
      # only the chosen product contributes a term
      if prod == t.product:
        prob = prob_of_NN(model, prod, t, products, padding=padding, padding_size=padding_size)
        nll += np.log(prob)
        amount_terms += 1
  return nll / float(amount_terms)



In [None]:

class TensorDataset(Dataset):
    """Dataset pairing a feature tensor with a target tensor along dimension 0."""

    def __init__(self, feature_tensor, target_tensor):
        """Store both tensors as given (no copy, no shape check)."""
        self.feature_tensor, self.target_tensor = feature_tensor, target_tensor

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair at ``index``."""
        features = self.feature_tensor[index]
        target = self.target_tensor[index]
        return features, target

    def __len__(self):
        """Number of samples, taken from the feature tensor's first dimension."""
        n_samples = self.feature_tensor.size(0)
        return n_samples



In [None]:
# Train and evaluate the NN choice model on hotel instances 1-5 (Adam, lr=5e-3).
# Padding length used for each hotel instance
# (presumably the largest offer-set size per instance -- TODO confirm).
ds = [11, 11, 9, 5, 7]
for instance_index in range(1,6):
  torch.manual_seed(0)
  print('---------- Hotel {} ------------'.format(instance_index))
  json_file_name='data_sets/hotel/hotel/instance_{}.json'.format(instance_index)
  max_padding_size = ds[instance_index-1]
  dataset = JSONChoiceHotelDataset(json_file_name, padding_size=max_padding_size)
  # Encode the whole in-sample set once, as a single batch, and keep it on the device.
  dataloader = torch.utils.data.DataLoader(dataset, batch_size=len(dataset))
  features, labels = next(iter(dataloader))
  device=torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
  features = features.to(device)
  labels = labels.to(device)
  tensor_dataset = TensorDataset(features, labels)
  T = len(dataset)
  # no validation split: test_loader is empty and all in-sample data is used for training
  validation_size = 0.0
  batch_size = 8
  print('batch size:', batch_size)
  train_loader, test_loader = train_test_split(tensor_dataset, batch_size=batch_size, validation_split=validation_size, shuffle_dataset=True, random_seed=42)

  d=dataset.feature_dim
  nmixtures = 10000
  print('number of hidden nodes:', nmixtures)
  nn_model = NNChoiceModel(d, dataset.nchoices, nmixtures, dataset)
  # 20 rounds of 200 epochs on the same model, with metrics reported after each round
  for i in range(20):
    nn_model.train(model_name='hotel_{}_instance'.format(instance_index),
                   loss_type='SQ',
                  batch_size = batch_size, lr=5e-3, opt='adam', #noise=True, weight_decay=1e-9,
                  train_loader=train_loader, upper_bound=8, lower_bound=-8,
                  learning_rate_decay_step=500000,
                  validation_loader = test_loader,
                  num_epochs=200, gamma=1, padding=True, padding_size=max_padding_size,use_val=False)

    saved_model = nn_model.model

    rmse = hard_rmse(saved_model, dataset.test_data, dataset.products, padding=True, padding_size=max_padding_size)
    in_rmse = hard_rmse(saved_model, dataset.data, dataset.products, padding=True, padding_size = max_padding_size)
    saved_model.eval()
    with torch.no_grad():
      out = saved_model(features)
      in_loss = NLL_loss(labels, out).item() / T
    # hard_nll returns the mean log-likelihood; negate it so that both reported
    # NLL values use the same (positive) sign convention.
    loss = -hard_nll(saved_model, dataset.test_data, dataset.products, padding=True, padding_size = max_padding_size)

    # the last value is the in-sample NLL (it used to be mislabelled 'in-sample rmse')
    print('out-of-sample rmse', rmse, 'in-sample rmse', in_rmse, 'out-of-sample nll', loss, 'in-sample nll', in_loss)

---------- Hotel 1 ------------
batch size: 8
number of hidden nodes: 10000
device cuda:0
initial loss 0.91
time 84.0 time extra 102.2 epoch 100 train loss 0.34952 test loss 0.0
time 166.2 time extra 203.0 epoch 200 train loss 0.34892 test loss 0.0
out-of-sample rmse 0.22182799816534898 in-sample rmse 0.21529225101034863 out-of-sample nll -0.7903745007514954 in-sample rmse 0.8018766097963871
initial loss 0.44
time 248.0 time extra 100.4 epoch 100 train loss 0.34801 test loss 0.0
time 329.2 time extra 200.0 epoch 200 train loss 0.34883 test loss 0.0
out-of-sample rmse 0.22180609553633893 in-sample rmse 0.21526564693327485 out-of-sample nll -0.7904871785865639 in-sample rmse 0.8013043631790716
initial loss 0.59
time 411.1 time extra 100.3 epoch 100 train loss 0.34887 test loss 0.0
time 492.9 time extra 200.6 epoch 200 train loss 0.34878 test loss 0.0
out-of-sample rmse 0.2217987058893189 in-sample rmse 0.2152607154531956 out-of-sample nll -0.7903729420338037 in-sample rmse 0.800977678977

In [None]:
import os
# Hard-coded results for hotel instance 1 (Adam), saved as one tab-separated line.
# NOTE(review): the columns look like training time, out-of-sample RMSE, in-sample RMSE,
# out-of-sample NLL, in-sample NLL, copied by hand from a run -- TODO confirm.
with open(os.path.join(root_dir, 'nn-adam_1.txt'), 'w') as f:
  f.write('908.7\t0.22178966722613075\t0.21525660365014973\t0.7898742852570876\t0.8009170335610106')


In [None]:
# Hard-coded results for hotel instance 2 (Adam), saved as one tab-separated line.
# NOTE(review): presumably time, out/in-sample RMSE, out/in-sample NLL -- TODO confirm.
with open(os.path.join(root_dir, 'nn-adam_2.txt'), 'w') as f:
  f.write('613.7\t0.20875867282307645\t0.2076561415263884\t0.7569169343158763\t0.7523185712858267')


In [None]:
# Hard-coded results for hotel instance 3 (Adam), saved as one tab-separated line.
# NOTE(review): presumably time, out/in-sample RMSE, out/in-sample NLL -- TODO confirm.
with open(os.path.join(root_dir, 'nn-adam_3.txt'), 'w') as f:
  f.write('1707.8\t0.20732514439508717\t0.20637711925974497\t0.7145531167664866\t0.7127658555963806')


In [None]:
# Hard-coded results for hotel instance 5 (Adam), saved as one tab-separated line.
# NOTE(review): presumably time, out/in-sample RMSE, out/in-sample NLL -- TODO confirm.
with open(os.path.join(root_dir, 'nn-adam_5.txt'), 'w') as f:
  f.write('336.8\t0.23868402388601653\t0.2375867348327292\t0.7879139946956261\t0.7479081176519394')

In [None]:
import os
# Hard-coded result lines for the noisy-Adam runs, one output file per hotel instance.
# Each entry is (file name, tab-separated line written to that file).
noisy_adam_results = [
  ('nn-noisy-adam_1.txt', '1585.7\t0.22178382004681163\t0.21526709048827364\t0.7890873348487998\t0.8007987188481653'),
  ('nn-noisy-adam_2.txt', '688.7\t0.2087715198102344\t0.2076730836105634\t0.7581730581098988\t0.753329997495584'),
  ('nn-noisy-adam_3.txt', '1886.0\t0.20733967102055684\t0.20639045504789366\t0.7158754984224875\t0.7137613038457122'),
  ('nn-noisy-adam_4.txt', '370.2\t0.2820037780788805\t0.27643864661214\t0.7313284973664718\t0.698735443462025'),
  ('nn-noisy-adam_5.txt', '373.2\t0.23867869728584454\t0.2375867845695361\t0.7867977072210873\t0.7478160259127616'),
]
for result_file, result_line in noisy_adam_results:
  with open(os.path.join(root_dir, result_file), 'w') as f:
    f.write(result_line)

In [None]:
# Train and evaluate the NN choice model on hotel instances 4-5 (Adam, lr=1e-3).
# Padding length used for each hotel instance
# (presumably the largest offer-set size per instance -- TODO confirm).
ds = [11, 11, 9, 5, 7]
for instance_index in range(4,6):
  torch.manual_seed(0)
  print('---------- Hotel {} ------------'.format(instance_index))
  json_file_name='data_sets/hotel/hotel/instance_{}.json'.format(instance_index)
  max_padding_size = ds[instance_index-1]
  dataset = JSONChoiceHotelDataset(json_file_name, padding_size=max_padding_size)
  # Encode the whole in-sample set once, as a single batch, and keep it on the device.
  dataloader = torch.utils.data.DataLoader(dataset, batch_size=len(dataset))
  features, labels = next(iter(dataloader))
  device=torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
  features = features.to(device)
  labels = labels.to(device)
  tensor_dataset = TensorDataset(features, labels)
  T = len(dataset)
  # no validation split: test_loader is empty and all in-sample data is used for training
  validation_size = 0.0
  batch_size = 8
  print('batch size:', batch_size)
  train_loader, test_loader = train_test_split(tensor_dataset, batch_size=batch_size, validation_split=validation_size, shuffle_dataset=True, random_seed=42)

  d=dataset.feature_dim
  nmixtures = 10000
  print('number of hidden nodes:', nmixtures)
  nn_model = NNChoiceModel(d, dataset.nchoices, nmixtures, dataset)
  # 20 rounds of 200 epochs on the same model, with metrics reported after each round
  for i in range(20):
    nn_model.train(model_name='hotel_{}_instance'.format(instance_index),
                   loss_type='SQ',
                  batch_size = batch_size, lr=1e-3, opt='adam', #noise=True, weight_decay=1e-9,
                  train_loader=train_loader, upper_bound=8, lower_bound=-8,
                  learning_rate_decay_step=500000,
                  validation_loader = test_loader,
                  num_epochs=200, gamma=1, padding=True, padding_size=max_padding_size,use_val=False)

    saved_model = nn_model.model

    rmse = hard_rmse(saved_model, dataset.test_data, dataset.products, padding=True, padding_size=max_padding_size)
    in_rmse = hard_rmse(saved_model, dataset.data, dataset.products, padding=True, padding_size = max_padding_size)
    saved_model.eval()
    with torch.no_grad():
      out = saved_model(features)
      in_loss = NLL_loss(labels, out).item() / T
    # hard_nll returns the mean log-likelihood; negate it so that both reported
    # NLL values use the same (positive) sign convention.
    loss = -hard_nll(saved_model, dataset.test_data, dataset.products, padding=True, padding_size = max_padding_size)

    # the last value is the in-sample NLL (it used to be mislabelled 'in-sample rmse')
    print('out-of-sample rmse', rmse, 'in-sample rmse', in_rmse, 'out-of-sample nll', loss, 'in-sample nll', in_loss)

---------- Hotel 4 ------------
batch size: 8
number of hidden nodes: 500
device cuda:0
initial loss 0.92
time 19.0 time extra 23.3 epoch 100 train loss 0.34333 test loss 0.0
time 37.1 time extra 45.5 epoch 200 train loss 0.34333 test loss -0.0
out-of-sample rmse 0.28246863215401485 in-sample rmse 0.2771753662461098 out-of-sample nll -0.7311925578117371 in-sample rmse 0.703739235970107
initial loss 0.58
time 55.6 time extra 22.7 epoch 100 train loss 0.34604 test loss -0.0
time 74.1 time extra 45.4 epoch 200 train loss 0.34333 test loss 0.0
out-of-sample rmse 0.2824713847485076 in-sample rmse 0.27716633983908273 out-of-sample nll -0.7317001538926905 in-sample rmse 0.7037131351774389
initial loss 0.25
time 92.5 time extra 22.9 epoch 100 train loss 0.3433 test loss 0.0
time 110.5 time extra 44.9 epoch 200 train loss 0.34326 test loss 0.0
out-of-sample rmse 0.28244978833898826 in-sample rmse 0.27714776520674345 out-of-sample nll -0.7315935923836449 in-sample rmse 0.7035960224270821
initial

In [None]:
# Hard-coded results for hotel instance 4 (Adam), saved as one tab-separated line.
# NOTE(review): presumably time, out/in-sample RMSE, out/in-sample NLL -- TODO confirm.
with open(os.path.join(root_dir, 'nn-adam_4.txt'), 'w') as f:
  f.write('370.3\t0.28201825924806784\t0.2764290322738004\t0.7319163341955706\t0.698704224283045')

In [None]:
ls

[0m[01;34mbuild[0m/      estimate.py            [01;34mpython_choice_models.egg-info[0m/  [01;32mrun_all.sh[0m*
[01;34mdata_sets[0m/  [01;34mexamples[0m/              README.md                       setup.py
[01;34mdist[0m/       [01;34mpython_choice_models[0m/  requirements.txt


In [None]:
# Re-evaluate the most recently trained model.
# NOTE: `nn_model`, `dataset` and `max_padding_size` are not defined in this cell; they are
# whatever the last executed training cell left in the kernel, so the numbers printed here
# depend on execution order.
saved_model = nn_model.model

#print('soft rmse', soft_rmse(saved_model, test_dataset, dataset.ground_truth, padding_size=max_padding_size))  # sensitive to suitable batch size & learning rate
rmse = hard_rmse(saved_model, dataset.test_data, dataset.products, padding=True, padding_size=max_padding_size)
in_rmse = hard_rmse(saved_model, dataset.data, dataset.products, padding=True, padding_size = max_padding_size)

print('out-of-sample rmse', rmse, 'in-sample rmse', in_rmse)

out-of-sample rmse 0.22180479764750835 in-sample rmse 0.21527642486415077


In [None]:
# Recompute the hard RMSE for saved_model on both splits; only the
# test-split value is printed, in_rmse is just refreshed as a notebook global.
eval_kwargs = {'padding': True, 'padding_size': max_padding_size}
rmse = hard_rmse(saved_model, dataset.test_data, dataset.products, **eval_kwargs)
in_rmse = hard_rmse(saved_model, dataset.data, dataset.products, **eval_kwargs)
print(rmse)

0.22214996105063317


In [None]:
# Train the NN choice model on hotel instances 2-5 with SGD and report the
# hard RMSE on each instance's test data.
ds = [11, 11, 9, 5, 7]  # padding size per instance: ds[i-1] is used for instance i
# The device choice does not depend on the instance, so resolve it once.
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
for instance_index in range(2, 6):
  torch.manual_seed(0)  # reset the torch RNG so every instance starts from the same seed
  print('---------- Hotel {} ------------'.format(instance_index))
  json_file_name = 'data_sets/hotel/hotel/instance_{}.json'.format(instance_index)
  max_padding_size = ds[instance_index - 1]
  dataset = JSONChoiceHotelDataset(json_file_name, padding_size=max_padding_size)

  # One batch of size len(dataset) pulls every sample out as a single
  # (features, labels) pair, which is then moved to `device`.
  dataloader = torch.utils.data.DataLoader(dataset, batch_size=len(dataset))
  features, labels = next(iter(dataloader))
  tensor_dataset = TensorDataset(features.to(device), labels.to(device))
  T = len(dataset)  # not read in this cell; kept because it is a notebook-level global

  validation_size = 0.0  # validation_split passed to train_test_split below
  batch_size = 8
  print('batch size:', batch_size)
  train_loader, test_loader = train_test_split(
      tensor_dataset, batch_size=batch_size, validation_split=validation_size,
      shuffle_dataset=True, random_seed=42)

  d = dataset.feature_dim
  nmixtures = 500
  print('number of hidden nodes:', nmixtures)
  nn_model = NNChoiceModel(d, dataset.nchoices, nmixtures, dataset)
  # noise=True and weight_decay=1e-9 are not passed in this run (left disabled).
  nn_model.train(model_name='hotel_{}_instance'.format(instance_index),
                 batch_size=batch_size, lr=3, opt='sgd',
                 train_loader=train_loader, upper_bound=8, lower_bound=-8,
                 learning_rate_decay_step=500000,
                 validation_loader=test_loader,
                 num_epochs=3000, gamma=0.99, padding=True,
                 padding_size=max_padding_size, use_val=False)

  saved_model = nn_model.model

  # The value below comes from hard_rmse on dataset.test_data, so label it as
  # such (the previous label claimed it was a soft RMSE).
  rmse = hard_rmse(saved_model, dataset.test_data, dataset.products, padding=True, padding_size=max_padding_size)
  print('out-of-sample rmse', rmse)

---------- Hotel 2 ------------
batch size: 8
number of hidden nodes: 500
device cuda:0
initial loss 2.67
time 39.3 time extra 45.4 epoch 100 train loss 0.79876 test loss 0.0
time 79.0 time extra 91.3 epoch 200 train loss 0.77906 test loss 0.0
time 119.2 time extra 137.6 epoch 300 train loss 0.77736 test loss 0.0
time 158.3 time extra 182.6 epoch 400 train loss 0.77775 test loss 0.0
time 198.1 time extra 228.5 epoch 500 train loss 0.77526 test loss 0.0
time 238.5 time extra 275.0 epoch 600 train loss 0.77276 test loss 0.0
time 277.6 time extra 320.1 epoch 700 train loss 0.77259 test loss 0.0
time 317.2 time extra 365.7 epoch 800 train loss 0.77179 test loss 0.0
time 356.9 time extra 411.5 epoch 900 train loss 0.77178 test loss 0.0
time 396.5 time extra 457.3 epoch 1000 train loss 0.77281 test loss 0.0
time 436.2 time extra 503.1 epoch 1100 train loss 0.76958 test loss 0.0
time 475.6 time extra 548.6 epoch 1200 train loss 0.76937 test loss 0.0
time 515.1 time extra 594.1 epoch 1300 trai

In [None]:
# Train the NN choice model on hotel instances 1-5 with SGD (noise=True,
# weight_decay=1e-9) and report the hard RMSE on each instance's test data.
ds = [11, 11, 9, 5, 7]  # padding size per instance: ds[i-1] is used for instance i
for instance_index in range(1, 6):
  torch.manual_seed(0)  # reset the torch RNG so every instance starts from the same seed
  print('---------- Hotel {} ------------'.format(instance_index))
  json_file_name = 'data_sets/hotel/hotel/instance_{}.json'.format(instance_index)
  max_padding_size = ds[instance_index - 1]
  dataset = JSONChoiceHotelDataset(json_file_name, padding_size=max_padding_size)

  # One batch of size len(dataset) pulls every sample out as a single
  # (features, labels) pair; the tensors are not moved to another device here.
  dataloader = torch.utils.data.DataLoader(dataset, batch_size=len(dataset))
  features, labels = next(iter(dataloader))
  tensor_dataset = TensorDataset(features, labels)
  T = len(dataset)  # not read in this cell; kept because it is a notebook-level global

  validation_size = 0.0  # validation_split passed to train_test_split below
  batch_size = 8
  print('batch size:', batch_size)
  train_loader, test_loader = train_test_split(
      tensor_dataset, batch_size=batch_size, validation_split=validation_size,
      shuffle_dataset=True, random_seed=42)

  d = dataset.feature_dim
  nmixtures = 500
  print('number of hidden nodes:', nmixtures)
  nn_model = NNChoiceModel(d, dataset.nchoices, nmixtures, dataset)
  nn_model.train(model_name='hotel_{}_instance'.format(instance_index),
                 batch_size=batch_size, lr=3, opt='sgd', noise=True, weight_decay=1e-9,
                 train_loader=train_loader, upper_bound=8, lower_bound=-8,
                 learning_rate_decay_step=500000,
                 validation_loader=test_loader,
                 num_epochs=3000, gamma=0.99, padding=True,
                 padding_size=max_padding_size, use_val=False)

  saved_model = nn_model.model

  # The value below comes from hard_rmse on dataset.test_data, so label it as
  # such (the previous label claimed it was a soft RMSE).
  rmse = hard_rmse(saved_model, dataset.test_data, dataset.products, padding=True, padding_size=max_padding_size)
  print('out-of-sample rmse', rmse)

---------- Hotel 1 ------------
batch size: 8
number of hidden nodes: 500
device cpu
initial loss 2.39
time 130.0 time extra 148.4 epoch 100 train loss 0.83343 test loss 0.0
time 259.6 time extra 295.9 epoch 200 train loss 0.82214 test loss -0.0
time 388.4 time extra 442.5 epoch 300 train loss 0.81736 test loss 0.0
time 516.2 time extra 588.2 epoch 400 train loss 0.81627 test loss -0.0
time 643.9 time extra 733.8 epoch 500 train loss 0.8142 test loss -0.0
time 771.9 time extra 879.6 epoch 600 train loss 0.81377 test loss -0.0
time 901.4 time extra 1027.4 epoch 700 train loss 0.80942 test loss -0.0
time 1029.7 time extra 1173.6 epoch 800 train loss 0.80884 test loss 0.0
time 1158.2 time extra 1319.9 epoch 900 train loss 0.80802 test loss -0.0
time 1285.9 time extra 1465.5 epoch 1000 train loss 0.80875 test loss -0.0
time 1413.6 time extra 1611.0 epoch 1100 train loss 0.80858 test loss 0.0
time 1541.7 time extra 1757.2 epoch 1200 train loss 0.80708 test loss -0.0
time 1668.9 time extra 1