## Introduction to modelling with RUMnet

In [None]:
import os

# Remove GPU use
# NOTE(review): the variable is set before the choice_learn imports below,
# presumably so that the compute backend never sees a GPU -- keep this order.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import sys

# Add the parent directory to the import path (presumably so that a local
# checkout of choice_learn is importable from this notebook -- TODO confirm).
sys.path.append("../")

import numpy as np
import pandas as pd

from choice_learn.data import ChoiceDataset
from choice_learn.models import RUMnet
from choice_learn.datasets import load_swissmetro

In [None]:
# Load the Swissmetro data as a DataFrame (as_frame=True) for manual preprocessing.
df = load_swissmetro(as_frame=True)

In [None]:
# Keep only the rows whose CHOICE value is different from 0.
has_choice = df["CHOICE"] != 0
df = df.loc[has_choice]

In [None]:
# Number of rows left in df at this point.
len(df)

In [None]:
# Preview the first rows of the DataFrame.
df.head()

In [None]:
# Shift CHOICE down by one so that labels start at 0
# (rows with CHOICE == 0 are filtered out before this cell).
choices = df.CHOICE.to_numpy() - 1

# One availability column per alternative, in the order TRAIN, SM, CAR.
contexts_items_availabilities = df[["TRAIN_AV", "SM_AV", "CAR_AV"]].to_numpy()

# Per-alternative (TT, CO, HE) columns, listed in the same alternative order.
items_features_columns = [
    ["TRAIN_TT", "TRAIN_CO", "TRAIN_HE"],
    ["SM_TT", "SM_CO", "SM_HE"],
    ["CAR_TT", "CAR_CO", "CAR_HE"],
]
# Stack to shape (n_rows, 3 alternatives, 3 features). The explicit float cast
# matters: rescaling an integer array in place would silently truncate the
# scaled values.
contexts_items_features = np.stack(
    [df[columns].to_numpy() for columns in items_features_columns], axis=1
).astype(np.float64)

# Rescale each feature (last axis) by its own constant: TT / 1000, CO / 5000, HE / 100.
contexts_items_features = contexts_items_features / np.array([1000.0, 5000.0, 100.0])

# One-hot identifier of each of the three alternatives.
fixed_items_features = np.eye(3)

# The categorical columns below are one-hot encoded rather than used as raw codes.
categorical_columns = [
    "GROUP",
    "PURPOSE",
    "FIRST",
    "TICKET",
    "WHO",
    "LUGGAGE",
    "AGE",
    "MALE",
    "INCOME",
    "GA",
    "ORIGIN",
    "DEST",
]
long_data = pd.get_dummies(df, columns=categorical_columns, drop_first=False)

# Collect the generated dummy columns, in the order they appear in long_data:
# str.startswith accepts a tuple, so a single test covers every prefix.
dummy_prefixes = tuple(categorical_columns)
contexts_features_columns = [
    col for col in long_data.columns if col.startswith(dummy_prefixes)
]

contexts_features = long_data[contexts_features_columns].to_numpy()

In [None]:
# (n_rows, n_columns) of the one-hot encoded context features.
# NOTE(review): the column count presumably has to match the hard-coded
# "num_customer_features" given to the model below -- confirm.
contexts_features.shape

In [None]:
# Cast every feature array to float32 first; each one is then handed to
# ChoiceDataset wrapped in a one-element tuple. Availabilities and choices
# are passed through unchanged.
fixed_items_f32 = fixed_items_features.astype("float32")
contexts_f32 = contexts_features.astype("float32")
contexts_items_f32 = contexts_items_features.astype("float32")

dataset = ChoiceDataset(
    fixed_items_features=(fixed_items_f32,),
    contexts_features=(contexts_f32,),
    contexts_items_features=(contexts_items_f32,),
    contexts_items_availabilities=contexts_items_availabilities,
    choices=choices,
)

In [None]:
# Hard-coded input sizes handed to the model.
feature_dims = {
    "num_products_features": 6,
    "num_customer_features": 83,
}

# Width / depth / heterogeneity settings for the "x", "z" and "u" parts.
# NOTE(review): exact semantics are defined by RUMnet -- check its documentation.
network_sizes = {
    "width_eps_x": 20,
    "depth_eps_x": 5,
    "heterogeneity_x": 10,
    "width_eps_z": 20,
    "depth_eps_z": 5,
    "heterogeneity_z": 10,
    "width_u": 20,
    "depth_u": 5,
}

# Remaining options (presumably optimisation-related -- verify against RUMnet).
training_options = {
    "tol": 1,
    "optimizer": "Adam",
    "lr": 0.002,
    "logmin": 1e-10,
    "label_smoothing": 0.02,
    "callbacks": [],
}

# Merge the groups into the single keyword dictionary, keeping the key order.
model_args = {**feature_dims, **network_sizes, **training_options}

model = RUMnet(**model_args)
model.instantiate()

In [None]:
# Fit the model on the dataset: 100 epochs, batches of 100.
n_epochs = 100
batch_size = 100
model.fit(dataset, n_epochs=n_epochs, batch_size=batch_size)