In [None]:
import os

# Expose no CUDA devices to this process. This is set before the numerical
# libraries below are imported so that it is already in place when they load.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import sys

# Add the directory two levels above the working directory to the import path
# (presumably so the local checkout of choice_learn is importable - confirm).
sys.path.append("../../")

import numpy as np
import pandas as pd

In [None]:
from choice_learn.data import ChoiceDataset
from choice_learn.datasets import load_swissmetro

# Load the Swissmetro data as a DataFrame and preview its first rows.
swiss_df = load_swissmetro(as_frame=True)
swiss_df.head()

In [None]:
# Keep only the rows where the CAR, SM and TRAIN availability flags are all 1.
all_available = (
    (swiss_df.CAR_AV == 1) & (swiss_df.SM_AV == 1) & (swiss_df.TRAIN_AV == 1)
)
# .copy() makes the filtered frame independent of the loaded one, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
swiss_df = swiss_df.loc[all_available].copy()

In [None]:
# swiss_df = swiss_df.loc[swiss_df.PURPOSE.isin([1, 3])]

# NOTE: this cell overwrites columns of swiss_df in place, so running it a
# second time divides the *_TT and *_HE columns by 100 again.

# Normalizing values by 100
tt_columns = ["TRAIN_TT", "SM_TT", "CAR_TT"]
swiss_df[tt_columns] = swiss_df[tt_columns] / 100.0

he_columns = ["TRAIN_HE", "SM_HE"]
swiss_df[he_columns] = swiss_df[he_columns] / 100.0

# 1 where GA == 1, else 0. The same indicator is stored for train and SM and
# is used below to zero out their cost for those rows.
ga_indicator = (swiss_df["GA"] == 1).astype(int)
swiss_df["train_free_ticket"] = ga_indicator
swiss_df["sm_free_ticket"] = ga_indicator.copy()

# Cost columns, divided by 100; train and SM cost is 0 when the free-ticket
# indicator is 1. Column arithmetic replaces the former row-wise apply().
swiss_df["TRAIN_travel_cost"] = (
    swiss_df["TRAIN_CO"] * (1 - swiss_df["train_free_ticket"])
) / 100
swiss_df["SM_travel_cost"] = (
    swiss_df["SM_CO"] * (1 - swiss_df["sm_free_ticket"])
) / 100
swiss_df["CAR_travel_cost"] = swiss_df["CAR_CO"] / 100

In [None]:
# Display the column labels of swiss_df.
swiss_df.columns

In [None]:
# Store SM_SEATS as float32.
swiss_df["SM_SEATS"] = swiss_df["SM_SEATS"].astype("float32")

In [None]:
# Number of rows left in swiss_df at this point.
print(len(swiss_df))

In [None]:
# Build the ChoiceDataset from the wide DataFrame.
# - items_id lists the three alternatives; with choice_format="items_index" the
#   CHOICE column is read as an index into this list.
# - items_features_suffixes names the per-alternative columns by suffix
#   (e.g. TRAIN_TT, SM_TT, CAR_TT for "TT").
# - Each shared feature is listed once ("SM_SEATS" was previously listed twice,
#   which duplicated that column in the dataset).
dataset = ChoiceDataset.from_single_wide_df(
    df=swiss_df,
    choices_column="CHOICE",
    items_id=["TRAIN", "SM", "CAR"],
    shared_features_columns=[
        "GA",
        "AGE",
        "SM_SEATS",
        "LUGGAGE",
        "PURPOSE",
        "FIRST",
        "TICKET",
        "WHO",
        "MALE",
        "INCOME",
        "ORIGIN",
        "DEST",
    ],
    items_features_suffixes=["TT", "travel_cost", "HE"],
    choice_format="items_index",
)

In [None]:
# Number of entries in the ChoiceDataset.
print(len(dataset))

In [None]:
from choice_learn.models import ConditionalLogit

# One entry per shared coefficient: (feature name, item indexes it applies to).
# Item indexes presumably follow the items_id order used to build the dataset
# (TRAIN, SM, CAR) - confirm.
clogit_shared_terms = [
    ("TT", [0, 1, 2]),
    ("travel_cost", [0, 1, 2]),
    ("HE", [0, 1]),
    ("GA", [0, 1]),
    ("AGE", [0]),
    ("LUGGAGE", [2]),
    ("SM_SEATS", [1]),
]

model = ConditionalLogit()
for term_name, term_items in clogit_shared_terms:
    model.add_shared_coefficient(feature_name=term_name, items_indexes=term_items)
# Intercept coefficients for items 1 and 2 only.
model.add_coefficients(feature_name="intercept", items_indexes=[1, 2])

# Fit on the first 7234 entries of the dataset.
model.fit(dataset[:7234])

In [None]:
# Display the trainable weights of the ConditionalLogit model.
model.trainable_weights

In [None]:
# Evaluate on the whole dataset. This includes the first 7234 entries that the
# model was fitted on.
model.evaluate(dataset)

In [None]:
# Random train/test split of the dataset indexes.
# A fixed seed makes the split (and everything trained on it) reproducible.
SPLIT_SEED = 42
N_TRAIN = 7234  # number of entries assigned to the training split

indexes = np.random.default_rng(SPLIT_SEED).permutation(len(dataset))
train = indexes[:N_TRAIN]
test = indexes[N_TRAIN:]

In [None]:
from choice_learn.models.learning_mnl import LearningMNL

# Features passed to the model as nn_features.
nn_feature_names = [
    "PURPOSE",
    "FIRST",
    "TICKET",
    "WHO",
    "MALE",
    "INCOME",
    "ORIGIN",
    "DEST",
]

swiss_model = LearningMNL(
    optimizer="Adam",
    nn_features=nn_feature_names,
    nn_layers_widths=[100],
    epochs=500,
    batch_size=32,
)

# One entry per shared coefficient: (feature name, item indexes it applies to).
lmnl_shared_terms = [
    ("TT", [0, 1, 2]),
    ("travel_cost", [0, 1, 2]),
    ("HE", [0, 1]),
    ("GA", [0, 1]),
    ("AGE", [0]),
    ("LUGGAGE", [2]),
    ("SM_SEATS", [1]),
]
for term_name, term_items in lmnl_shared_terms:
    swiss_model.add_shared_coefficient(feature_name=term_name, items_indexes=term_items)
# Intercept coefficients for items 1 and 2 only.
swiss_model.add_coefficients(feature_name="intercept", items_indexes=[1, 2])

# Fit on the random training split and track the loss on the held-out split.
hist = swiss_model.fit(dataset[train], val_dataset=dataset[test], verbose=1)

In [None]:
import matplotlib.pyplot as plt

# Plot the two loss curves recorded by fit() on one labelled axis so they can
# be told apart.
fig, ax = plt.subplots()
ax.plot(hist["train_loss"], label="train_loss")
ax.plot(hist["test_loss"], label="test_loss")
ax.set_xlabel("epoch")  # assumes one history entry per epoch - confirm
ax.set_ylabel("loss")
ax.set_title("LearningMNL loss history")
ax.legend();

In [None]:
 swiss_model.evaluate(dataset[:7234], batch_size=32) * 7234

In [None]:
# Evaluate on the held-out split used as val_dataset during fitting
# (dataset[7234:] is not that split), scaled by the split's actual size
# instead of a hard-coded count.
swiss_model.evaluate(dataset[test], batch_size=32) * len(test)

In [None]:
# Display the trainable weights of the LearningMNL model.
swiss_model.trainable_weights

In [None]:
# Total number of entries in the dataset (train and test splits together).
len(dataset)

In [None]:
# Summarise the model's nn_model attribute (presumably the neural-network
# component built from nn_layers_widths - confirm).
swiss_model.nn_model.summary()