In [1]:
import tensorflow as tf
import os
import sys
import time
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from lightgbm import LGBMClassifier

from choice_learn.data import ChoiceDataset
from choice_learn.models import SimpleMNL

2025-12-31 17:53:34.344300: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-12-31 17:53:34.344610: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-12-31 17:53:34.423339: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-12-31 17:53:34.578695: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-31 17:53:40.099752: I external/local_xla/xla/

In [2]:
# Set device
if tf.config.list_physical_devices('GPU'):
    device = '/GPU:0'
    print(f"Use GPU: {tf.config.list_physical_devices('GPU')[0]}")
else:
    device = '/CPU:0'
    print("Use CPU")

Use GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [3]:
project_root = os.path.abspath(os.getcwd())
sys.path.insert(0, project_root)

# Make sure you are in the package's main directory
print(project_root)

/root/Q3/DeepHalo-tf/Final/DeepHalo


In [4]:
from DeepHalo import DeepHaloChoiceModel

Initializing DeepHaloChoiceModel package v1.0.0


# Load Data from ChoiceDataset

In [5]:
from choice_learn.datasets import load_swissmetro
from choice_learn.datasets import load_modecanada

dataset_name = 'swiss'

def choose_dataset(dataset_name):
    if dataset_name == 'swiss':

        # Load SwissMetro (transportation) data
        swiss_df = load_swissmetro(as_frame=True)
        
        kept_columns = [
            "PURPOSE", "AGE",
            "CAR_AV", "TRAIN_AV", "SM_AV",
            "CAR_TT", "TRAIN_TT", "SM_TT",
            "CAR_CO", "TRAIN_CO", "SM_CO",
            "CHOICE",
        ]
        swiss_df = swiss_df[kept_columns]

        # Build ChoiceDataset in wide format
        dataset = ChoiceDataset.from_single_wide_df(
            df=swiss_df,
            items_id=["TRAIN", "SM", "CAR"],   # names used in *_AV, *_TT, *_CO columns
            choices_column="CHOICE",
            choice_format="items_index",       # CHOICE is 0/1/2 index of TRAIN/SM/CAR
            shared_features_columns=None,
            # shared_features_columns=["GROUP", "SURVEY", "SP", "ID", "PURPOSE", "FIRST", 
            #                         "TICKET", "WHO", "LUGGAGE", "AGE", "MALE", "INCOME", 
            #                         "GA", "ORIGIN", "DEST"],
            # items_features_suffixes=["CO", "TT", "HE", "SEATS"],
            items_features_suffixes=["CO", "TT"],    # features: *_CO, *_TT
            available_items_suffix="AV",
            delimiter="_",
        )

    elif dataset_name == 'canada':

        # Load ModeCanada (transportation) data
        canada_transport_df = load_modecanada(as_frame=True)
    
        dataset = ChoiceDataset.from_single_long_df(
        df=canada_transport_df,
        choices_column="choice",
        items_id_column="alt",
        choices_id_column="case",
        shared_features_columns=None,
        items_features_columns=["cost", "freq", "ovt", "ivt"],
        choice_format="one_zero")

    else:
        raise ValueError(f"Unknown dataset: {dataset_name}") 

    return dataset


dataset = choose_dataset(dataset_name)
print(dataset.summary())


%%% Summary of the dataset:
Number of items: 3
Number of choices: 10719
 No Shared Features by Choice registered


 Items Features by Choice:
2 items features 
 with names: (['CO', 'TT'],)



In [6]:
def train_test_split_choice_dataset(dataset, train_idx, test_idx): 
    
    # Split ChoiceDataset into train/test ChoiceDatasets preserving structure.

  
    # Extract train/test subsets
    train_choices = dataset.choices[train_idx]
    test_choices = dataset.choices[test_idx]
    
    # Slice items_features_by_choice (list of tuples)
    train_items_features = [dataset.items_features_by_choice[0][i] for i in train_idx]
    test_items_features = [dataset.items_features_by_choice[0][i] for i in test_idx]
    
    # Slice availability matrix
    train_avail = dataset.available_items_by_choice[train_idx]
    test_avail = dataset.available_items_by_choice[test_idx]
    
    # Create new ChoiceDatasets 
    train_dataset = ChoiceDataset(
        items_features_by_choice=train_items_features,
        choices=train_choices,
        available_items_by_choice=train_avail,
        shared_features_by_choice_names=dataset.shared_features_by_choice_names,
        items_features_by_choice_names=dataset.items_features_by_choice_names[0]
    )
    
    test_dataset = ChoiceDataset(
        items_features_by_choice=test_items_features,
        choices=test_choices,
        available_items_by_choice=test_avail,
        shared_features_by_choice_names=dataset.shared_features_by_choice_names,
        items_features_by_choice_names=dataset.items_features_by_choice_names[0]
    )
    

    return train_dataset, test_dataset

In [7]:
def choice_accuracy(pred_probs, true_labels, available_items_by_choice=None):
    
    # Compute choice prediction accuracy from predicted probabilities vs true labels.

    
    n_choices = len(true_labels)
    correct = 0
    
    # Handle availability (mask unavailable alternatives)
    if available_items_by_choice is not None:
        # Only consider available alternatives for prediction
        masked_probs = pred_probs.copy()
        masked_probs[available_items_by_choice == 0] = -np.inf  # Impossible choices
        pred_indices = np.argmax(masked_probs, axis=1)
    else:
        pred_indices = np.argmax(pred_probs, axis=1)
    
    # Count correct predictions
    correct = np.sum(pred_indices == true_labels)
    accuracy = correct / n_choices
    
    # Per-alternative accuracy
    per_alt_acc = {}
    for alt in range(pred_probs.shape[1]):
        alt_correct = np.sum((pred_indices == alt) & (true_labels == alt))
        alt_total = np.sum(true_labels == alt)
        per_alt_acc[f'Alt{alt}'] = alt_correct / alt_total if alt_total > 0 else 0
    
    return accuracy, {
        'correct': correct,
        'total': n_choices,
        'predicted_indices': pred_indices,
        'per_alternative': per_alt_acc
    }


In [8]:
test_size=0.2; random_state=42; stratify=True;
n_choices = len(dataset.choices)
    
# Get choice indices stratified by chosen alternative
choice_indices = np.arange(n_choices)

if stratify:
    train_idx, test_idx = train_test_split(
        choice_indices, test_size=test_size, 
        random_state=random_state, stratify=dataset.choices
    )
else:
    train_idx, test_idx = train_test_split(
        choice_indices, test_size=test_size, random_state=random_state
    )


train_dataset, test_dataset = train_test_split_choice_dataset(dataset, train_idx, test_idx)

# Choice Models

In [9]:
from choice_learn.models.simple_mnl import SimpleMNL
from choice_learn.models import ConditionalLogit
from choice_learn.models.halo_mnl import LowRankHaloMNL, HaloMNL
from choice_learn.models import RUMnet
from choice_learn.models import NestedLogit
from choice_learn.models.latent_class_mnl import LatentClassSimpleMNL
from choice_learn.models import ResLogit

In [10]:
def create_model(model_name):
    
    if model_name == "SimpleMNL":
        model = SimpleMNL(intercept="item")  
        
    elif model_name == "ConditionalLogit":
        model = ConditionalLogit(optimizer="lbfgs")

        # Intercept for train & sm
        model.add_coefficients(feature_name="intercept", items_indexes=[0, 1])
        
        # beta_co for all items
        model.add_coefficients(feature_name="CO",
                                     items_indexes=[0, 1, 2])
        
        # beta TT for car
        model.add_coefficients(feature_name="TT",
                                     items_indexes=[2],
                    			     coefficient_name="beta_tt_car")
        
        # betas TT shared by train and sm
        model.add_shared_coefficient(feature_name="TT",
                                           items_indexes=[0, 1])
    elif model_name == "HaloMNL":
        model = HaloMNL(intercept="item", optimizer="lbfgs")

    elif model_name == "LowRankHaloMNL": 
        model = LowRankHaloMNL(halo_latent_dim=2, intercept=None)

    elif model_name == "RUMnet": 
        model_args = {
                "num_products_features": 6,
                "num_customer_features": 83,
                "width_eps_x": 20,
                "depth_eps_x": 5,
                "heterogeneity_x": 10,
                "width_eps_z": 20,
                "depth_eps_z": 5,
                "heterogeneity_z": 10,
                "width_u": 20,
                "depth_u": 5,
                "optimizer": "Adam",
                "lr": 0.0002,
                "logmin": 1e-10,
                "label_smoothing": 0.02,
                "callbacks": [],
                "epochs": 100,
                "batch_size": 32,
                "tol": 0,
            }
        model = RUMnet(**model_args)
        model.instantiate()

    elif model_name == "LatenClasstMNL": 
        model = LatentClassSimpleMNL(n_latent_classes=2, fit_method="mle", optimizer="lbfgs", epochs=1000, lbfgs_tolerance=1e-20)

    elif model_name == "ResLogit": 
        model_args = {
            "intercept": "item",
            "optimizer": "SGD",
            "lr": 1e-6,
        }
        model = ResLogit(n_layers=5, **model_args)
        model.instantiate(n_items=3, n_shared_features=0, n_items_features=2)


    elif model_name == "NestedLogit":
        model = NestedLogit(optimizer="lbfgs", items_nests=[[0, 2], [1]])

        # Intercept for train & sm
        model.add_coefficients(feature_name="intercept", items_indexes=[0, 2])
        
        # betas TT and CO shared by train and sm
        model.add_shared_coefficient(feature_name="travel_time",
                                           items_indexes=[0, 1, 2])
        model.add_shared_coefficient(feature_name="cost",
                                           items_indexes=[0, 1, 2])
    
    else:
        raise ValueError(f"Unknown model name: {model_name}")
        
    return model
        

In [11]:
Test_accuracies = {}
Times = {}

In [12]:
Models = {}
Hists = {}


## DeepHalo

In [13]:
def build_model(file_path, H, depth, embed, featureless=True, feature2D=False, epochs=20, batch_size=64, loss_name='nll', dropout = 0, **kwargs):
    # build a DeepHalo choice model with the given specifications

    model_fname = os.path.join(file_path, f'Depth-{depth}-H-{H}-epoch{epochs}-Fless{featureless}-2D{feature2D}-{loss_name}-embed{embed}-swiss-test.weights.h5')

    
    DeepHalo_model = DeepHaloChoiceModel(
    H=H,
    depth=depth,
    embed=embed,
    featureless=featureless,
    feature2D=feature2D,
    epochs=epochs,
    batch_size=batch_size,
    loss_name=loss_name,
    )
    
    return DeepHalo_model, model_fname

In [14]:
# Load path
file_path = os.path.join(project_root, 'Experiments', 'Real')

model_args = { 
                "depth": 2,
                "H": 20,
                "epochs": 20,
                "featureless": False,
                "feature2D": False,
                "loss_name": 'nll',
                "embed": 64,
                "batch_size": 64,
                "dropout": 0.1,
            }

In [15]:
avail_train = train_dataset.available_items_by_choice.astype(np.float32)
X_train = train_dataset.items_features_by_choice[0].astype(np.float32)    

# Build model and initialize it
DeepHalo_model, model_fname = build_model(file_path, **model_args)
_ = DeepHalo_model.deep_halo_core(X_train[:2], avail_train[:2], training=False)



# Learn the model
start = time.time()
DeepHalo_model.fit(train_dataset)
Times['DeepHalo'] = time.time() - start

# Save/load weights
model_filename = model_fname

# DeepHalo_model.deep_halo_core.load_weights(model_fname)

DeepHalo_model.deep_halo_core.save_weights(
        os.path.join(file_path, model_filename)
    )


2025-12-31 17:53:40.628198: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-12-31 17:53:40.628371: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-12-31 17:53:40.628432: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-12-31 17:53:41.618245: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-12-31 17:53:41.618381: I external/local_xla/xla/stream_executor

In [16]:
# Compute predicted probs
probs_DeepHalo = DeepHalo_model.predict_probas(test_dataset).numpy()

y_pred_DeepHalo = np.argmax(probs_DeepHalo, axis=1)
DeepHalo_acc, _ = choice_accuracy(probs_DeepHalo, test_dataset.choices)

print(f"Test Accuracy for DeepHalo: {DeepHalo_acc:.4f}")

Test Accuracy for DeepHalo: 0.6670


In [17]:
Test_accuracies['DeepHalo'] = DeepHalo_acc
Models['DeepHalo'] = DeepHalo_model

## SimpleMNL

In [18]:
model_name = "SimpleMNL"
model = create_model(model_name)

Using L-BFGS optimizer, setting up .fit() function


In [19]:
# Learn the model
start = time.time()
hist = model.fit(train_dataset)
Times[model_name] = time.time() - start

Models[model_name] = model
Hists[model_name] = hist

## Conditional Logit

In [20]:
model_name = "ConditionalLogit"
model = create_model(model_name)

Using L-BFGS optimizer, setting up .fit() function


In [21]:
start = time.time()
hist = model.fit(train_dataset)
Times[model_name] = time.time() - start


Models[model_name] = model
Hists[model_name] = hist

## Halo MNL

In [22]:
model_name = "HaloMNL"
model = create_model(model_name)

Using L-BFGS optimizer, setting up .fit() function


In [23]:
start = time.time()
hist = model.fit(train_dataset)
Times[model_name] = time.time() - start

Models[model_name] = model
Hists[model_name] = hist

## LowRank Halo MNL

In [24]:
model_name = "LowRankHaloMNL"
model = create_model(model_name)

Using L-BFGS optimizer, setting up .fit() function


In [25]:
start = time.time()
hist = model.fit(train_dataset)
Times[model_name] = time.time() - start

Models[model_name] = model
Hists[model_name] = hist

ERROR:root:L-BFGS Optimization failed.


## Nested Logit


In [26]:
dataset_nested = load_swissmetro(preprocessing="biogeme_nested")


test_size=0.2; random_state=42; stratify=True;
n_choices = len(dataset_nested.choices)
    
# Get choice indices stratified by chosen alternative
choice_indices = np.arange(n_choices)

if stratify:
    train_idx_nested, test_idx_nested = train_test_split(
        choice_indices, test_size=test_size, 
        random_state=random_state, stratify=dataset_nested.choices
    )
else:
    train_idx_idx_nested, test_idx_nested = train_test_split(
        choice_indices, test_size=test_size, random_state=random_state
    )


train_dataset_nested = dataset_nested[train_idx_nested]
test_dataset_nested = dataset_nested[test_idx_nested]

In [27]:
model_name = "NestedLogit"
model = create_model(model_name)

Using L-BFGS optimizer, setting up .fit() function


In [28]:
start = time.time()
hist = model.fit(train_dataset_nested)
Times[model_name] = time.time() - start

Models[model_name] = model
Hists[model_name] = hist

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


        clipped to 0.05 for numeric optimization purposes.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


## RUMNet

In [29]:
dataset_RUM = load_swissmetro(as_frame=False, preprocessing="rumnet")


test_size=0.2; random_state=42; stratify=True;
n_choices = len(dataset_RUM.choices)
    
# Get choice indices stratified by chosen alternative
choice_indices = np.arange(n_choices)

if stratify:
    train_idx_RUM, test_idx_RUM = train_test_split(
        choice_indices, test_size=test_size, 
        random_state=random_state, stratify=dataset_RUM.choices
    )
else:
    train_idx_RUM, test_idx_RUM = train_test_split(
        choice_indices, test_size=test_size, random_state=random_state
    )


train_dataset_RUM = dataset_RUM[train_idx_RUM]
test_dataset_RUM = dataset_RUM[test_idx_RUM]

                                    fit models needing them such as Conditional Logit.
                                fit models needing them such as Conditional Logit.
                                    fit models needing them such as Conditional Logit.
                                fit models needing them such as Conditional Logit.
                                    fit models needing them such as Conditional Logit.
                                fit models needing them such as Conditional Logit.


In [30]:
model_name = "RUMnet"
model = create_model(model_name)

In [31]:
start = time.time()
hist = model.fit(train_dataset_RUM)
Times[model_name] = time.time() - start

Models[model_name] = model
Hists[model_name] = hist

Epoch 99 Train Loss 0.5668: 100%|█████████████████████████████████████████████████████| 100/100 [05:03<00:00,  3.03s/it]


In [32]:
model.save_model(os.path.join(file_path))

# Non-neural ML Methods

## SVM

In [33]:
def choice_set_to_features(dataset):
    # Convert a ChoiceDataset into features matrix for SVM input 

    X_sets, y_sets = [], []
    items_features_list = dataset.items_features_by_choice[0]  # List of tuples for ALL choices
    avail = dataset.available_items_by_choice              # (n_choices, n_items) availability matrix
    N = len(items_features_list)                           # Number of choice situations
    
    for i in range(N):
        # Get availability mask for a choice situation
        available_mask = avail[i]  
        
        # Get all alternatives' features (even unavailable ones)
        choice_set_feats = items_features_list[i]  
        
        # Create feature vector with ZEROS for unavailable items
        set_vector = []
        for j, alt_feats in enumerate(choice_set_feats):
            if available_mask[j] == 1:
                # Available: include actual features
                set_vector.extend(alt_feats.flatten())  
            else:
                # Unavailable: insert zeros
                zero_feats = np.zeros_like(alt_feats)
                set_vector.extend(zero_feats.flatten())  
        
        X_sets.append(set_vector)
        y_sets.append(dataset.choices[i])
    
    return np.array(X_sets), np.array(y_sets)


In [34]:
# Create SVM data
X_train, y_train = choice_set_to_features(train_dataset)
X_test, y_test = choice_set_to_features(test_dataset)

# Create pipeline of scaler + SVM classifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf', probability=True, class_weight='balanced', random_state=42))
])


# Train the SVM
start = time.time()
pipeline.fit(X_train, y_train)
Times['SVM'] = time.time() - start


Models['SVM'] = pipeline

In [35]:
# y_pred = pipeline.predict(X_test)
# SVM_acc = accuracy_score(y_test, y_pred)
pred_probas = pipeline.predict_proba(X_test)
SVM_acc, _ = choice_accuracy(pred_probas, test_dataset.choices, test_dataset.available_items_by_choice)
print(f"\nTest Accuracy for SVM: {SVM_acc:.4f}")

Test_accuracies['SVM'] = SVM_acc

# # Evaluate
# print("\nClassification Report:")
# print(classification_report(y_test, y_pred, target_names=['TRAIN', 'SM', 'CAR']))


Test Accuracy for SVM: 0.6446


In [36]:
def contextual_choice_features(dataset):
    # Create context-aware features for each choice situation
    
    X_ctx, y_ctx = [], []
    items_features = dataset.items_features_by_choice[0]
    
    for i, choice_features_tuple in enumerate(items_features):
        # choice_features_tuple 
        choice_set = np.array([feat.flatten() for feat in choice_features_tuple])  
        
        chosen_idx = int(dataset.choices[i])
        
        for alt_idx in range(choice_set.shape[0]):
            alt_feats = choice_set[alt_idx]
            
            # CONTEXTUAL FEATURES (per our discussion):
            set_mean = choice_set.mean(axis=0)      # Relative to set average
            set_max = choice_set.max(axis=0)        # Relative to best competitor  
            set_std = choice_set.std(axis=0) + 1e-8 # Competition intensity
            
            # 4x feature sets = 4 * n_features total
            ctx_vector = np.concatenate([
                alt_feats,                           # 1. Absolute quality
                alt_feats - set_mean,                # 2. Relative to set avg
                alt_feats - set_max,                 # 3. Relative to best
                (alt_feats - set_mean) / set_std     # 4. Normalized relative
            ])
            
            X_ctx.append(ctx_vector)
            
            y_ctx.append(1 if alt_idx == chosen_idx else 0)  # BINARY: chosen vs not
    
    return np.array(X_ctx), np.array(y_ctx)

In [37]:
# def contextual_to_choice_multiclass(X_ctx, y_ctx, n_items=3):
#     
#     # Convert binary contextual predictions back to multiclass choice labels.
    

#     n_choices = len(X_ctx) // n_items
#     assert len(y_ctx) == n_choices * n_items, "Data length mismatch"
    
#     X_choice = []
#     y_choice = []
    
#     for i in range(n_choices):
#         # Extract one choice set (3 rows)
#         start_idx = i * n_items
#         end_idx = start_idx + n_items
#         choice_set = X_ctx[start_idx:end_idx]
        
#         # Store choice set features
#         X_choice.append(choice_set.mean(axis=0))  # Or concatenate, etc.
        
#         # Find chosen alternative index (where y_ctx=1)
#         choice_labels = y_ctx[start_idx:end_idx]
#         chosen_idx = np.argmax(choice_labels)  # 0,1,2 for TRAIN,SM,CAR
        
#         y_choice.append(chosen_idx)
    
#     return np.array(X_choice), np.array(y_choice)



In [38]:
def contextual_probs_to_choice_labels(pipeline, X_test, n_items=3, available_items_by_choice=None):
   
    # Convert contextual SVM probabilities to multiclass choice predictions.
    

    import numpy as np
    
    n_choices = len(X_test) // n_items
    assert len(X_test) % n_items == 0, "X_test must be multiple of n_items"
    
    pred_probas = pipeline.predict_proba(X_test)[:, 1]  # Positive class probs (n_test,)
    choice_pred = np.zeros(n_choices, dtype=int)
    choice_probs = np.zeros((n_choices, n_items))
    
    for i in range(n_choices):
        start_idx = i * n_items
        end_idx = start_idx + n_items
        
        # Get probabilities for this choice set
        set_probs = pred_probas[start_idx:end_idx]  # (n_items,)
        
        # Handle availability (mask unavailable alternatives)
        if available_items_by_choice is not None:
            avail_mask = available_items_by_choice[i]
            set_probs[~avail_mask.astype(bool)] = 0  # Zero unavailable
        
        # Normalize to choice probabilities (softmax within set)
        choice_probs[i] = set_probs / set_probs.sum()
        
        # Predict chosen alternative
        choice_pred[i] = np.argmax(choice_probs[i])
    
    return choice_pred, choice_probs




Contextual SVM

In [39]:
# Create contextual data
X_train_ctx, y_train_ctx = contextual_choice_features(train_dataset)
X_test_ctx, y_test_ctx = contextual_choice_features(test_dataset)

# Create pipeline of scaler + SVM classifier on contextual data
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf', probability=True, class_weight='balanced', random_state=42))
])


# Train the SVM
start = time.time()
pipeline.fit(X_train_ctx, y_train_ctx)
Times['Context. SVM'] = time.time() - start


Models['Context. SVM'] = pipeline

In [40]:
choice_pred, choice_probs = contextual_probs_to_choice_labels(
    pipeline, X_test_ctx, n_items=3, 
    available_items_by_choice=test_dataset.available_items_by_choice
)
SVM_acc, _ = choice_accuracy(choice_probs, test_dataset.choices, test_dataset.available_items_by_choice)
print(f"\nTest Accuracy for SVM contextual: {SVM_acc:.4f}")

Test_accuracies['Context. SVM'] = SVM_acc


Test Accuracy for SVM contextual: 0.6651


## Ranom Forest

In [41]:

# RF pipeline
rf_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Optional for trees
    ('rf', RandomForestClassifier(
        n_estimators=500,
        max_depth=10,
        min_samples_split=20,
        class_weight='balanced',
        random_state=42,
        n_jobs=-1
    ))
])

start = time.time()
rf_pipeline.fit(X_train, y_train)
Times['RandomForest'] = time.time() - start


Models['RandomForest'] = rf_pipeline

In [42]:
# rf_pred = np.argmax(rf_pipeline.predict_proba(X_test), axis=1)
# rf_accuracy = np.mean(rf_pred == y_test)

pred_probas = rf_pipeline.predict_proba(X_test)
rf_acc, _ = choice_accuracy(pred_probas, test_dataset.choices, test_dataset.available_items_by_choice)
print(f"\nTest Accuracy for Random Forest: {rf_acc:.4f}")

Test_accuracies['RandomForest'] = rf_acc


Test Accuracy for Random Forest: 0.6451


Contextual Random Forest

In [43]:
# RF pipeline
rf_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Optional for trees
    ('rf', RandomForestClassifier(
        n_estimators=500,
        max_depth=10,
        min_samples_split=20,
        class_weight='balanced',
        random_state=42,
        n_jobs=-1
    ))
])

start = time.time()
rf_pipeline.fit(X_train_ctx, y_train_ctx)
Times['Context. RandomForest'] = time.time() - start


Models['Context. RandomForest'] = rf_pipeline

In [44]:
choice_pred, choice_probs = contextual_probs_to_choice_labels(
    rf_pipeline, X_test_ctx, n_items=3, 
    available_items_by_choice=test_dataset.available_items_by_choice
)
rf_acc, _ = choice_accuracy(choice_probs, test_dataset.choices, test_dataset.available_items_by_choice)
print(f"\nTest Accuracy for SVM contextual: {rf_acc:.4f}")

Test_accuracies['Context. RandomForest'] = rf_acc


Test Accuracy for SVM contextual: 0.6936


## Gradient Boosting

In [45]:
# XGBoost 
xgb_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Optional
    ('xgb', xgb.XGBClassifier(
        n_estimators=1000,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        scale_pos_weight=2,  # Handle imbalance
        random_state=42,
        n_jobs=-1,
        eval_metric='mlogloss'
    ))
])

# LightGBM 
lgb_pipeline = LGBMClassifier(
    n_estimators=1000,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
    verbose=-1
)

# Train & predict
start = time.time()
xgb_pipeline.fit(X_train, y_train)
Times['XGB'] = time.time() - start



start = time.time()
lgb_pipeline.fit(X_train, y_train)
Times['LGB'] = time.time() - start



Models['XGB'] = xgb_pipeline
Models['LGB'] = lgb_pipeline

Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [46]:
pred_probas = lgb_pipeline.predict_proba(X_test)
lgb_acc, _ = choice_accuracy(pred_probas, test_dataset.choices, test_dataset.available_items_by_choice)
print(f"\nTest Accuracy for LGB: {lgb_acc:.4f}")


pred_probas = xgb_pipeline.predict_proba(X_test)
xgb_acc, _ = choice_accuracy(pred_probas, test_dataset.choices, test_dataset.available_items_by_choice)
print(f"\nTest Accuracy for XGB: {xgb_acc:.4f}")


Test_accuracies['LGB'] = lgb_acc
Test_accuracies['XGB'] = xgb_acc


Test Accuracy for LGB: 0.6968

Test Accuracy for XGB: 0.7318




Contextual gradient boosting

In [47]:
# XGBoost (Top choice)
xgb_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Optional
    ('xgb', xgb.XGBClassifier(
        n_estimators=1000,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        scale_pos_weight=2,  # Handle imbalance
        random_state=42,
        n_jobs=-1,
        eval_metric='mlogloss'
    ))
])

# LightGBM (Fastest)
lgb_pipeline = LGBMClassifier(
    n_estimators=1000,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
    verbose=-1
)

# Train & predict
start = time.time()
lgb_pipeline.fit(X_train_ctx, y_train_ctx)
Times['Context. LGB'] = time.time() - start

start = time.time()
xgb_pipeline.fit(X_train_ctx, y_train_ctx)
Times['Context. XGB'] = time.time() - start



Models['Context. XGB'] = xgb_pipeline
Models['Context. LGB'] = lgb_pipeline


In [48]:
choice_pred, choice_probs = contextual_probs_to_choice_labels(
    xgb_pipeline, X_test_ctx, n_items=3, 
    available_items_by_choice=test_dataset.available_items_by_choice
)
xgb_acc, _ = choice_accuracy(choice_probs, test_dataset.choices, test_dataset.available_items_by_choice)
print(f"\nTest Accuracy for SVM contextual: {xgb_acc:.4f}")

Test_accuracies['Context. XGB'] = xgb_acc

choice_pred, choice_probs = contextual_probs_to_choice_labels(
    lgb_pipeline, X_test_ctx, n_items=3, 
    available_items_by_choice=test_dataset.available_items_by_choice
)
lgb_acc, _ = choice_accuracy(choice_probs, test_dataset.choices, test_dataset.available_items_by_choice)
print(f"\nTest Accuracy for SVM contextual: {lgb_acc:.4f}")

Test_accuracies['Context. LGB'] = lgb_acc


Test Accuracy for SVM contextual: 0.7024

Test Accuracy for SVM contextual: 0.7038




# Evaluate test accuracy

In [49]:
missing_keys = list(set(Models.keys()) - set(Test_accuracies.keys()))

In [50]:
for model_name in missing_keys:
    if model_name == "NestedLogit":
        dataset_test = test_dataset_nested
    elif model_name == "RUMnet":
        dataset_test = test_dataset_RUM
    else:
        dataset_test = test_dataset
        
    model = Models[model_name]
    pred_probas = model.predict_probas(dataset_test).numpy()
    test_acc, _ = choice_accuracy(pred_probas, dataset_test.choices, dataset_test.available_items_by_choice)
    Test_accuracies[model_name] = test_acc
    
    # print(f"Test Accuracy for {model_name}: {test_acc:.4f}")




        clipped to 0.05 for numeric optimization purposes.


In [51]:
results_df = pd.DataFrame({
    'Test Accuracy': Test_accuracies,
    'Time': Times
})

results_df

Unnamed: 0,Test Accuracy,Time
DeepHalo,0.666978,30.714309
SVM,0.64459,10.90596
Context. SVM,0.665112,67.586082
RandomForest,0.645056,1.274715
Context. RandomForest,0.693563,2.976948
LGB,0.696828,0.961082
XGB,0.73181,1.507641
Context. XGB,0.702425,0.792533
Context. LGB,0.703825,0.645908
RUMnet,0.736007,303.063342


In [52]:
saved_model_names = [key for key in Models.keys() if key != 'RUMnet' and key != 'DeepHalo']

In [53]:
import joblib

joblib.dump([Models[name] for name in saved_model_names], os.path.join(file_path,"choice_models.pkl") )  
joblib.dump(Times, os.path.join(file_path,"Times.pkl") ) 
joblib.dump(Test_accuracies, os.path.join(file_path,"Accuracies.pkl") )  

['/root/Q3/DeepHalo-tf/Final/DeepHalo/Experiments/Real/Accuracies.pkl']