In [363]:
import numpy as np
import json 
import pandas as pd 
from scipy.special import gamma, kv
from tqdm import tqdm
import torch

In [364]:
# Parameters 
instance_id = 4

# kernel_type = 'matern'
# kernel_params = {'length_scale': 1.0, 'nu':1.5, 'sigma': 1.0}

kernel_type = 'gaussian'
kernel_params = {'length_scale': 1.0, 'sigma': 1.0}

loss_type = 'mse'
# loss_type = 'cross_entrophy'

# kernel_type = 'sigmoid'
# kernel_params = {'alpha': 0.5, 'c': 1.0, 'sigma':1.0}


# kernel_type = 'laplace'
# kernel_params = {'sigma': 1.0}


kernel_params_str = "_".join([f"{key}={value}" for key, value in kernel_params.items()])

# 1. Read Data, Convert to DataFrame

In [365]:
def convert_json_to_df(json_file_path:str):
    with open(json_file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    in_sample_transactions = data["transactions"]["in_sample_transactions"]
    out_sample_transactions = data["transactions"]["out_of_sample_transactions"]
    product_labels = data['product_labels']

    in_sample_transactions = pd.DataFrame(in_sample_transactions)
    out_sample_transactions = pd.DataFrame(out_sample_transactions)
    
    # rename 'prodcut' to 'choice' 
    in_sample_transactions.rename(columns={'product':'choice'}, inplace=True)
    out_sample_transactions.rename(columns={'product':'choice'}, inplace=True)
    product_labels = pd.DataFrame(
        list(product_labels.items()), columns=["product_id", "product_name"]
    )
    return in_sample_transactions, out_sample_transactions,product_labels

In [366]:
def convert_list_to_one_hot(transaction:list,d):
    one_hot = np.zeros(d)
    for item in transaction:
        one_hot[item] = 1
    return one_hot

def convert_to_one_hot(transactions:pd.DataFrame,d):
    transactions["offered_product_one_hot"] = transactions['offered_products'].apply(lambda x : convert_list_to_one_hot(x,d))
    transactions['choice_one_hot'] = transactions['choice'].apply(lambda x: convert_list_to_one_hot([x],d))
    return transactions



In [367]:
in_sample_transactions, out_sample_transactions ,items= convert_json_to_df(f"hotel_json/instance_{instance_id}.json")
d = len(items) + 1 # consider the 0 as no-purchase
datasize = len(in_sample_transactions)
in_sample_transactions = convert_to_one_hot(in_sample_transactions,d)
out_sample_transactions = convert_to_one_hot(out_sample_transactions,d)

in_sample_transactions

Unnamed: 0,offered_products,choice,offered_product_one_hot,choice_one_hot
0,"[0, 1, 2, 3, 4]",3,"[1.0, 1.0, 1.0, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0, 0.0]"
1,"[0, 1, 2, 3, 4]",0,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0]"
2,"[0, 1, 2, 3, 4]",0,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0]"
3,"[0, 1, 2, 3, 4]",0,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0]"
4,"[0, 1, 2, 3, 4]",0,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0]"
...,...,...,...,...
1095,"[0, 1, 2, 3, 4]",1,"[1.0, 1.0, 1.0, 1.0, 1.0]","[0.0, 1.0, 0.0, 0.0, 0.0]"
1096,"[0, 1, 2, 3, 4]",0,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0]"
1097,"[0, 1, 2, 3, 4]",0,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0]"
1098,"[0, 1, 2, 3, 4]",0,"[1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0]"


# 2. Kernel Implementation





Matrix-valued **Matern kernel** $\tilde{\boldsymbol{\mathsf{k}}}$ can be 

$$
\boldsymbol{\mathsf{k}}(\boldsymbol{e}_{S},\boldsymbol{e}_{S'}) = \boldsymbol{K} \otimes \mathsf{k}_{m}(\boldsymbol{e}_{S}, \boldsymbol{e}_{S'})
$$
where $\boldsymbol{K}$ is a positive semi-definite matrix. Then 

$$
\tilde{\mathsf{k}}^{ij}(\boldsymbol{e}_{S},\boldsymbol{e}_{S'}) = K_{ij} \times\sigma^{2} \frac{2^{1-\nu}}{\Gamma(\nu)} \left( \sqrt{ 2\nu }  \frac{\left\| \boldsymbol{e}_{S} - \boldsymbol{e}_{S'} \right\|_{2}  }{\ell} \right) K_{\nu} \left( \sqrt{ 2\nu } \frac{\left\| \boldsymbol{\boldsymbol{e}_{S}-\boldsymbol{e}_{S'}} \right\|_{2}  }{\ell} \right)
$$
Add constraint, 

$$
\mathsf{k}^{ij}(S, S') = \mathbb{1}(i \in \boldsymbol{e}_{S})\cdot \mathbb{1}(j \in \boldsymbol{e}_{S'}) \cdot   \tilde{\mathsf{k}}^{ij}(\boldsymbol{e}_{S}, \boldsymbol{e}_{S'})
$$


Same with Gaussian kernel.



In [368]:
K = torch.eye(d) # for the moment we consider the identity matrix as the covariance matrix

In [369]:
def generate_scalar_matern_kernel(length_scale:float, nu:float, sigma:float):
    """
    Input: kernel parameters and index (i,j)
    generate base scalar-valued Matern kernel at (i,j) 
    return {k_m}_ij
    """
    def kernel(x1:torch.Tensor, x2:torch.Tensor):
        dist = torch.norm(x1 - x2)
        
        if dist == 0:
            return sigma**2

        # calculate the factor
        factor = (2 ** (1 - nu)) / gamma(nu)
        scaled_dist = np.sqrt(2 * nu) * dist / length_scale
        result = sigma**2 * factor * (scaled_dist**nu) * kv(nu, scaled_dist)

        return result
    
    return kernel

def generate_scalar_gaussian_kernel(length_scale:float, sigma:float):
    """
    Input: kernel parameters and index (i,j)
    generate base scalar-valued Gaussian kernel at (i,j)
    return {k_g}_ij
    """
    def kernel(x1:torch.Tensor, x2:torch.Tensor):
        
        dist = torch.norm (x1-x2)
        return sigma**2 * torch.exp(-dist**2 / (2 * length_scale**2))
    return kernel

def generate_scalar_sigmoid_kernel(alpha: float, c: float, sigma: float):
    """
    Input: kernel parameters alpha, c, and sigma
    Generate base scalar-valued Sigmoid kernel at (i,j)
    return {k_s}_ij
    """
    def kernel(x1: torch.Tensor, x2: torch.Tensor):

        dot_product = torch.matmul(x1, x2)
        return sigma**2 * torch.tanh(alpha * dot_product + c)
    
    return kernel



def generate_laplace_kernel(sigma: float):
    """
    Generates the Laplace kernel (based on L1 distance).
    
    Input:
        sigma: The scale parameter for the kernel.
    
    Returns:
        kernel: The Laplace kernel function.
    """
    def kernel(x1: torch.Tensor, x2: torch.Tensor):
        distance = torch.sum(torch.abs(x1 - x2))  
        return torch.exp(-distance / sigma)
    
    return kernel

def generate_scalar_anova_kernel(sigma: torch.Tensor):
    """
    Generates the ANOVA kernel (a product of independent Gaussian kernels)

    Input:
        sigma: A 1D tensor of length scale parameters for each dimension.

    Returns:
        kernel: The ANOVA kernel function.
    """
    
    def kernel(x1: torch.Tensor, x2: torch.Tensor):

        diff = x1 - x2
        kernel_values = torch.exp(-0.5 * (diff ** 2) / (sigma ** 2))
        return torch.prod(kernel_values)
    
    return kernel


def generate_matrix_kernel(scalar_kernel):
    def kernel(x1:torch.Tensor, x2:torch.Tensor):
        scalar_kernel_value = scalar_kernel(x1, x2)
        
        mask_x1 = x1 != 0
        mask_x2 = x2 != 0

        mask_x1_expand = mask_x1.view(-1, 1).expand(d, d)
        mask_x2_expand = mask_x2.view(1, -1).expand(d, d)
        
        result = scalar_kernel_value * K * mask_x1_expand * mask_x2_expand
        return result
    return kernel

In [370]:
scalar_kernel = None
if kernel_type == 'matern':
    scalar_kernel = generate_scalar_matern_kernel(kernel_params['length_scale'], kernel_params['nu'], kernel_params['sigma'])
elif kernel_type == 'gaussian':
    scalar_kernel = generate_scalar_gaussian_kernel(kernel_params['length_scale'], kernel_params['sigma'])
elif kernel_type == 'sigmoid':
    scalar_kernel = generate_scalar_sigmoid_kernel(kernel_params['alpha'], kernel_params['c'], kernel_params['sigma'])
elif kernel_type == 'anova':
    scalar_kernel = generate_scalar_anova_kernel(kernel_params['sigma'])
elif kernel_type == 'laplace':
    scalar_kernel = generate_laplace_kernel(kernel_params['sigma'])
    
matrix_kernel = generate_matrix_kernel(scalar_kernel)


# 3. Solve

## 3.1. Precalcualte Kernel Tensor

In [324]:



total_iterations = datasize * datasize
with tqdm(total=total_iterations, desc="Overall Progress", unit="iteration") as pbar:
    K_kernel = torch.zeros(datasize,datasize,d,d,dtype=torch.float32)

    for i in range(datasize):
        for j in range(datasize):
            S_i = torch.tensor(in_sample_transactions["offered_product_one_hot"][i],dtype=torch.float32)
            S_j = torch.tensor(in_sample_transactions["offered_product_one_hot"][j],dtype=torch.float32)
            K_kernel[i, j] = matrix_kernel(
                S_i,
                S_j,
            )

            pbar.update(1)  

Overall Progress: 100%|██████████| 1210000/1210000 [02:13<00:00, 9068.24iteration/s] 


In [371]:
import os
kernel_data_save_path = f"results/feature_free/hotel_{instance_id}/kernel_data/{kernel_type}({kernel_params_str})_kernel_data.pt"
os.makedirs(os.path.dirname(kernel_data_save_path), exist_ok=True)
torch.save(
    K_kernel,
    f"results/feature_free/hotel_{instance_id}/kernel_data/{kernel_type}({kernel_params_str})_kernel_data.pt",
)

## 3.2 Solve Using `torch.optimin.Adam`

In [372]:
K_kernel = torch.load(
    f"results/feature_free/hotel_{instance_id}/kernel_data/{kernel_type}({kernel_params_str})_kernel_data.pt"
)

In [379]:
alphaset = torch.randn((datasize, d), dtype=torch.float32, requires_grad=True)
lambda_ = 1e-4
grad_clip_threshold = 2.0
patience = 40
best_loss = float('inf')  
epochs_since_improvement = 0
best_alphaset = None

def objective(alphaset: torch.Tensor):
    U = torch.zeros((datasize, d), dtype=torch.float32)
    U = torch.einsum("ijab, jb -> ia", K_kernel, alphaset)

    l = loss(U)
    r = reg(alphaset)

    return l + lambda_ * r



def loss(U: torch.Tensor):

    loss_value = 0.0
    for i in range(datasize):

        p_vec = torch.zeros((d, 1), dtype=torch.float32)

        hS_i = torch.tensor(
            in_sample_transactions.iloc[i]["offered_product_one_hot"],
            dtype=torch.float32,
        ).view(-1, 1)

        y_i = torch.tensor(
            in_sample_transactions.iloc[i]["choice_one_hot"], dtype=torch.float32
        ).view(-1, 1)

        utility_hSi = U[i].view(-1, 1)
        exp_utility = torch.exp(utility_hSi)
        sum_exp_utility = torch.sum(exp_utility) 

        for j in range(d):

            if hS_i[j] == 1:

                p_vec[j] = torch.exp(utility_hSi[j]) / sum_exp_utility
            else:

                p_vec[j] = 0

        loss_value += loss_func(p_vec, y_i)
    return loss_value / datasize


def cross_entropy_loss(p_vec: torch.Tensor, y_vec: torch.Tensor):
    for i in range(d):
        if y_vec[i] == 1:
            return -torch.log(p_vec[i])


def squared_loss(p_vec: torch.Tensor, y_vec: torch.Tensor):
    return torch.sum((p_vec - y_vec) ** 2)


def reg(alphaset: torch.Tensor):

    alphaset = alphaset.unsqueeze(2)  # add a dimension 

    # einsum
    result = torch.einsum("ikd,ijkl,jle->", alphaset, K_kernel, alphaset)

    return result

def compute_gradient():
    objective_value = objective(alphaset)
    objective_value.backward() 
    return alphaset.grad

loss_func = None
if loss_type == 'cross_entrophy':
    loss_func = cross_entropy_loss
elif loss_type == 'mse':
    loss_func = squared_loss

optimizer = torch.optim.Adam([alphaset], lr=0.01)

for epoch in range(400):
    optimizer.zero_grad()  
    loss_value = objective(alphaset)

    
    if loss_value.item() < best_loss:
        best_loss = loss_value.item()
        epochs_since_improvement = 0  
        best_alphaset = alphaset.clone().detach()
    else:
        epochs_since_improvement += 1
        
    if epochs_since_improvement >= patience:
        print(f"Early stopping at epoch {epoch}, Best Loss: {best_loss}")
        break  

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Best Loss: {best_loss}")
    
    loss_value.backward()  
    torch.nn.utils.clip_grad_norm_([alphaset], grad_clip_threshold)
    optimizer.step()  



Epoch 0, Best Loss: 2.1053805351257324
Epoch 10, Best Loss: 0.46666282415390015
Epoch 20, Best Loss: 0.46666282415390015
Epoch 30, Best Loss: 0.38745447993278503
Epoch 40, Best Loss: 0.38745447993278503
Epoch 50, Best Loss: 0.38745447993278503
Epoch 60, Best Loss: 0.36232060194015503
Epoch 70, Best Loss: 0.36232060194015503
Epoch 80, Best Loss: 0.36232060194015503
Epoch 90, Best Loss: 0.36232060194015503
Early stopping at epoch 94, Best Loss: 0.36232060194015503


In [380]:
def cal_utility(S:torch.Tensor):
    utility_vec = torch.zeros((d,1),dtype=torch.float32)
    for i in range(datasize):
        S_i = torch.tensor(in_sample_transactions["offered_product_one_hot"][i],dtype=torch.float32)
        kernel_value = matrix_kernel(S, S_i)
        utility_vec += kernel_value @ best_alphaset[i].view(-1,1)
    return utility_vec

def cal_probability(S:np.ndarray):
    utility_vec = cal_utility(S)
    sum_exp_utility = torch.sum(torch.exp(utility_vec))
    p_vec = torch.zeros(d,dtype=torch.float32)
    for i in range(d):
        if S[i] == 1:
           p_vec[i] = torch.exp(utility_vec[i]) / sum_exp_utility 
        else:
            p_vec[i] = 0
            
    return p_vec
    




# 4. Performance

## 4.1. Test Loss

In [381]:

S_test = torch.stack([torch.tensor(sample, dtype=torch.float32) for sample in out_sample_transactions["offered_product_one_hot"]])
test_p_vecs = torch.stack([cal_probability(S) for S in S_test])
test_choice_vecs = torch.stack([torch.tensor(sample, dtype=torch.float32) for sample in out_sample_transactions["choice_one_hot"]])
test_p_vecs

tensor([[1.2935e-02, 5.7832e-06, 9.8536e-01, 0.0000e+00, 0.0000e+00],
        [1.2935e-02, 5.7832e-06, 9.8536e-01, 0.0000e+00, 0.0000e+00],
        [1.2935e-02, 5.7832e-06, 9.8536e-01, 0.0000e+00, 0.0000e+00],
        ...,
        [6.2992e-06, 4.1263e-09, 1.3609e-04, 9.9969e-01, 0.0000e+00],
        [6.2992e-06, 4.1263e-09, 1.3609e-04, 9.9969e-01, 0.0000e+00],
        [6.2992e-06, 4.1263e-09, 1.3609e-04, 9.9969e-01, 0.0000e+00]])

In [382]:
test_mse = torch.mean((test_p_vecs- test_choice_vecs) ** 2).item()
test_rmse = np.sqrt(test_mse)
test_rmse,test_mse

(0.5626776732540368, 0.31660616397857666)

## 4.2. Train Loss

In [383]:
S_train = torch.stack([torch.tensor(sample, dtype=torch.float32) for sample in in_sample_transactions["offered_product_one_hot"]])
train_p_vecs = torch.stack([cal_probability(S) for S in S_train])
train_choice_vecs = torch.stack([torch.tensor(sample, dtype=torch.float32) for sample in in_sample_transactions["choice_one_hot"]])
train_mse = torch.mean((train_p_vecs - train_choice_vecs) ** 2).item()
train_rmse = np.sqrt(train_mse)
train_mse, train_rmse

(0.3273162245750427, 0.5721155692471956)

In [384]:
import csv
results = [
    [instance_id, test_rmse, train_rmse, lambda_,kernel_type , kernel_params_str,loss_type]
]
file_path = f'results/results.csv'
file_exists = os.path.exists(file_path)
with open(file_path, mode="a", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    if not file_exists or os.stat(file_path).st_size == 0:
        writer.writerow(["instance_id", "test_rmse", "train_rmse", "lambda_", "kernel_type", "kernel_params","loss_type"])
    writer.writerows(results)