In [1]:
import numpy as np
import json 
import pandas as pd 
from scipy.special import gamma, kv
from tqdm import tqdm
import torch

# 1. Read Data, Convert to DataFrame

In [2]:
def convert_json_to_df(json_file_path:str):
    with open(json_file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    in_sample_transactions = data["transactions"]["in_sample_transactions"]
    out_sample_transactions = data["transactions"]["out_of_sample_transactions"]
    product_labels = data['product_labels']

    in_sample_transactions = pd.DataFrame(in_sample_transactions)
    out_sample_transactions = pd.DataFrame(out_sample_transactions)
    
    # rename 'prodcut' to 'choice' 
    in_sample_transactions.rename(columns={'product':'choice'}, inplace=True)
    out_sample_transactions.rename(columns={'product':'choice'}, inplace=True)
    product_labels = pd.DataFrame(
        list(product_labels.items()), columns=["product_id", "product_name"]
    )
    return in_sample_transactions, out_sample_transactions,product_labels

In [3]:
def convert_list_to_one_hot(transaction:list,d):
    one_hot = np.zeros(d)
    for item in transaction:
        one_hot[item] = 1
    return one_hot

def convert_to_one_hot(transactions:pd.DataFrame,d):
    transactions["offered_product_one_hot"] = transactions['offered_products'].apply(lambda x : convert_list_to_one_hot(x,d))
    transactions['choice_one_hot'] = transactions['choice'].apply(lambda x: convert_list_to_one_hot([x],d))
    return transactions



In [4]:
instance_id = 5
in_sample_transactions, out_sample_transactions ,items= convert_json_to_df(f"hotel_json/instance_{instance_id}.json")
d = len(items) + 1 # consider the 0 as no-purchase
datasize = len(in_sample_transactions)
in_sample_transactions = convert_to_one_hot(in_sample_transactions,d)
out_sample_transactions = convert_to_one_hot(out_sample_transactions,d)

in_sample_transactions

Unnamed: 0,offered_products,choice,offered_product_one_hot,choice_one_hot
0,"[0, 1, 2, 3, 4, 5, 6]",5,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
1,"[0, 1, 2, 3, 4, 5, 6]",0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
2,"[0, 1, 2, 3, 4, 5, 6]",0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
3,"[0, 1, 2, 3, 4, 5, 6]",0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
4,"[0, 1, 2, 3, 4, 5, 6]",0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
...,...,...,...,...
995,"[0, 2, 4, 5]",2,"[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]","[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]"
996,"[0, 2, 4, 5]",0,"[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
997,"[0, 2, 4, 5]",0,"[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
998,"[0, 2, 4, 5]",0,"[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"


# 2. Kernel Implementation





Matrix-valued **Matern kernel** $\tilde{\boldsymbol{\mathsf{k}}}$ can be 

$$
\boldsymbol{\mathsf{k}}(\boldsymbol{e}_{S},\boldsymbol{e}_{S'}) = \boldsymbol{K} \otimes \mathsf{k}_{m}(\boldsymbol{e}_{S}, \boldsymbol{e}_{S'})
$$
where $\boldsymbol{K}$ is a positive semi-definite matrix. Then 

$$
\tilde{\mathsf{k}}^{ij}(\boldsymbol{e}_{S},\boldsymbol{e}_{S'}) = K_{ij} \times\sigma^{2} \frac{2^{1-\nu}}{\Gamma(\nu)} \left( \sqrt{ 2\nu }  \frac{\left\| \boldsymbol{e}_{S} - \boldsymbol{e}_{S'} \right\|_{2}  }{\ell} \right) K_{\nu} \left( \sqrt{ 2\nu } \frac{\left\| \boldsymbol{\boldsymbol{e}_{S}-\boldsymbol{e}_{S'}} \right\|_{2}  }{\ell} \right)
$$
Add constraint, 

$$
\mathsf{k}^{ij}(S, S') = \mathbb{1}(i \in \boldsymbol{e}_{S})\cdot \mathbb{1}(j \in \boldsymbol{e}_{S'}) \cdot   \tilde{\mathsf{k}}^{ij}(\boldsymbol{e}_{S}, \boldsymbol{e}_{S'})
$$


Same with Gaussian kernel.



In [5]:

K = np.eye(d) # for simplicity, we assume the kernel matrix is the identity matrix 

def generate_scalar_matern_kernel(length_scale:float, nu:float, sigma:float):
    """
    Input: kernel parameters and index (i,j)
    generate base scalar-valued Matern kernel at (i,j) 
    return {k_m}_ij
    """
    def kernel(x1:np.ndarray, x2:np.ndarray):

        dist = np.linalg.norm(x1 - x2)
        
        if dist == 0:
            return sigma**2

        # calculate the factor
        factor = (2 ** (1 - nu)) / gamma(nu)
        scaled_dist  = np.sqrt(2 * nu) * dist / length_scale
        result = sigma**2 * factor * (scaled_dist**nu) * kv(nu, scaled_dist)
        return result
    return kernel

def generate_scalar_gaussian_kernel(length_scale:float, sigma:float):
    """
    Input: kernel parameters and index (i,j)
    generate base scalar-valued Gaussian kernel at (i,j)
    return {k_g}_ij
    """
    def kernel(x1:np.ndarray, x2:np.ndarray):
        
        dist = np.linalg.norm(x1 - x2)
        return sigma**2 * np.exp(-dist**2 / (2 * length_scale**2))
    return kernel

def generate_matrix_matern_kernel(length_scale, nu, sigma):
    """
    generate a matrix-valued Matern kernel 
    """
    scalar_matern_kernel = generate_scalar_matern_kernel(length_scale, nu, sigma)
    def kernel(x1:np.ndarray,x2:np.ndarray):
        dim = x1.shape[0]
        result = np.zeros((dim,dim))
        for i in range(dim):
            for j in range(dim):
                if x1[i] == 0 or x2[j] == 0:
                    result[i,j] = 0
                else:
                    result[i,j] = scalar_matern_kernel(x1,x2) * K[i,j]
        return result
    return kernel


def generate_matrix_gaussian_kernel(length_scale, sigma):
    """
    generate a matrix-valued Gaussian kernel 
    """
    scalar_gaussian_kernel = generate_scalar_gaussian_kernel(length_scale, sigma)
    def kernel(x1:np.ndarray,x2:np.ndarray):
        dim = x1.shape[0]
        result = np.zeros((dim,dim))
        for i in range(dim):
            for j in range(dim):
                if x1[i] == 0 or x2[j] == 0:
                    result[i,j] = 0
                else:
                    result[i,j] = scalar_gaussian_kernel(x1, x2) * K[i,j]
        return result
    return kernel







    
    


In [None]:
scalar_matern_kernel = generate_scalar_matern_kernel(1, 1, 1)
scalar_gaussian_kernel = generate_scalar_gaussian_kernel(1, 1)
matrix_matern_kernel = generate_matrix_matern_kernel(1, 1, 1)
matrix_gaussian_kernel = generate_matrix_gaussian_kernel(1, 1)

# 3. Solve

## 3.1. Precalcualte Kernel Tensor

In [None]:



total_iterations = datasize * datasize
with tqdm(total=total_iterations, desc="Overall Progress", unit="iteration") as pbar:
    K_kernel = np.zeros((datasize, datasize, d,d))  

    for i in range(datasize):
        for j in range(datasize):

            K_kernel[i, j] = matrix_matern_kernel(
                in_sample_transactions["offered_product_one_hot"][i],
                in_sample_transactions["offered_product_one_hot"][j],
            )

            pbar.update(1)  

In [62]:
alphaset = torch.randn((datasize, d), dtype=torch.float32, requires_grad=True)
lambda_ = 0.0001

def objective(alphaset: torch.Tensor):
    U = torch.zeros((datasize, d), dtype=torch.float32)
    U = torch.einsum("ijab, jb -> ia", K_kernel, alphaset)

    l = loss(U)
    r = reg(alphaset)

    return l + lambda_ * r



def loss(U: torch.Tensor):

    loss_value = 0.0
    for i in range(datasize):

        p_vec = torch.zeros((d, 1), dtype=torch.float32)

        hS_i = torch.tensor(
            in_sample_transactions.iloc[i]["offered_product_one_hot"],
            dtype=torch.float32,
        ).view(-1, 1)

        y_i = torch.tensor(
            in_sample_transactions.iloc[i]["choice_one_hot"], dtype=torch.float32
        ).view(-1, 1)

        utility_hSi = U[i].view(-1, 1)
        exp_utility = torch.exp(utility_hSi)
        sum_exp_utility = torch.sum(exp_utility) 

        for j in range(d):

            if hS_i[j] == 1:

                p_vec[j] = torch.exp(utility_hSi[j]) / sum_exp_utility
            else:

                p_vec[j] = 0

        loss_value += cross_entropy_loss(p_vec, y_i)
    return loss_value / datasize


def cross_entropy_loss(p_vec: torch.Tensor, y_vec: torch.Tensor):
    for i in range(d):
        if y_vec[i] == 1:
            return -torch.log(p_vec[i])


def squared_loss(p_vec: torch.Tensor, y_vec: torch.Tensor):
    return torch.sum((p_vec - y_vec) ** 2)


def reg(alphaset: torch.Tensor):

    alphaset = alphaset.unsqueeze(2)  # add a dimension 

    # einsum
    result = torch.einsum("ikd,ijkl,jle->", alphaset, K_kernel, alphaset)

    return result

def compute_gradient():
    objective_value = objective(alphaset)
    objective_value.backward() 
    return alphaset.grad

optimizer = torch.optim.Adam([alphaset], lr=0.01)
for epoch in range(500):
    optimizer.zero_grad()  
    loss_value = objective(alphaset)
    loss_value.backward()  
    optimizer.step()  
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss_value.item()}")


Epoch 0, Loss: 17.133644104003906
Epoch 10, Loss: 3.9532620906829834
Epoch 20, Loss: 2.744377613067627
Epoch 30, Loss: 2.309144973754883
Epoch 40, Loss: 1.8242213726043701
Epoch 50, Loss: 1.3611044883728027
Epoch 60, Loss: 1.1296478509902954
Epoch 70, Loss: 0.9803513288497925
Epoch 80, Loss: 0.8962939381599426
Epoch 90, Loss: 0.8494159579277039
Epoch 100, Loss: 0.8338196873664856
Epoch 110, Loss: 0.8237236142158508
Epoch 120, Loss: 0.8133565783500671
Epoch 130, Loss: 0.8054370284080505
Epoch 140, Loss: 0.7992307543754578
Epoch 150, Loss: 0.7940787076950073
Epoch 160, Loss: 0.7897587418556213
Epoch 170, Loss: 0.7860875725746155
Epoch 180, Loss: 0.7829329371452332
Epoch 190, Loss: 0.7802245616912842
Epoch 200, Loss: 0.7779388427734375
Epoch 210, Loss: 0.776119589805603
Epoch 220, Loss: 0.778583288192749
Epoch 230, Loss: 0.840743362903595
Epoch 240, Loss: 1.2489045858383179
Epoch 250, Loss: 1.3080044984817505
Epoch 260, Loss: 0.9927216172218323
Epoch 270, Loss: 0.8949605822563171
Epoch 28

In [63]:
objective(alphaset),reg(alphaset)*lambda_

(tensor([0.7636], grad_fn=<AddBackward0>),
 tensor(0.0188, grad_fn=<MulBackward0>))

In [65]:
def cal_utility(S:np.ndarray):
    utility_vec = torch.zeros((d,1),dtype=torch.float32)
    for i in range(datasize):
        kernel_value = torch.tensor(matrix_matern_kernel(S, in_sample_transactions["offered_product_one_hot"][i]),dtype=torch.float32)
        utility_vec += kernel_value @ alphaset[i].view(-1,1)
    return utility_vec

def cal_probablity(S:np.ndarray):
    utility_vec = cal_utility(S)
    sum_exp_utility = torch.sum(torch.exp(utility_vec))
    p_vec = torch.zeros((d,1),dtype=torch.float32)
    for i in range(d):
        if S[i] == 1:
           p_vec[i] = torch.exp(utility_vec[i]) / sum_exp_utility 
        else:
            p_vec[i] = 0
            
    return p_vec
    



S = np.array([1,0,0,1,1,1,1])
cal_probablity(S)

tensor([[0.7954],
        [0.0000],
        [0.0000],
        [0.0238],
        [0.0259],
        [0.0835],
        [0.0532]], grad_fn=<CopySlices>)