In [1]:
import numpy as np
import json 
import pandas as pd 
from scipy.special import gamma, kv
from tqdm import tqdm
import torch

# 1. Read Data, Convert to DataFrame

In [2]:
def convert_json_to_df(json_file_path:str):
    with open(json_file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    in_sample_transactions = data["transactions"]["in_sample_transactions"]
    out_sample_transactions = data["transactions"]["out_of_sample_transactions"]
    product_labels = data['product_labels']

    in_sample_transactions = pd.DataFrame(in_sample_transactions)
    out_sample_transactions = pd.DataFrame(out_sample_transactions)
    
    # rename 'prodcut' to 'choice' 
    in_sample_transactions.rename(columns={'product':'choice'}, inplace=True)
    out_sample_transactions.rename(columns={'product':'choice'}, inplace=True)
    product_labels = pd.DataFrame(
        list(product_labels.items()), columns=["product_id", "product_name"]
    )
    return in_sample_transactions, out_sample_transactions,product_labels

In [3]:
def convert_list_to_one_hot(transaction:list,d):
    one_hot = np.zeros(d)
    for item in transaction:
        one_hot[item] = 1
    return one_hot

def convert_to_one_hot(transactions:pd.DataFrame,d):
    transactions["offered_product_one_hot"] = transactions['offered_products'].apply(lambda x : convert_list_to_one_hot(x,d))
    transactions['choice_one_hot'] = transactions['choice'].apply(lambda x: convert_list_to_one_hot([x],d))
    return transactions



In [4]:
instance_id = 5
in_sample_transactions, out_sample_transactions ,items= convert_json_to_df(f"hotel_json/instance_{instance_id}.json")
d = len(items) + 1 # consider the 0 as no-purchase
datasize = len(in_sample_transactions)
in_sample_transactions = convert_to_one_hot(in_sample_transactions,d)
out_sample_transactions = convert_to_one_hot(out_sample_transactions,d)

in_sample_transactions

Unnamed: 0,offered_products,choice,offered_product_one_hot,choice_one_hot
0,"[0, 1, 2, 3, 4, 5, 6]",5,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
1,"[0, 1, 2, 3, 4, 5, 6]",0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
2,"[0, 1, 2, 3, 4, 5, 6]",0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
3,"[0, 1, 2, 3, 4, 5, 6]",0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
4,"[0, 1, 2, 3, 4, 5, 6]",0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
...,...,...,...,...
995,"[0, 2, 4, 5]",2,"[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]","[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]"
996,"[0, 2, 4, 5]",0,"[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
997,"[0, 2, 4, 5]",0,"[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
998,"[0, 2, 4, 5]",0,"[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"


# 2. Kernel Implementation





Matrix-valued **Matern kernel** $\tilde{\boldsymbol{\mathsf{k}}}$ can be 

$$
\boldsymbol{\mathsf{k}}(\boldsymbol{e}_{S},\boldsymbol{e}_{S'}) = \boldsymbol{K} \otimes \mathsf{k}_{m}(\boldsymbol{e}_{S}, \boldsymbol{e}_{S'})
$$
where $\boldsymbol{K}$ is a positive semi-definite matrix. Then 

$$
\tilde{\mathsf{k}}^{ij}(\boldsymbol{e}_{S},\boldsymbol{e}_{S'}) = K_{ij} \times\sigma^{2} \frac{2^{1-\nu}}{\Gamma(\nu)} \left( \sqrt{ 2\nu }  \frac{\left\| \boldsymbol{e}_{S} - \boldsymbol{e}_{S'} \right\|_{2}  }{\ell} \right) K_{\nu} \left( \sqrt{ 2\nu } \frac{\left\| \boldsymbol{\boldsymbol{e}_{S}-\boldsymbol{e}_{S'}} \right\|_{2}  }{\ell} \right)
$$
Add constraint, 

$$
\mathsf{k}^{ij}(S, S') = \mathbb{1}(i \in \boldsymbol{e}_{S})\cdot \mathbb{1}(j \in \boldsymbol{e}_{S'}) \cdot   \tilde{\mathsf{k}}^{ij}(\boldsymbol{e}_{S}, \boldsymbol{e}_{S'})
$$


Same with Gaussian kernel.



In [5]:
K = torch.eye(d) # for the moment we consider the identity matrix as the covariance matrix

In [6]:
# def generate_scalar_matern_kernel(length_scale:float, nu:float, sigma:float):
#     """
#     Input: kernel parameters and index (i,j)
#     generate base scalar-valued Matern kernel at (i,j) 
#     return {k_m}_ij
#     """
#     def kernel(x1:np.ndarray, x2:np.ndarray):

#         dist = np.linalg.norm(x1 - x2)
        
#         if dist == 0:
#             return sigma**2

#         # calculate the factor
#         factor = (2 ** (1 - nu)) / gamma(nu)
#         scaled_dist  = np.sqrt(2 * nu) * dist / length_scale
#         result = sigma**2 * factor * (scaled_dist**nu) * kv(nu, scaled_dist)
#         return result
#     return kernel

def generate_scalar_matern_kernel(length_scale:float, nu:float, sigma:float):
    """
    Input: kernel parameters and index (i,j)
    generate base scalar-valued Matern kernel at (i,j) 
    return {k_m}_ij
    """
    def kernel(x1:torch.Tensor, x2:torch.Tensor):
        dist = torch.norm(x1 - x2)
        
        if dist == 0:
            return sigma**2

        # calculate the factor
        factor = (2 ** (1 - nu)) / gamma(nu)
        scaled_dist = np.sqrt(2 * nu) * dist / length_scale
        result = sigma**2 * factor * (scaled_dist**nu) * kv(nu, scaled_dist)

        return result
    
    return kernel

# def generate_scalar_gaussian_kernel(length_scale:float, sigma:float):
#     """
#     Input: kernel parameters and index (i,j)
#     generate base scalar-valued Gaussian kernel at (i,j)
#     return {k_g}_ij
#     """
#     def kernel(x1:np.ndarray, x2:np.ndarray):
        
#         dist = np.linalg.norm(x1 - x2)
#         return sigma**2 * np.exp(-dist**2 / (2 * length_scale**2))
#     return kernel

def generate_matrix_matern_kernel(length_scale, nu, sigma):
    scalar_kernel = generate_scalar_matern_kernel(length_scale, nu, sigma)
    
    def kernel(x1:torch.Tensor, x2:torch.Tensor):
        scalar_kernel_value = scalar_kernel(x1, x2)
        
        mask_x1 = x1 != 0
        mask_x2 = x2 != 0
        
        mask_x1_expand = mask_x1.view(-1, 1).expand(d, d) 
        mask_x2_expand = mask_x2.view(1, -1).expand(d, d)
        
        result = scalar_kernel_value * K* mask_x1_expand * mask_x2_expand 

        return result 
    return kernel

# def generate_matrix_matern_kernel(length_scale, nu, sigma):
#     """
#     generate a matrix-valued Matern kernel 
#     """
#     scalar_matern_kernel = generate_scalar_matern_kernel(length_scale, nu, sigma)
#     def kernel(x1:np.ndarray,x2:np.ndarray):
#         dim = x1.shape[0]
#         result = np.zeros((dim,dim))
#         for i in range(dim):
#             for j in range(dim):
#                 if x1[i] == 0 or x2[j] == 0:
#                     result[i,j] = 0
#                 else:
#                     result[i,j] = scalar_matern_kernel(x1,x2) * K[i,j]
#         return result
#     return kernel


# def generate_matrix_gaussian_kernel(length_scale, sigma):
#     """
#     generate a matrix-valued Gaussian kernel 
#     """
#     scalar_gaussian_kernel = generate_scalar_gaussian_kernel(length_scale, sigma)
#     def kernel(x1:np.ndarray,x2:np.ndarray):
#         dim = x1.shape[0]
#         result = np.zeros((dim,dim))
#         for i in range(dim):
#             for j in range(dim):
#                 if x1[i] == 0 or x2[j] == 0:
#                     result[i,j] = 0
#                 else:
#                     result[i,j] = scalar_gaussian_kernel(x1, x2) * K[i,j]
#         return result
#     return kernel

In [7]:
matrix_matern_kernel = generate_matrix_matern_kernel(1, 1, 1)
scalar_matern_kernel = generate_scalar_matern_kernel(1, 1, 1)
matrix_matern_kernel(torch.tensor([1,1,0,1,0,0,0],dtype=float), torch.tensor([1,0,1,1,1,1,1],dtype=float)),matrix_matern_kernel(torch.tensor([1,1,0,1,0,0,0],dtype=float), torch.tensor([1,0,1,1,1,1,1],dtype=float))

(tensor([[0.1046, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.1046, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]),
 tensor([[0.1046, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.1046, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]))

# 3. Solve

## 3.1. Precalcualte Kernel Tensor

In [8]:



total_iterations = datasize * datasize
with tqdm(total=total_iterations, desc="Overall Progress", unit="iteration") as pbar:
    K_kernel = torch.zeros(datasize,datasize,d,d)

    for i in range(datasize):
        for j in range(datasize):
            S_i = torch.tensor(in_sample_transactions["offered_product_one_hot"][i],dtype=float)
            S_j = torch.tensor(in_sample_transactions["offered_product_one_hot"][j],dtype=float)
            K_kernel[i, j] = matrix_matern_kernel(
                S_i,
                S_j,
            )

            pbar.update(1)  

Overall Progress:  76%|███████▌  | 757611/1000000 [01:02<00:18, 13451.76iteration/s]

In [None]:
alphaset = torch.randn((datasize, d), dtype=torch.float32, requires_grad=True)
lambda_ = 0.001

def objective(alphaset: torch.Tensor):
    U = torch.zeros((datasize, d), dtype=torch.float32)
    U = torch.einsum("ijab, jb -> ia", K_kernel, alphaset)

    l = loss(U)
    r = reg(alphaset)

    return l + lambda_ * r



def loss(U: torch.Tensor):

    loss_value = 0.0
    for i in range(datasize):

        p_vec = torch.zeros((d, 1), dtype=torch.float32)

        hS_i = torch.tensor(
            in_sample_transactions.iloc[i]["offered_product_one_hot"],
            dtype=torch.float32,
        ).view(-1, 1)

        y_i = torch.tensor(
            in_sample_transactions.iloc[i]["choice_one_hot"], dtype=torch.float32
        ).view(-1, 1)

        utility_hSi = U[i].view(-1, 1)
        exp_utility = torch.exp(utility_hSi)
        sum_exp_utility = torch.sum(exp_utility) 

        for j in range(d):

            if hS_i[j] == 1:

                p_vec[j] = torch.exp(utility_hSi[j]) / sum_exp_utility
            else:

                p_vec[j] = 0

        loss_value += cross_entropy_loss(p_vec, y_i)
    return loss_value / datasize


def cross_entropy_loss(p_vec: torch.Tensor, y_vec: torch.Tensor):
    for i in range(d):
        if y_vec[i] == 1:
            return -torch.log(p_vec[i])


def squared_loss(p_vec: torch.Tensor, y_vec: torch.Tensor):
    return torch.sum((p_vec - y_vec) ** 2)


def reg(alphaset: torch.Tensor):

    alphaset = alphaset.unsqueeze(2)  # add a dimension 

    # einsum
    result = torch.einsum("ikd,ijkl,jle->", alphaset, K_kernel, alphaset)

    return result

def compute_gradient():
    objective_value = objective(alphaset)
    objective_value.backward() 
    return alphaset.grad

optimizer = torch.optim.Adam([alphaset], lr=0.01)
for epoch in range(500):
    optimizer.zero_grad()  
    loss_value = objective(alphaset)
    loss_value.backward()  
    optimizer.step()  
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss_value.item()}")



Epoch 0, Loss: 47.24093246459961
Epoch 10, Loss: 8.232274055480957
Epoch 20, Loss: 6.692347049713135
Epoch 30, Loss: 2.97208833694458
Epoch 40, Loss: 2.1698923110961914
Epoch 50, Loss: 1.5645359754562378
Epoch 60, Loss: 1.2849969863891602
Epoch 70, Loss: 1.1289321184158325
Epoch 80, Loss: 1.0561659336090088
Epoch 90, Loss: 0.9989989399909973
Epoch 100, Loss: 0.9638248682022095
Epoch 110, Loss: 0.9510769248008728
Epoch 120, Loss: 0.9401125907897949
Epoch 130, Loss: 0.930778980255127
Epoch 140, Loss: 0.9224726557731628
Epoch 150, Loss: 0.9150317907333374
Epoch 160, Loss: 0.9083288311958313
Epoch 170, Loss: 0.9022581577301025
Epoch 180, Loss: 0.896713376045227
Epoch 190, Loss: 0.8916382193565369
Epoch 200, Loss: 0.886982262134552
Epoch 210, Loss: 0.8847923278808594
Epoch 220, Loss: 0.8826903104782104
Epoch 230, Loss: 0.8806502819061279
Epoch 240, Loss: 0.8786593079566956
Epoch 250, Loss: 0.876736044883728
Epoch 260, Loss: 0.8748530149459839
Epoch 270, Loss: 0.8730258941650391
Epoch 280, L

In [None]:
obj = objective(alphaset)

In [None]:
def cal_utility(S:torch.Tensor):
    utility_vec = torch.zeros((d,1),dtype=torch.float32)
    for i in range(datasize):
        S_i = torch.tensor(in_sample_transactions["offered_product_one_hot"][i],dtype=float)
        kernel_value = matrix_matern_kernel(S, S_i)
        utility_vec += kernel_value @ alphaset[i].view(-1,1)
    return utility_vec

def cal_probability(S:np.ndarray):
    utility_vec = cal_utility(S)
    sum_exp_utility = torch.sum(torch.exp(utility_vec))
    p_vec = torch.zeros(d,dtype=torch.float32)
    for i in range(d):
        if S[i] == 1:
           p_vec[i] = torch.exp(utility_vec[i]) / sum_exp_utility 
        else:
            p_vec[i] = 0
            
    return p_vec
    



S = torch.tensor([1,1,1,1,1,1,1],dtype=float)
cal_probability(S)

tensor([0.7966, 0.0909, 0.0283, 0.0081, 0.0143, 0.0546, 0.0070],
       grad_fn=<CopySlices>)

# 4. Performance

In [None]:
# testsize = len(out_sample_transactions)
# mse = 0
# for i in tqdm(range(testsize)):
#     S = torch.tensor(out_sample_transactions["offered_product_one_hot"][i])
#     p_vec = cal_probablity(S)
#     mse += squared_loss(p_vec, S)

S_test = torch.stack([torch.tensor(sample, dtype=torch.float32) for sample in out_sample_transactions["offered_product_one_hot"]])
p_vecs = torch.stack([cal_probability(S) for S in S_test])
choice_vecs = torch.stack([torch.tensor(sample, dtype=torch.float32) for sample in out_sample_transactions["choice_one_hot"]])


In [None]:
p_vecs.shape, choice_vecs.shape

(torch.Size([255, 7]), torch.Size([255, 7]))

In [None]:
mse = torch.mean((p_vecs- choice_vecs) ** 2).item()
rmse = np.sqrt(mse)
rmse,mse

(0.2243296581533966, 0.05032379552721977)

In [None]:
# 把objective, rmse, mse保存到csv中
import csv
results = [
    [float(obj.detach()),mse, rmse, ]
]
# 以追加模式打开 CSV 文件
with open(f'lambda={lambda_}.csv', mode='a', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerows(results)  # 追加数据