# 下載套件

In [1]:
import torch
import torch.nn as nn
from torch import nn, matmul, softmax
from torch.nn.init import xavier_uniform_
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils
from torch.autograd import Variable

import os
import numpy as np
import pandas as pd
import gensim
from gensim.models import KeyedVectors
import pickle
import gzip
import gc
import random

In [2]:
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import random_split

from tqdm import tqdm
from collections import Counter

import math

# 參數設置

In [3]:
# 超參數
EPOCHS = 25
LEARNING_RATE = 0.0001                 # Learning_rate
BATCH_SIZE = 8
BETA = 0.5
ALPHA = 0.1
EMBEDDING_DIMENSION = 32               # 嵌入維度
MODEL_DIMENSION = EMBEDDING_DIMENSION  # 模型維度
HIDDEN_DIMENSION = 128                 # MLP 隱藏層維度
HIDDEN_SIZE = 16                       # LSTM 隱藏層維度
NUM_HEAD = 4
NUM_LAYER = 4

isI2V = 1

In [4]:
# 資料集
DATASET_NAME = "TaFeng"     # 讀取TaFeng資料
# DATASET_NAME = "Dunnhumby"  # 讀取Dunnhumby資料
# DATASET_NAME = "Instacart"  # 讀取Instacart資料

# 匯入檔案
- item2Vec_{dataset}.32d.model
- user_cart_itemid_list.gz
- {dataset}_clean.csv

In [5]:
# load word2Vec pre_train model
model_filename = f"../preprocessing-data/item2vec_models/item2vec_{DATASET_NAME}.{EMBEDDING_DIMENSION}d.model"
# {DATASET}
with open(model_filename, "rb") as fp:
    model = pickle.load(fp)
weights = torch.FloatTensor(model.wv.vectors)
weights.shape

torch.Size([15764, 32])

In [6]:
# {DATASET} user_cart_itemid_list 用戶id, 購物籃時間差(不會使用到), 此用戶的購物籃串列(每個串列包含多項目)。
with gzip.open(f"../preprocessing-data/{DATASET_NAME}_user_cart_itemid_list.gz", "rb") as fp:
    user_cart_itemid_list = pickle.load(fp)
user_cart_itemid_list[:2]

[(1113,
  [[0, 1, 2], [3, 4, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 15]],
  [3, 6, 7]),
 (5241,
  [[16, 17, 18, 19, 20, 21],
   [22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
   [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
   [48, 49, 50, 51, 52]],
  [6, 10, 16, 5])]

In [7]:
# Load {DATASET} confidences_Matrix
with gzip.open(f"../preprocessing-data/confidences/{DATASET_NAME}_confidences_array.gz", "rb") as fp:
    confidences_array = pickle.load(fp)

In [8]:
dataset = pd.read_csv(f"../cleaned_dataset/{DATASET_NAME}_clean.csv")

# 最多購物籃
max_cart_count = dataset.groupby("CUSTOMER_ID")["CART_ID"].nunique().max()
print(max_cart_count)

dataset

72


Unnamed: 0,CUSTOMER_ID,PRODUCT_ID,TRANSACTION_DT,CART_ID,NEW_ITEM_ID
0,1113,4902105011621,2000-11-26,0,0
1,1113,7616100830794,2000-11-26,0,1
2,1113,4710892632017,2000-11-26,0,2
3,1113,4710905340113,2000-11-27,1,3
4,1113,4717362901277,2000-11-27,1,4
...,...,...,...,...,...
533054,20002000,4710339772139,2001-01-20,62360,4546
533055,20002000,20513184,2001-01-20,62360,1351
533056,20002000,4714800731229,2001-01-20,62360,2946
533057,20002000,4714541091071,2001-01-20,62360,7382


# 切分資料集
- 分成輸入資料與標籤資料
- 訓練集:驗證集:測試集 = 8:1:1

In [9]:
# 切分資料集
# train_set_size = int(len(user_cart_itemid_list) * 0.8)
# valid_set_size = int(len(user_cart_itemid_list) * 0.1)
# test_set_size = len(user_cart_itemid_list)-train_set_size-valid_set_size
# train_set, valid_set, test_set = random_split(user_cart_itemid_list, [train_set_size, valid_set_size, test_set_size])
# print(len(train_set))
# print(len(valid_set))
# print(len(test_set))

8523
1065
1066


In [10]:
# 將切割好的資料暫存起來
# dataset_folder = f"../preprocessing-data/{DATASET_NAME}_dataset"
# if not os.path.exists(dataset_folder):
#     os.mkdir(dataset_folder)

# # 訓練集
# filepath = f"../preprocessing-data/{DATASET_NAME}_dataset/train_set.pkl"
# with open(filepath, "wb") as f:
#     pickle.dump(train_set, f)
# # 驗證集
# filepath = f"../preprocessing-data/{DATASET_NAME}_dataset/valid_set.pkl"
# with open(filepath, "wb") as f:
#     pickle.dump(valid_set, f)
# # 測試集
# filepath = f"../preprocessing-data/{DATASET_NAME}_dataset/test_set.pkl"
# with open(filepath, "wb") as f:
#     pickle.dump(test_set, f)

In [11]:
# 讀取之前暫存的資料集

# # 載入訓練、驗證、測試集
# with open(f"../preprocessing-data/{DATASET_NAME}_dataset/train_set.pkl", "rb") as fp:
#     train_set = pickle.load(fp)
# with open(f"../preprocessing-data/{DATASET_NAME}_dataset/valid_set.pkl", "rb") as fp:
#     valid_set = pickle.load(fp)
# with open(f"../preprocessing-data/{DATASET_NAME}_dataset/test_set.pkl", "rb") as fp:
#     test_set = pickle.load(fp)

# BATCH

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [13]:
# 定義資料轉換函數(於collate_batch函式中使用)
item_index_pipeline = lambda x : [[model.wv.key_to_index[j] for j in i] for i in x] # 取得購物籃中，項目的索引值(只有使用Item2Vec時會需要)

In [14]:
class TensorDataset(Dataset):
    # TensorDataset繼承Dataset, 重載__init__, __getitem__, __len__
    # 實現將一組Tensor數據封裝成Tensor數據集
    # 能夠通過Index得到數據集的數據，能夠通過len，得到數據集大小
    def __init__(self, data_tensor):
        self.data_tensor = data_tensor
    def __getitem__(self, index):
        return self.data_tensor[index]
    def __len__(self):
        return len(self.data_tensor)

# 輸出userID, input_list跟label(最後一個購物籃)
def collate_batch(batch):
    # 使用ID、時間差、訓練的購物籃項目、預測的購物籃項目
    userID, input_item_list, label_item_list, input_size_list, label_size_list = [], [], [], [], []
    for _user in batch:
        #　userID
        userID.append(_user[0])
        # 所有購物籃的項目ID串列中的最後一個購物籃項目ID
        label_item_list.append(torch.tensor(_user[1][-1]))
        label_size_list.append(torch.tensor(_user[2][-1]))
        
        # 不使用Item2Vec進行項目嵌入
        if isI2V == 0:
            train_list = _user[1][0:-1]
        # 使用Item2Vec進行項目嵌入
        else:
            train_list = item_index_pipeline(_user[1][0:-1])
        input_size_list.append(_user[2][0:-1])
        
        input_item_list.append(train_list) #　所有購物籃的項目ID串列(除了最後一個購物籃)
    
    return userID, input_item_list, label_item_list, input_size_list, label_size_list

In [15]:
# 轉成 Dataset
split_train_ = TensorDataset(train_set)
split_valid_ = TensorDataset(valid_set)
split_test_ = TensorDataset(test_set)

In [16]:
# DataLoader
train_dataloader = DataLoader(split_train_, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch, drop_last=True)
valid_dataloader = DataLoader(split_valid_, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch, drop_last=True)
test_dataloader = DataLoader(split_test_, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch, drop_last=True)

# Self-Attention

In [17]:
# 使用nn.MultiheadAttention
# 輸入一個用戶的購物籃，輸出購物籃嵌入
class SelfAttention(nn.Module):
    #　項目向量維度，輸出模型維度
    def __init__(self, embed_dim, model_dim):
        super(SelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.model_dim = model_dim
    
        # 初始化Q, K, V 矩陣
        self.query_matrix = nn.Linear(embed_dim, model_dim)
        xavier_uniform_(self.query_matrix.weight)
        self.key_matrix = nn.Linear(embed_dim, model_dim)
        xavier_uniform_(self.key_matrix.weight)
        self.value_matrix = nn.Linear(embed_dim, model_dim)
        xavier_uniform_(self.value_matrix.weight)
        self.multihead_attn = nn.MultiheadAttention(embed_dim, num_heads=1)
    
    def forward(self, inputs, attention_mask):
        
        # 輸入一個項目向量，透過三個可學習的參數矩陣，得到計算所需要的q, k, v
        q = self.query_matrix(inputs)
        k = self.key_matrix(inputs)
        v = self.value_matrix(inputs)
        
        attn_output, attn_output_weight = self.multihead_attn(q, k, v, key_padding_mask=attention_mask.transpose(0,1))
        
        output_mean = torch.tensor([[0 for _ in range(MODEL_DIMENSION)] for _ in range(len(attention_mask))], dtype=torch.float).to(device)
        for i, cart in enumerate(attention_mask):
            for j, mask in enumerate(cart):
                if mask == False:
                    # 使用最後一個項目最作為輸出
                    output_mean[i] = attn_output[i][j]
        basket_embedding = output_mean
        
        return basket_embedding

# LSTM

In [18]:
class LSTM(nn.Module):
    def __init__(self, input_size, output_size):
        super(LSTM, self).__init__()
        self.lstm = torch.nn.LSTM(input_size, HIDDEN_SIZE, 2)
        self.hiddenlayer1 = torch.nn.Linear(max_cart_count * HIDDEN_SIZE, 512)
        self.hiddenlayer2 = torch.nn.Linear(512, 512)
        self.hiddenlayer3 = torch.nn.Linear(512, 256)
        self.hiddenlayer4 = torch.nn.Linear(256, 128)
        self.embed = torch.nn.Linear(128, EMBEDDING_DIMENSION)
        self.leakyrelu = torch.nn.LeakyReLU()
    
    def forward(self, inputs):
        output, (h,c) = self.lstm(inputs)
        hidden1 = self.hiddenlayer1(output.view(max_cart_count * HIDDEN_SIZE))
        hidden2 = self.hiddenlayer2(hidden1)
        hidden3 = self.hiddenlayer3(hidden2)
        hidden4 = self.hiddenlayer4(hidden3)
        output = self.embed(hidden4)
        return output

# Transformer

In [19]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model:int, dropout, maxlen:int=500):
        super(PositionalEncoding, self).__init__()
        # den 是把10000^(2i/d_model)取log_e，前面加負號是求倒數
        den = torch.exp(-torch.arange(0, d_model, 2) * math.log(10000) / d_model)
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        pos_embedding = torch.zeros(maxlen, d_model)
        pos_embedding[:, 0::2] = torch.sin(pos*den)
        pos_embedding[:, 1::2] = torch.cos(pos*den)
        
        pos_embedding = pos_embedding.unsqueeze(0)
        
        self.dropout = nn.Dropout(dropout)
        self.register_buffer("pos_embedding", pos_embedding)
        
    def forward(self, token_embedding):
        return self.dropout(token_embedding + self.pos_embedding[:, :token_embedding.size(1), :])
    
class TransformerEncoder(nn.Module):
    def __init__(self, d_model, num_heads=8, num_layers=6):
        super(TransformerEncoder, self).__init__()
        self.pe = PositionalEncoding(d_model=d_model, dropout=0.5, maxlen=max_cart_count)
        #　創建Transformer模型
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads),
            num_layers=num_layers
        )
        
    def forward(self, baskets_embedding):
        baskets_embedding_pe = self.pe(baskets_embedding)
        
        # 購物籃padding的遮罩
        padding_mask = ~baskets_embedding.sum(dim=-1).ne(0).transpose(0,1)
        
        output = self.transformer(baskets_embedding_pe.to(torch.float32), src_key_padding_mask=padding_mask.to(torch.float32))
        return output

# MLP

In [20]:
class MLPforItem(nn.Module):
    def __init__(self, embed_dim, hidden_dim, items_dim):
        super(MLPforItem, self).__init__()
        # hidden layer
        self.hidden = nn.Linear(embed_dim, hidden_dim)
        xavier_uniform_(self.hidden.weight)
        self.norm = nn.BatchNorm1d(hidden_dim, momentum=0.03)
        self.relu = nn.ReLU()
        # output layer
        self.output = nn.Linear(hidden_dim, items_dim)
    
    def forward(self, inputs):
        y = self.relu(self.norm(self.hidden(inputs)))
        return self.output(y)

In [21]:
class MLPforSize(nn.Module):
    def __init__(self, embed_dim):
        super(MLPforSize, self).__init__()
        # predict layer
        self.predict = nn.Linear(embed_dim, 1)
        self.leakyrelu = torch.nn.LeakyReLU()
    def forward(self, inputs):
        output = self.leakyrelu(self.predict(inputs))
        return output

# 損失函數

In [22]:
# MSE
def mean_square_error(prediction, target):
    predictions = prediction[0]
    targets = torch.tensor([target[0]], dtype=torch.float).to(device)
    loss = F.mse_loss(predictions, targets)
    for i in range(1, len(prediction)):
        predictions = prediction[i]
        targets= torch.tensor([target[i]], dtype=torch.float).to(device)
        loss += F.mse_loss(predictions, targets)
    return loss

In [23]:
# cross_entropy_loss
def cross_entropy_loss(predictions, targets):
    # 創建稀疏張量的索引和值
    indices = []
    values = []
    for i, t in enumerate(targets):
        for v in t:
            indices.append([i, v])
            values.append(1)
            
    # 創建稀疏張量
    sparse_targets = torch.sparse_coo_tensor(indices=torch.tensor(indices).t(),
                                             values=torch.tensor(values, dtype=torch.float16),
                                             size=(len(targets), items_count), device=device)
    sparse_targets = sparse_targets.to_dense()
    
    loss = F.binary_cross_entropy_with_logits(predictions, sparse_targets)
    return loss

# 評估指標

In [24]:
def format_metric(result_dict):
    assert type(result_dict) == dict
    format_str = []
    metrics = np.unique([k for k in result_dict.keys()])
    for metric in np.sort(metrics):
        name = '{}'.format(metric)
        m = result_dict[name]
        if type(m) is float or type(m) is np.float32 or type(m) is np.float64:
            format_str.append("{}: {:<.4f}".format(name, m))
        elif type(m) is int or type(m) is np.int32 or type(m) is np.int64:
            format_str.append("{}: {}".format(name, m))
    return ", ".join(format_str)

# F1-score

In [25]:
def calculate_f1_score_at_k(predictions, targets, k_list):
    """
    計算 F1-score@K。

    Args:
        predictions: 二維的預測機率矩陣，大小為 [num_users, num_items]。
        targets: 一個包含每個用戶真實標籤的列表，其中每個列表的大小不同。
        k_list: 用預測出的K值，計算F1-score@K

    Returns:
        F1-score@K 分數。
    """
    # 將預測機率矩陣值轉換為 Pytorch 張量
    predictions = torch.from_numpy(np.array(predictions, dtype=np.float32))
    num_users = len(targets)
    f1_score_at_k_eval = dict()
    
    f1_score_sum = 0.0
    for i in range(num_users):
        
        # 將用戶 i 的真實標籤轉換為 PyTorch 張量。
        labels = torch.from_numpy(np.array(targets[i], dtype=np.int64))
        # 計算用戶 i 在預測機率矩陣中機率最高的 K 個項目索引
        top_k_item_labels = torch.topk(predictions[i], k_list[i])[1]
        # 計算用戶 i 的真實標籤和預測標籤的交集 (TP)
        true_positives = torch.sum(torch.sum(torch.eq(top_k_item_labels, labels.unsqueeze(1)).to(torch.float32), dim=1)).item()
        # 計算用戶 i 的真實標籤和預測標籤的聯集
        predicted_positives = k_list[i] # TP+FP
        actual_positives = len(labels)  # TP+FN
        # 預防 K 預測為0導致 precision 分母為0
        if predicted_positives == 0:
            precision = 0.0
        else:
            precision = true_positives / predicted_positives
        # 預防實際 K 為0導致 recall 分母為0
        if actual_positives == 0:
            recall = 0.0
        else:
            recall = true_positives / actual_positives
        # 計算F1-score
        if precision + recall == 0:
            f1_score = 0.0
        else:
            f1_score = 2 * precision * recall / (precision + recall)
        f1_score_sum += f1_score
    # 計算平均 F1-score@K 分數
    f1_score_at_k = f1_score_sum / float(num_users)
    key = "{}".format("F1-score")
    f1_score_at_k_eval[key] = f1_score_at_k
    
    return f1_score_at_k_eval

# NDCG

In [26]:
def calculate_ndcg_at_k(basket_predictions, basket_targets, size_predictions, size_targets):
    """
    計算 NDCG@K。

    Args:
        basket_predictions: 預測購物籃項目
        bakset_targets: 實際購物籃項目
        size_predictions: 預測購物籃大小
        size_targets: 實際購物籃大小  

    Returns:
        NDCG@K 分數。
    """
    # 將預測機率矩陣轉換為 PyTorch 張量
    predictions = torch.from_numpy(np.array(basket_predictions, dtype=np.float32))
    num_users = len(basket_targets)
    ndcg_at_k_eval = dict()
    
    ndcg_sum = 0.0
    for i in range(num_users):
        # 將用戶 i 的真實標籤轉換為 PyTorch 張量
        labels = torch.from_numpy(np.array(basket_targets[i], dtype=np.int64))
        # 計算用戶 i 在預測機率矩陣中機率最高的 K 個項目的索引=標籤
        top_k_item_labels = torch.topk(basket_predictions[i], size_predictions[i])[1]
        # 計算 DCG@K
        dcg_at_k = torch.sum(torch.nan_to_num(torch.div(1.0, torch.log2(torch.arange(size_predictions[i], dtype=torch.float32) +2))) * (torch.eq(top_k_item_labels, labels.unsqueeze(1)).to(torch.float32) ))
        # 計算 IDCG@K
        idcg_at_k = torch.sum(torch.div(1.0, torch.log2(torch.arange(len(labels), dtype=torch.float32) + 2)))
        # 計算 NDCG@K * Penalty weight
        if torch.eq(idcg_at_k, 0):
            ndcg_at_k = idcg_at_k
        else:
            ndcg_at_k = (dcg_at_k / idcg_at_k) * (size_targets[i] / (size_targets[i] + abs(size_targets[i] - size_predictions[i])))
        ndcg_sum += ndcg_at_k.item()
    #　計算平均　NDCG@K 分數
    ndcg_at_k = ndcg_sum / float(num_users)
    key = "{}".format("NDCG")
    ndcg_at_k_eval[key] = ndcg_at_k

    return ndcg_at_k_eval

# MAE

In [27]:
def calculate_mae(size_predictions, size_targets):
    sum = 0
    num_users = len(size_targets)
    mae_eval = dict()
    for i in range(num_users):
        sum += abs(size_predictions[i] - (size_targets[i]).item())
    key = "{}".format("MAE")
    mae_eval[key] = sum / num_users
    return mae_eval

# 訓練&測試

In [28]:
# 訓練模型
def train_model():
    my_model.train()
    loss_list = []
    
    for batch_idx, (userID, basket_input, basket_label, size_input, size_label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        basket_output, size_output = my_model(basket_input, size_input)
        # 計算損失
        loss = ALPHA * mean_square_error(size_output, size_label) + (1 - ALPHA) * cross_entropy_loss(basket_output, basket_label)
        loss_list.append(loss.item())
        loss.backward()
        optimizer.step()
        
        if (batch_idx%100 == 0) or (batch_idx == len(train_dataloader)-1):
            precentage = (100 * batch_idx/len(train_dataloader))
            print(f"Epoch {epoch}: {precentage:.0f}%, loss: {loss.item():.6f}")
            
        with torch.no_grad():
            basket_output = torch.from_numpy(np.array(basket_output.cpu(), dtype=np.float32))
            size_output = np.round(np.squeeze(np.array([_.cpu() for _ in size_output], dtype=np.float32))).astype(int).tolist()
            if batch_idx == 0:
                basket_outputs = basket_output
                basket_labels = basket_label
                size_outputs = size_output
                size_labels = size_label
            else:
                basket_outputs = torch.cat( (basket_outputs, basket_output ),-2 )
                basket_labels = basket_labels + basket_label
                size_outputs = size_outputs + size_output
                size_labels = size_labels + size_label
                
    with torch.no_grad():
        evaluations = calculate_f1_score_at_k(basket_outputs, basket_labels, size_outputs) 
        res_str = '(' + format_metric(evaluations) + ')'
        print(f"                      {res_str}\n")

        evaluations = calculate_ndcg_at_k(basket_outputs, basket_labels, size_outputs, size_labels) 
        res_str = '(' + format_metric(evaluations) + ')'
        print(f"                      {res_str}\n")

        evaluations = calculate_mae(size_outputs, size_labels)
        res_str = '(' + format_metric(evaluations) + ')'
        print(f"                      {res_str}\n")
        
    return torch.mean(torch.tensor(loss_list))

In [29]:
# 驗證模型
def evaluate_model():
    my_model.eval()
    loss_list = []
    for batch_idx, (userID, basket_input, basket_label, size_input, size_label) in enumerate(tqdm(valid_dataloader)):
        basket_output, size_output = my_model(basket_input, size_input)
        # 計算損失
        loss = ALPHA * mean_square_error(size_output, size_label) + (1 - ALPHA) * cross_entropy_loss(basket_output, basket_label)
        loss_list.append(loss.item())
        with torch.no_grad():
            basket_output = torch.from_numpy(np.array(basket_output.cpu(), dtype=np.float32))
            size_output = np.round(np.squeeze(np.array([_.cpu() for _ in size_output], dtype=np.float32))).astype(int).tolist()
            if batch_idx == 0:
                basket_outputs = basket_output
                basket_labels = basket_label
                size_outputs = size_output
                size_labels = size_label
            else:
                basket_outputs = torch.cat( (basket_outputs, basket_output ),-2 )
                basket_labels = basket_labels + basket_label
                size_outputs = size_outputs + size_output
                size_labels = size_labels + size_label
        
    with torch.no_grad():
        evaluations = calculate_f1_score_at_k(basket_outputs, basket_labels, size_outputs) 
        res_str = '(' + format_metric(evaluations) + ')'
        print(f"                      {res_str}\n")

        evaluations = calculate_ndcg_at_k(basket_outputs, basket_labels, size_outputs, size_labels) 
        res_str = '(' + format_metric(evaluations) + ')'
        print(f"                      {res_str}\n")

        evaluations = calculate_mae(size_outputs, size_labels)
        res_str = '(' + format_metric(evaluations) + ')'
        print(f"                      {res_str}\n")
        
    return torch.mean(torch.tensor(loss_list))

In [30]:
# 測試模型
def test_model():
    my_model.eval()
    loss_list = []
    for batch_idx, (userID, basket_input, basket_label, size_input, size_label) in enumerate(tqdm(test_dataloader)):
        basket_output, size_output = my_model(basket_input, size_input)
        # 計算損失
        loss = ALPHA * mean_square_error(size_output, size_label) + (1 - ALPHA) * cross_entropy_loss(basket_output, basket_label)
        loss_list.append(loss.item())
        with torch.no_grad():
            basket_output = torch.from_numpy(np.array(basket_output.cpu(), dtype=np.float32))
            size_output = np.round(np.squeeze(np.array([_.cpu() for _ in size_output], dtype=np.float32))).astype(int).tolist()
            if batch_idx == 0:
                basket_outputs = basket_output
                basket_labels = basket_label
                size_outputs = size_output
                size_labels = size_label
            else:
                basket_outputs = torch.cat( (basket_outputs, basket_output ),-2 )
                basket_labels = basket_labels + basket_label
                size_outputs = size_outputs + size_output
                size_labels = size_labels + size_label
    
    with torch.no_grad():
        f1_evaluations = calculate_f1_score_at_k(basket_outputs, basket_labels, size_outputs) 
        f1_list = [f1_evaluations]
        res_str = '(' + format_metric(f1_evaluations) + ')'
        print(f"                      {res_str}\n")
        
        ndcg_evaluations = calculate_ndcg_at_k(basket_outputs, basket_labels, size_outputs, size_labels) 
        ndcg_list = [ndcg_evaluations]
        res_str = '(' + format_metric(ndcg_evaluations) + ')'
        print(f"                      {res_str}\n")
        
        mae_evaluations = calculate_mae(size_outputs, size_labels)
        mae_list = [mae_evaluations]
        res_str = '(' + format_metric(mae_evaluations) + ')'
        print(f"                      {res_str}\n")
        
    return torch.mean(torch.tensor(loss_list)), f1_list, ndcg_list, mae_list

# 完整模型

In [31]:
# 項目總數
items_count = confidences_array.shape[0]
print("items_count=", items_count)
# 項目出現次數
items_freq = Counter(dataset["NEW_ITEM_ID"])
# 計算每個項目出現的比例: items_frq/items_count
item_weight = torch.tensor(np.array(list(items_freq.values()))/items_count).to(device)
# 按照new_item_id順序排列
print(item_weight)

items_count= 15764
tensor([1.7128e-03, 1.1418e-03, 2.4169e-02,  ..., 6.3436e-05, 6.3436e-05,
        6.3436e-05], device='cuda:0', dtype=torch.float64)


# 加上信賴度矩陣

In [32]:
# 信賴度矩陣
confidences_array = torch.tensor(confidences_array, dtype=torch.float64).to(device)
confidences_array

tensor([[0.0000, 0.0741, 0.0741,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.1111,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:0', dtype=torch.float64)

In [33]:
class MyModel01(nn.Module):
    def __init__(self, embed_dim, model_dim, hidden_dim, items_count):
        super(MyModel01, self).__init__()
        self.model_dim = model_dim
        self.embedding = nn.Embedding.from_pretrained(weights, freeze=False)
        self.embedding.requires_grad=True
        self.basket_embed = SelfAttention(embed_dim=embed_dim, model_dim=model_dim)
        self.size_embed = LSTM(1,1)
        self.model_encoder = TransformerEncoder(d_model=model_dim, num_heads=NUM_HEAD, num_layers=NUM_LAYER)
        # 嵌入維度、隱藏層維度、總項目數量
        self.basket_mlp = MLPforItem(model_dim, hidden_dim, items_count)
        self.size_mlp = MLPforSize(model_dim)
        self.relu = nn.ReLU()
        
    def forward(self, basket_input, size_input):
        basket_list, size_list, attention_mask, k_list = [], [], [], []
        output_list = []
        
        # 為每個用戶的購物籃加上 padding跟 mask
        for user in basket_input:
            # 將購物籃項目 ID 轉換為嵌入向量
            batch_features = [ self.embedding(torch.tensor(cart).to(device)) for cart in user ]
            # 進行 padding
            batch_features = rnn_utils.pad_sequence(batch_features, batch_first=True, padding_value=0)
            # 購物籃中項目的遮罩
            mask = ~batch_features.sum(dim=-1).ne(0)
            basket_list.append(batch_features)
            attention_mask.append(mask)
        
        # 預測size_tensor
        sizes_input = [torch.tensor(size).to(device) for size in size_input]
        tmp_tensor = torch.zeros(max_cart_count)
        size_list.append(tmp_tensor)
        for size in sizes_input:
            size_list.append(size)
        size_list = rnn_utils.pad_sequence(size_list, batch_first=True, padding_value=0)[1:]
            
        # 進入自注意力，輸出形狀(BATCH_SIZE, basket_size, embed_dim)
        basket_embedding_list = []
        for i, user_inputs in enumerate(basket_list):
            basket_embedding_list.append(self.basket_embed(user_inputs, attention_mask[i]))
        
        # size通過lstm進行編碼
        for i, user_inputs in enumerate(size_list):
            k_list.append(self.size_embed(torch.tensor([[float(_)] for _ in size_list[i]]).to(device)))
        
        # 進行購物籃的 padding
        input_seq = rnn_utils.pad_sequence(basket_embedding_list, batch_first=True, padding_value=0)
        
        # 進入Transformer
        basket_embed = self.model_encoder(input_seq.to(device))
        
        B_s_list = []
        
        for i, b in enumerate(basket_embed):
            basket_size = len(attention_mask[i])
            B_s = b[basket_size-1]  # 取得最後一個購物籃向量
            B_s_list.append(B_s)
        
        # 進入basket MLP層
        p = self.basket_mlp(torch.stack(B_s_list, dim=0))
        pc = (self.relu(p.to(torch.float64))+1e-8) @ confidences_array
        pw = torch.mul( p, item_weight )
        p_ = torch.mul(BETA, torch.add(pc,pw)) + torch.mul(1-BETA, p.to(torch.float64))
        y = p_
        
        # 進入size MLP層
        k = self.size_mlp(torch.stack(k_list, dim=0))
        
        return y, k

In [34]:
my_model = MyModel01(embed_dim=EMBEDDING_DIMENSION, model_dim=MODEL_DIMENSION, hidden_dim=HIDDEN_DIMENSION, items_count=items_count).to(device)
optimizer = torch.optim.Adam(my_model.parameters(), lr=LEARNING_RATE)
my_model.train()

MyModel01(
  (embedding): Embedding(15764, 32)
  (basket_embed): SelfAttention(
    (query_matrix): Linear(in_features=32, out_features=32, bias=True)
    (key_matrix): Linear(in_features=32, out_features=32, bias=True)
    (value_matrix): Linear(in_features=32, out_features=32, bias=True)
    (multihead_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
    )
  )
  (size_embed): LSTM(
    (lstm): LSTM(1, 16, num_layers=2)
    (hiddenlayer1): Linear(in_features=1152, out_features=512, bias=True)
    (hiddenlayer2): Linear(in_features=512, out_features=512, bias=True)
    (hiddenlayer3): Linear(in_features=512, out_features=256, bias=True)
    (hiddenlayer4): Linear(in_features=256, out_features=128, bias=True)
    (embed): Linear(in_features=128, out_features=32, bias=True)
    (leakyrelu): LeakyReLU(negative_slope=0.01)
  )
  (model_encoder): TransformerEncoder(
    (pe): PositionalEncoding(
      (dropout): Dropout

In [None]:
results = []

for epoch in range(1, EPOCHS + 1):
    train_loss = train_model()
    print("train_loss=", train_loss)
    print("-"*20)
    val_loss = evaluate_model()
    print("val_loss=", val_loss)
    print("-"*20)
    test_loss, f1_list, ndcg_list, mae_list = test_model()
    print("-"*20)
    result = [epoch] + f1_list + ndcg_list + mae_list + [test_loss.item()]
    results.append(result)
    print(results)
    print("-"*89)
    
    collected = gc.collect()
    torch.cuda.empty_cache()
    
record_df = pd.DataFrame(results, columns=["Epoch", "F1-score", "NDCG", "MAE", "Loss"])

record_df

  0%|          | 1/1065 [00:00<11:17,  1.57it/s]

Epoch 1: 0%, loss: 151.072876


  9%|▉         | 101/1065 [00:26<04:06,  3.91it/s]

Epoch 1: 9%, loss: 16.322041


 19%|█▉        | 201/1065 [00:54<04:35,  3.13it/s]

Epoch 1: 19%, loss: 28.844355


 28%|██▊       | 301/1065 [01:23<03:41,  3.45it/s]

Epoch 1: 28%, loss: 44.661697


 38%|███▊      | 401/1065 [01:53<03:30,  3.16it/s]

Epoch 1: 38%, loss: 10.057695


 47%|████▋     | 501/1065 [02:23<03:06,  3.02it/s]

Epoch 1: 47%, loss: 19.810877


 56%|█████▋    | 601/1065 [02:55<02:33,  3.02it/s]

Epoch 1: 56%, loss: 42.338947


 66%|██████▌   | 701/1065 [03:29<02:02,  2.97it/s]

Epoch 1: 66%, loss: 27.594660


 75%|███████▌  | 801/1065 [04:02<01:29,  2.95it/s]

Epoch 1: 75%, loss: 47.710537


 85%|████████▍ | 901/1065 [04:37<00:58,  2.80it/s]

Epoch 1: 85%, loss: 15.116541


 94%|█████████▍| 1001/1065 [05:12<00:23,  2.75it/s]

Epoch 1: 94%, loss: 29.204096


100%|██████████| 1065/1065 [05:36<00:00,  3.17it/s]

Epoch 1: 100%, loss: 5.437896





                      (F1-score: 0.0063)

                      (NDCG: 0.0060)

                      (MAE: 4.4901)

train_loss= tensor(35.1452)
--------------------


100%|██████████| 133/133 [00:22<00:00,  5.89it/s]


                      (F1-score: 0.0001)

                      (NDCG: 0.0001)

                      (MAE: 4.3205)

val_loss= tensor(30.8904)
--------------------


100%|██████████| 133/133 [00:21<00:00,  6.10it/s]


                      (F1-score: 0.0002)

                      (NDCG: 0.0001)

                      (MAE: 4.2923)

--------------------
[[1, {'F1-score': 0.0001709677197496979}, {'NDCG': 7.514301529924448e-05}, {'MAE': 4.292293233082707}, 29.84501838684082]]
-----------------------------------------------------------------------------------------


  0%|          | 1/1065 [00:00<04:13,  4.20it/s]

Epoch 2: 0%, loss: 25.580425


  9%|▉         | 101/1065 [00:27<04:15,  3.77it/s]

Epoch 2: 9%, loss: 31.514866


 19%|█▉        | 201/1065 [00:54<03:54,  3.68it/s]

Epoch 2: 19%, loss: 36.681316


 28%|██▊       | 301/1065 [01:23<03:25,  3.73it/s]

Epoch 2: 28%, loss: 4.043067


 38%|███▊      | 401/1065 [01:51<03:12,  3.46it/s]

Epoch 2: 38%, loss: 33.491985


 47%|████▋     | 501/1065 [02:22<02:46,  3.39it/s]

Epoch 2: 47%, loss: 11.884916


 56%|█████▋    | 601/1065 [02:53<02:29,  3.11it/s]

Epoch 2: 56%, loss: 3.917925


 66%|██████▌   | 701/1065 [03:26<02:00,  3.02it/s]

Epoch 2: 66%, loss: 41.285542


 75%|███████▌  | 801/1065 [04:01<01:30,  2.91it/s]

Epoch 2: 75%, loss: 15.355593


 85%|████████▍ | 901/1065 [04:36<00:56,  2.91it/s]

Epoch 2: 85%, loss: 10.663137


 94%|█████████▍| 1001/1065 [05:12<00:24,  2.58it/s]

Epoch 2: 94%, loss: 25.281384


100%|██████████| 1065/1065 [05:35<00:00,  3.17it/s]

Epoch 2: 100%, loss: 9.244298





                      (F1-score: 0.0001)

                      (NDCG: 0.0000)

                      (MAE: 4.0998)

train_loss= tensor(29.5839)
--------------------


100%|██████████| 133/133 [00:22<00:00,  5.92it/s]


                      (F1-score: 0.0000)

                      (NDCG: 0.0000)

                      (MAE: 4.0611)

val_loss= tensor(31.2898)
--------------------


100%|██████████| 133/133 [00:21<00:00,  6.12it/s]


                      (F1-score: 0.0001)

                      (NDCG: 0.0000)

                      (MAE: 3.9098)

--------------------
[[1, {'F1-score': 0.0001709677197496979}, {'NDCG': 7.514301529924448e-05}, {'MAE': 4.292293233082707}, 29.84501838684082], [2, {'F1-score': 6.885345231209893e-05}, {'NDCG': 3.09213222165529e-05}, {'MAE': 3.9097744360902253}, 29.40765953063965]]
-----------------------------------------------------------------------------------------


  0%|          | 1/1065 [00:00<05:01,  3.53it/s]

Epoch 3: 0%, loss: 53.626190


  9%|▉         | 101/1065 [00:26<04:21,  3.68it/s]

Epoch 3: 9%, loss: 39.499893


 19%|█▉        | 201/1065 [00:54<03:50,  3.74it/s]

Epoch 3: 19%, loss: 8.605682


 28%|██▊       | 301/1065 [01:22<03:29,  3.65it/s]

Epoch 3: 28%, loss: 217.611984


 38%|███▊      | 401/1065 [01:51<03:07,  3.54it/s]

Epoch 3: 38%, loss: 21.665529


 47%|████▋     | 501/1065 [02:22<03:07,  3.01it/s]

Epoch 3: 47%, loss: 6.352293


 56%|█████▋    | 601/1065 [02:53<02:34,  3.01it/s]

Epoch 3: 56%, loss: 9.914840


 66%|██████▌   | 701/1065 [03:26<02:08,  2.84it/s]

Epoch 3: 66%, loss: 11.068206


 75%|███████▌  | 801/1065 [04:00<01:25,  3.09it/s]

Epoch 3: 75%, loss: 24.209066


 85%|████████▍ | 901/1065 [04:36<00:57,  2.86it/s]

Epoch 3: 85%, loss: 11.924843


 94%|█████████▍| 1001/1065 [05:11<00:22,  2.86it/s]

Epoch 3: 94%, loss: 63.701687


100%|██████████| 1065/1065 [05:35<00:00,  3.18it/s]

Epoch 3: 100%, loss: 39.467876





                      (F1-score: 0.0001)

                      (NDCG: 0.0000)

                      (MAE: 4.0772)

train_loss= tensor(29.1440)
--------------------


100%|██████████| 133/133 [00:22<00:00,  5.92it/s]


                      (F1-score: 0.0002)

                      (NDCG: 0.0001)

                      (MAE: 4.1936)

val_loss= tensor(36.2078)
--------------------


100%|██████████| 133/133 [00:21<00:00,  6.11it/s]


                      (F1-score: 0.0005)

                      (NDCG: 0.0002)

                      (MAE: 3.9408)

--------------------
[[1, {'F1-score': 0.0001709677197496979}, {'NDCG': 7.514301529924448e-05}, {'MAE': 4.292293233082707}, 29.84501838684082], [2, {'F1-score': 6.885345231209893e-05}, {'NDCG': 3.09213222165529e-05}, {'MAE': 3.9097744360902253}, 29.40765953063965], [3, {'F1-score': 0.0005252089540396515}, {'NDCG': 0.00021626549786923075}, {'MAE': 3.9407894736842106}, 33.605140686035156]]
-----------------------------------------------------------------------------------------


  0%|          | 1/1065 [00:00<05:17,  3.35it/s]

Epoch 4: 0%, loss: 49.989120


  9%|▉         | 101/1065 [00:28<04:20,  3.71it/s]

Epoch 4: 9%, loss: 7.396740


 19%|█▉        | 201/1065 [00:55<03:53,  3.71it/s]

Epoch 4: 19%, loss: 157.367935


 28%|██▊       | 301/1065 [01:24<03:48,  3.35it/s]

Epoch 4: 28%, loss: 33.360958


 38%|███▊      | 401/1065 [01:52<03:12,  3.45it/s]

Epoch 4: 38%, loss: 10.268783


 47%|████▋     | 501/1065 [02:23<02:51,  3.29it/s]

Epoch 4: 47%, loss: 9.822732


 56%|█████▋    | 601/1065 [02:54<02:44,  2.82it/s]

Epoch 4: 56%, loss: 11.763803


 66%|██████▌   | 701/1065 [03:28<02:08,  2.84it/s]

Epoch 4: 66%, loss: 4.099924


 75%|███████▌  | 801/1065 [04:02<01:40,  2.62it/s]

Epoch 4: 75%, loss: 27.823591


 85%|████████▍ | 901/1065 [04:37<00:58,  2.82it/s]

Epoch 4: 85%, loss: 12.949684


 94%|█████████▍| 1001/1065 [05:13<00:23,  2.77it/s]

Epoch 4: 94%, loss: 94.539192


100%|██████████| 1065/1065 [05:36<00:00,  3.16it/s]

Epoch 4: 100%, loss: 27.669836





                      (F1-score: 0.0099)

                      (NDCG: 0.0077)

                      (MAE: 4.0593)

train_loss= tensor(28.8449)
--------------------


100%|██████████| 133/133 [00:22<00:00,  5.94it/s]


                      (F1-score: 0.0141)

                      (NDCG: 0.0161)

                      (MAE: 4.2397)

val_loss= tensor(29.7709)
--------------------


100%|██████████| 133/133 [00:21<00:00,  6.12it/s]


                      (F1-score: 0.0160)

                      (NDCG: 0.0178)

                      (MAE: 4.2133)

--------------------
[[1, {'F1-score': 0.0001709677197496979}, {'NDCG': 7.514301529924448e-05}, {'MAE': 4.292293233082707}, 29.84501838684082], [2, {'F1-score': 6.885345231209893e-05}, {'NDCG': 3.09213222165529e-05}, {'MAE': 3.9097744360902253}, 29.40765953063965], [3, {'F1-score': 0.0005252089540396515}, {'NDCG': 0.00021626549786923075}, {'MAE': 3.9407894736842106}, 33.605140686035156], [4, {'F1-score': 0.015962576029807638}, {'NDCG': 0.017818498838328777}, {'MAE': 4.213345864661654}, 28.84568977355957]]
-----------------------------------------------------------------------------------------


  0%|          | 1/1065 [00:00<04:27,  3.98it/s]

Epoch 5: 0%, loss: 76.776505


  9%|▉         | 101/1065 [00:27<04:20,  3.69it/s]

Epoch 5: 9%, loss: 37.431190


 19%|█▉        | 201/1065 [00:55<03:57,  3.64it/s]

Epoch 5: 19%, loss: 19.042934


 28%|██▊       | 301/1065 [01:23<03:30,  3.62it/s]

Epoch 5: 28%, loss: 36.144855


 38%|███▊      | 401/1065 [01:52<03:26,  3.21it/s]

Epoch 5: 38%, loss: 42.307449


 47%|████▋     | 501/1065 [02:23<03:03,  3.07it/s]

Epoch 5: 47%, loss: 26.860182


 56%|█████▋    | 601/1065 [02:55<02:19,  3.32it/s]

Epoch 5: 56%, loss: 19.365690


 66%|██████▌   | 701/1065 [03:27<01:58,  3.08it/s]

Epoch 5: 66%, loss: 10.431344


 75%|███████▌  | 801/1065 [04:01<01:26,  3.05it/s]

Epoch 5: 75%, loss: 9.059081


 85%|████████▍ | 901/1065 [04:36<00:54,  3.01it/s]

Epoch 5: 85%, loss: 4.468736


 94%|█████████▍| 1001/1065 [05:12<00:22,  2.87it/s]

Epoch 5: 94%, loss: 4.706221


100%|██████████| 1065/1065 [05:35<00:00,  3.17it/s]

Epoch 5: 100%, loss: 39.122250





                      (F1-score: 0.0186)

                      (NDCG: 0.0226)

                      (MAE: 4.0507)

train_loss= tensor(28.6441)
--------------------


100%|██████████| 133/133 [00:22<00:00,  5.92it/s]


                      (F1-score: 0.0204)

                      (NDCG: 0.0256)

                      (MAE: 3.9934)

val_loss= tensor(31.8237)
--------------------


100%|██████████| 133/133 [00:21<00:00,  6.11it/s]


                      (F1-score: 0.0237)

                      (NDCG: 0.0303)

                      (MAE: 3.8205)

--------------------
[[1, {'F1-score': 0.0001709677197496979}, {'NDCG': 7.514301529924448e-05}, {'MAE': 4.292293233082707}, 29.84501838684082], [2, {'F1-score': 6.885345231209893e-05}, {'NDCG': 3.09213222165529e-05}, {'MAE': 3.9097744360902253}, 29.40765953063965], [3, {'F1-score': 0.0005252089540396515}, {'NDCG': 0.00021626549786923075}, {'MAE': 3.9407894736842106}, 33.605140686035156], [4, {'F1-score': 0.015962576029807638}, {'NDCG': 0.017818498838328777}, {'MAE': 4.213345864661654}, 28.84568977355957], [5, {'F1-score': 0.023711579980802776}, {'NDCG': 0.0302586423555263}, {'MAE': 3.8204887218045114}, 30.032276153564453]]
-----------------------------------------------------------------------------------------


  0%|          | 1/1065 [00:00<04:19,  4.10it/s]

Epoch 6: 0%, loss: 33.700836


  9%|▉         | 101/1065 [00:26<04:19,  3.71it/s]

Epoch 6: 9%, loss: 33.032097


 19%|█▉        | 201/1065 [00:54<03:49,  3.76it/s]

Epoch 6: 19%, loss: 16.543489


 28%|██▊       | 301/1065 [01:22<03:20,  3.81it/s]

Epoch 6: 28%, loss: 30.251989


 38%|███▊      | 401/1065 [01:51<03:15,  3.40it/s]

Epoch 6: 38%, loss: 51.616978


 47%|████▋     | 501/1065 [02:22<02:47,  3.36it/s]

Epoch 6: 47%, loss: 39.447792


 56%|█████▋    | 601/1065 [02:54<02:31,  3.06it/s]

Epoch 6: 56%, loss: 31.451775


 66%|██████▌   | 701/1065 [03:26<01:54,  3.18it/s]

Epoch 6: 66%, loss: 21.758905


 75%|███████▌  | 801/1065 [04:00<01:30,  2.92it/s]

Epoch 6: 75%, loss: 23.353371


 85%|████████▍ | 901/1065 [04:35<00:56,  2.93it/s]

Epoch 6: 85%, loss: 4.674962


 94%|█████████▍| 1001/1065 [05:12<00:25,  2.52it/s]

Epoch 6: 94%, loss: 164.875702


100%|██████████| 1065/1065 [05:36<00:00,  3.17it/s]

Epoch 6: 100%, loss: 20.012758





                      (F1-score: 0.0265)

                      (NDCG: 0.0307)

                      (MAE: 4.0369)

train_loss= tensor(28.5653)
--------------------


100%|██████████| 133/133 [00:22<00:00,  5.89it/s]


                      (F1-score: 0.0297)

                      (NDCG: 0.0324)

                      (MAE: 3.9586)

val_loss= tensor(30.2516)
--------------------


100%|██████████| 133/133 [00:21<00:00,  6.10it/s]


                      (F1-score: 0.0379)

                      (NDCG: 0.0400)

                      (MAE: 3.8393)

--------------------
[[1, {'F1-score': 0.0001709677197496979}, {'NDCG': 7.514301529924448e-05}, {'MAE': 4.292293233082707}, 29.84501838684082], [2, {'F1-score': 6.885345231209893e-05}, {'NDCG': 3.09213222165529e-05}, {'MAE': 3.9097744360902253}, 29.40765953063965], [3, {'F1-score': 0.0005252089540396515}, {'NDCG': 0.00021626549786923075}, {'MAE': 3.9407894736842106}, 33.605140686035156], [4, {'F1-score': 0.015962576029807638}, {'NDCG': 0.017818498838328777}, {'MAE': 4.213345864661654}, 28.84568977355957], [5, {'F1-score': 0.023711579980802776}, {'NDCG': 0.0302586423555263}, {'MAE': 3.8204887218045114}, 30.032276153564453], [6, {'F1-score': 0.03794659612273717}, {'NDCG': 0.040000335461306}, {'MAE': 3.8392857142857144}, 28.908143997192383]]
-----------------------------------------------------------------------------------------


  0%|          | 1/1065 [00:00<04:43,  3.76it/s]

Epoch 7: 0%, loss: 3.442751


  9%|▉         | 101/1065 [00:26<04:33,  3.52it/s]

Epoch 7: 9%, loss: 12.663728


 19%|█▉        | 201/1065 [00:54<04:02,  3.56it/s]

Epoch 7: 19%, loss: 15.431534


 28%|██▊       | 301/1065 [01:23<03:29,  3.64it/s]

Epoch 7: 28%, loss: 24.960506


 38%|███▊      | 401/1065 [01:53<03:16,  3.38it/s]

Epoch 7: 38%, loss: 70.201157


 47%|████▋     | 501/1065 [02:24<02:53,  3.26it/s]

Epoch 7: 47%, loss: 46.009628


 56%|█████▋    | 601/1065 [02:56<02:24,  3.21it/s]

Epoch 7: 56%, loss: 23.644382


 66%|██████▌   | 701/1065 [03:28<01:56,  3.13it/s]

Epoch 7: 66%, loss: 28.847084


 75%|███████▌  | 801/1065 [04:02<01:30,  2.91it/s]

Epoch 7: 75%, loss: 23.054811


 85%|████████▍ | 901/1065 [04:37<01:00,  2.71it/s]

Epoch 7: 85%, loss: 9.549104


 94%|█████████▍| 1001/1065 [05:12<00:23,  2.71it/s]

Epoch 7: 94%, loss: 11.043426


100%|██████████| 1065/1065 [05:36<00:00,  3.16it/s]

Epoch 7: 100%, loss: 23.613634





                      (F1-score: 0.0402)

                      (NDCG: 0.0402)

                      (MAE: 4.0273)

train_loss= tensor(28.2821)
--------------------


100%|██████████| 133/133 [00:22<00:00,  5.91it/s]


                      (F1-score: 0.0395)

                      (NDCG: 0.0387)

                      (MAE: 3.9821)

val_loss= tensor(31.4813)
--------------------


100%|██████████| 133/133 [00:21<00:00,  6.11it/s]


                      (F1-score: 0.0477)

                      (NDCG: 0.0463)

                      (MAE: 3.8111)

--------------------
[[1, {'F1-score': 0.0001709677197496979}, {'NDCG': 7.514301529924448e-05}, {'MAE': 4.292293233082707}, 29.84501838684082], [2, {'F1-score': 6.885345231209893e-05}, {'NDCG': 3.09213222165529e-05}, {'MAE': 3.9097744360902253}, 29.40765953063965], [3, {'F1-score': 0.0005252089540396515}, {'NDCG': 0.00021626549786923075}, {'MAE': 3.9407894736842106}, 33.605140686035156], [4, {'F1-score': 0.015962576029807638}, {'NDCG': 0.017818498838328777}, {'MAE': 4.213345864661654}, 28.84568977355957], [5, {'F1-score': 0.023711579980802776}, {'NDCG': 0.0302586423555263}, {'MAE': 3.8204887218045114}, 30.032276153564453], [6, {'F1-score': 0.03794659612273717}, {'NDCG': 0.040000335461306}, {'MAE': 3.8392857142857144}, 28.908143997192383], [7, {'F1-score': 0.04768036243929811}, {'NDCG': 0.046260364893718826}, {'MAE': 3.81109022556391}, 29.836551666259766]]
---------------

  0%|          | 1/1065 [00:00<04:26,  3.99it/s]

Epoch 8: 0%, loss: 48.338081


  9%|▉         | 101/1065 [00:27<04:07,  3.90it/s]

Epoch 8: 9%, loss: 17.515272


 19%|█▉        | 201/1065 [00:54<04:01,  3.58it/s]

Epoch 8: 19%, loss: 22.662556


 28%|██▊       | 301/1065 [01:23<03:31,  3.61it/s]

Epoch 8: 28%, loss: 7.546237


 38%|███▊      | 401/1065 [01:52<03:06,  3.55it/s]

Epoch 8: 38%, loss: 16.328125


 47%|████▋     | 501/1065 [02:23<02:56,  3.19it/s]

Epoch 8: 47%, loss: 13.658923


 56%|█████▋    | 601/1065 [02:55<02:29,  3.11it/s]

Epoch 8: 56%, loss: 17.617048


 66%|██████▌   | 701/1065 [03:27<01:59,  3.04it/s]

Epoch 8: 66%, loss: 6.366136


 75%|███████▌  | 801/1065 [04:01<01:29,  2.94it/s]

Epoch 8: 75%, loss: 7.864896


 85%|████████▍ | 901/1065 [04:36<00:58,  2.80it/s]

Epoch 8: 85%, loss: 14.207162


 94%|█████████▍| 1001/1065 [05:13<00:24,  2.65it/s]

Epoch 8: 94%, loss: 23.487064


100%|██████████| 1065/1065 [05:36<00:00,  3.16it/s]

Epoch 8: 100%, loss: 17.659735





                      (F1-score: 0.0453)

                      (NDCG: 0.0432)

                      (MAE: 4.0208)

train_loss= tensor(28.3216)
--------------------


100%|██████████| 133/133 [00:22<00:00,  5.92it/s]


                      (F1-score: 0.0404)

                      (NDCG: 0.0391)

                      (MAE: 4.0545)

val_loss= tensor(29.2132)
--------------------


100%|██████████| 133/133 [00:21<00:00,  6.09it/s]


                      (F1-score: 0.0488)

                      (NDCG: 0.0466)

                      (MAE: 3.9577)

--------------------
[[1, {'F1-score': 0.0001709677197496979}, {'NDCG': 7.514301529924448e-05}, {'MAE': 4.292293233082707}, 29.84501838684082], [2, {'F1-score': 6.885345231209893e-05}, {'NDCG': 3.09213222165529e-05}, {'MAE': 3.9097744360902253}, 29.40765953063965], [3, {'F1-score': 0.0005252089540396515}, {'NDCG': 0.00021626549786923075}, {'MAE': 3.9407894736842106}, 33.605140686035156], [4, {'F1-score': 0.015962576029807638}, {'NDCG': 0.017818498838328777}, {'MAE': 4.213345864661654}, 28.84568977355957], [5, {'F1-score': 0.023711579980802776}, {'NDCG': 0.0302586423555263}, {'MAE': 3.8204887218045114}, 30.032276153564453], [6, {'F1-score': 0.03794659612273717}, {'NDCG': 0.040000335461306}, {'MAE': 3.8392857142857144}, 28.908143997192383], [7, {'F1-score': 0.04768036243929811}, {'NDCG': 0.046260364893718826}, {'MAE': 3.81109022556391}, 29.836551666259766], [8, {'F1-score'

  0%|          | 1/1065 [00:00<04:15,  4.17it/s]

Epoch 9: 0%, loss: 12.730721


  9%|▉         | 101/1065 [00:27<04:15,  3.77it/s]

Epoch 9: 9%, loss: 10.022324


 19%|█▉        | 201/1065 [00:55<03:59,  3.61it/s]

Epoch 9: 19%, loss: 14.281415


 28%|██▊       | 301/1065 [01:23<03:33,  3.57it/s]

Epoch 9: 28%, loss: 7.393651


 38%|███▊      | 401/1065 [01:53<03:15,  3.40it/s]

Epoch 9: 38%, loss: 36.068092


 38%|███▊      | 406/1065 [01:55<03:34,  3.07it/s]