In [1]:
import torch
import os
from tqdm import tqdm
import logging
import random
from collections import Counter
from nltk.tokenize import word_tokenize
import numpy as np

In [2]:
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import IterableDataset, Dataset
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score

In [3]:
def get_sample(all_elements, num_sample):
    """Randomly pick ``num_sample`` items from the list ``all_elements``.

    If more items are requested than the list holds, the list is first
    repeated often enough to cover the request, so the result may contain
    duplicates. Otherwise the items are drawn without replacement.
    """
    pool = all_elements
    if len(pool) < num_sample:
        # Tile the pool so random.sample has enough elements to draw from.
        repeats = num_sample // len(pool) + 1
        pool = pool * repeats
    return random.sample(pool, num_sample)

In [4]:
# --- Experiment configuration ------------------------------------------------
# Directory that holds the training `behaviors.tsv` and `news.tsv` files.
train_data_dir = './data/MINDsmall_train'
# Titles are truncated / zero-padded to this many tokens in get_doc_input.
num_words_title = 20
# Whether read_news builds the category / subcategory vocabularies.
use_category = True
use_subcategory = True
# Number of negatives sampled per positive click. Named here (instead of a bare
# literal inside the f-string) so the file name below visibly tracks the ratio;
# it has to match the `npratio` used when calling prepare_training_data.
npratio = 4
# Negative-sampled training file written by prepare_training_data.
processed_file_path = os.path.join(train_data_dir, f'behaviors_np{npratio}.tsv')
# Width of the pretrained word vectors and where to read them from.
word_embedding_dim = 300
glove_embedding_path = './data/glove.840B.300d.txt'

In [5]:
def prepare_training_data(seed = 1009, npratio = 4):
    """Expand the raw behaviour log into one negative-sampled line per click.

    Reads ``behaviors.tsv`` from ``train_data_dir``. Every impression that has
    at least one clicked and one non-clicked article yields, for each clicked
    article, a line ``iid, uid, time, history, positive, negatives`` (tab
    separated) where ``negatives`` are ``npratio`` space-separated ids drawn
    with ``get_sample``. The lines are shuffled and written to
    ``behaviors_np{npratio}.tsv`` in the same directory.

    Args:
        seed: value passed to ``random.seed`` before any sampling.
        npratio: number of negatives per positive.

    Returns:
        The number of lines written.
    """
    random.seed(seed)
    samples = []

    source_path = os.path.join(train_data_dir, 'behaviors.tsv')
    with open(source_path, 'r', encoding='utf-8') as reader:
        for raw in tqdm(reader):
            iid, uid, time, history, imp = raw.strip().split('\t')
            impressions = [entry.split('-') for entry in imp.split(' ')]

            clicked, skipped = [], []
            for news_ID, label in impressions:
                if label == '1':
                    clicked.append(news_ID)
                elif label == '0':
                    skipped.append(news_ID)

            # An impression without both kinds of label gives no training pair.
            if not clicked or not skipped:
                continue

            prefix = [iid, uid, time, history]
            for pos_id in clicked:
                negatives = ' '.join(get_sample(skipped, npratio))
                samples.append('\t'.join(prefix + [pos_id, negatives]) + '\n')

    random.shuffle(samples)
    target_path = os.path.join(train_data_dir, f'behaviors_np{npratio}.tsv')
    with open(target_path, 'w') as writer:
        writer.writelines(samples)
    return len(samples)

In [6]:
# Write the negative-sampled training file using the default seed and npratio;
# the value displayed below is the number of training lines written.
prepare_training_data()

156965it [00:02, 52619.34it/s]


236344

In [18]:
def update_dict(dict, key, value=None):
    """Add ``key`` to ``dict`` only if it is not there yet.

    When ``value`` is omitted the key receives the next 1-based index
    (current size + 1). Existing entries are never overwritten.
    """
    if key in dict:
        return
    dict[key] = value if value is not None else len(dict) + 1
            
def read_news(news_path, mode='train'):
    """Parse a tab-separated news file with eight columns per line.

    Every article id gets a 1-based index (in file order) and an entry
    ``[title_tokens, category, subcategory]`` where the title is lower-cased
    and tokenised with ``word_tokenize``.

    Args:
        news_path: path of the news file to read.
        mode: ``'train'`` additionally builds the category, subcategory and
            word vocabularies; ``'test'`` only parses the articles.

    Returns:
        ``(news, news_index, category_dict, subcategory_dict, word_dict)`` in
        train mode, ``(news, news_index)`` in test mode. Any other mode trips
        an assertion after the file has been read.
    """
    news, news_index = {}, {}
    category_dict, subcategory_dict = {}, {}
    word_cnt = Counter()
    collect_vocab = mode == 'train'

    # Words must occur more than this many times to enter the vocabulary.
    filter_num = 0
    with open(news_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f):
            fields = line.strip('\n').split('\t')
            doc_id, category, subcategory, title, abstract, url, _, _ = fields
            update_dict(news_index, doc_id)

            tokens = word_tokenize(title.lower())
            update_dict(news, doc_id, [tokens, category, subcategory])
            if not collect_vocab:
                continue
            if use_category:
                update_dict(category_dict, category)
            if use_subcategory:
                update_dict(subcategory_dict, subcategory)
            word_cnt.update(tokens)

    if mode == 'test':
        return news, news_index
    if mode == 'train':
        kept = [token for token, count in word_cnt.items() if count > filter_num]
        # Index 0 is left free so it can act as the padding id.
        word_dict = {token: idx for idx, token in enumerate(kept, start=1)}
        return news, news_index, category_dict, subcategory_dict, word_dict
    assert False, 'Wrong mode!'


In [19]:
def get_doc_input(news, news_index, category_dict, subcategory_dict, word_dict):
    """Turn parsed articles into integer feature arrays indexed by news index.

    Row 0 of every array is left as zeros (article indices start at 1).
    Title tokens, categories and subcategories that are missing from the
    supplied dictionaries are encoded as 0.

    Returns:
        ``(news_title, news_category, news_subcategory)`` as int32 arrays of
        shape ``(len(news) + 1, num_words_title)``, ``(len(news) + 1, 1)`` and
        ``(len(news) + 1, 1)``.
    """
    num_rows = len(news) + 1
    news_title = np.zeros((num_rows, num_words_title), dtype='int32')
    news_category = np.zeros((num_rows, 1), dtype='int32')
    news_subcategory = np.zeros((num_rows, 1), dtype='int32')

    for doc_id in tqdm(news):
        tokens, category, subcategory = news[doc_id]
        row = news_index[doc_id]

        # Only the first num_words_title tokens fit into the title row.
        for position, token in enumerate(tokens[:num_words_title]):
            if token in word_dict:
                news_title[row, position] = word_dict[token]

        news_category[row, 0] = category_dict.get(category, 0)
        news_subcategory[row, 0] = subcategory_dict.get(subcategory, 0)

    return news_title, news_category, news_subcategory

In [20]:
def load_matrix(embedding_file_path, word_dict, word_embedding_dim):
    """Build an embedding matrix for ``word_dict`` from a text embedding file.

    Each line of the file is expected to look like ``<token> v1 ... vD``
    (whitespace separated, ``D == word_embedding_dim``). The vector is taken
    from the *last* ``word_embedding_dim`` fields and everything before it is
    treated as the token, so tokens that themselves contain spaces (such
    entries are known to occur in ``glove.840B.300d.txt``) are parsed instead
    of raising ``ValueError`` on ``float()``. Blank or too-short lines are
    skipped.

    Args:
        embedding_file_path: path of the embedding file, or ``None`` to get an
            all-zero matrix.
        word_dict: mapping ``word -> row index``.
        word_embedding_dim: number of vector components per word.

    Returns:
        ``(embedding_matrix, have_word)`` where ``embedding_matrix`` has shape
        ``(len(word_dict) + 1, word_embedding_dim)`` (rows of words not found
        in the file stay zero) and ``have_word`` lists the words that were
        found, in file order.
    """
    embedding_matrix = np.zeros(shape=(len(word_dict) + 1, word_embedding_dim))
    have_word = []
    if embedding_file_path is None:
        return embedding_matrix, have_word

    # Binary mode: the token bytes are decoded only after the line is split.
    with open(embedding_file_path, 'rb') as f:
        for line in f:
            fields = line.split()
            split_at = len(fields) - word_embedding_dim
            if split_at < 1:
                # Blank line, header, or truncated vector: nothing usable.
                continue
            word = b' '.join(fields[:split_at]).decode()
            if word in word_dict:
                vector = [float(x) for x in fields[split_at:]]
                embedding_matrix[word_dict[word]] = np.array(vector)
                have_word.append(word)
    return embedding_matrix, have_word

In [21]:
class AttentionPooling(nn.Module):
    """Additive attention that collapses a sequence of vectors into one vector."""

    def __init__(self, emb_size, hidden_size):
        super().__init__()
        self.att_fc1 = nn.Linear(emb_size, hidden_size)
        self.att_fc2 = nn.Linear(hidden_size, 1)

    def forward(self, x, attn_mask=None):
        """
        Args:
            x: batch_size, candidate_size, emb_dim
            attn_mask: batch_size, candidate_size (multiplied into the
                un-normalised weights, so 0 removes a position)
        Returns:
            (shape) batch_size, emb_dim
        """
        hidden = torch.tanh(self.att_fc1(x))
        weights = torch.exp(self.att_fc2(hidden))

        if attn_mask is not None:
            weights = weights * attn_mask.unsqueeze(2)

        # The epsilon keeps a fully masked row at zero instead of dividing by 0.
        normaliser = torch.sum(weights, dim=1, keepdim=True) + 1e-8
        weights = weights / normaliser
        pooled = torch.bmm(x.permute(0, 2, 1), weights)
        return pooled.squeeze(dim=-1)


class ScaledDotProductAttention(nn.Module):
    """Dot-product attention with an explicit exp / normalise step."""

    def __init__(self, d_k):
        super().__init__()
        self.d_k = d_k

    def forward(self, Q, K, V, attn_mask=None):
        '''
            Q: batch_size, n_head, candidate_num, d_k
            K: batch_size, n_head, candidate_num, d_k
            V: batch_size, n_head, candidate_num, d_v
            attn_mask: batch_size, n_head, candidate_num (applied to the key
                axis by multiplication; 0 removes a key)
            Return: batch_size, n_head, candidate_num, d_v
        '''
        scale = np.sqrt(self.d_k)
        weights = torch.exp(torch.matmul(Q, K.transpose(-1, -2)) / scale)

        if attn_mask is not None:
            weights = weights * attn_mask.unsqueeze(dim=-2)

        # Normalise over the key axis; the epsilon guards fully masked rows.
        denominator = torch.sum(weights, dim=-1, keepdim=True) + 1e-8
        return torch.matmul(weights / denominator, V)


class MultiHeadSelfAttention(nn.Module):
    """Multi-head attention: project Q/K/V, attend per head, concatenate heads."""

    def __init__(self, d_model, n_heads, d_k, d_v):
        super().__init__()
        self.d_model, self.n_heads = d_model, n_heads
        self.d_k, self.d_v = d_k, d_v

        self.W_Q = nn.Linear(d_model, d_k * n_heads)
        self.W_K = nn.Linear(d_model, d_k * n_heads)
        self.W_V = nn.Linear(d_model, d_v * n_heads)

        self.scaled_dot_product_attn = ScaledDotProductAttention(self.d_k)
        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise the weight of every linear sub-layer with Xavier uniform."""
        for module in self.modules():
            if not isinstance(module, nn.Linear):
                continue
            nn.init.xavier_uniform_(module.weight, gain=1)

    def _split_heads(self, projected, batch_size, head_dim):
        """Reshape (batch, seq, heads * head_dim) to (batch, heads, seq, head_dim)."""
        return projected.view(batch_size, -1, self.n_heads, head_dim).transpose(1, 2)

    def forward(self, Q, K, V, mask=None):
        '''
            Q: batch_size, candidate_num, d_model
            K: batch_size, candidate_num, d_model
            V: batch_size, candidate_num, d_model
            mask: batch_size, candidate_num
            Return: batch_size, candidate_num, n_heads * d_v
        '''
        batch_size = Q.shape[0]
        if mask is not None:
            # Share the same mask across all heads.
            mask = mask.unsqueeze(dim=1).expand(-1, self.n_heads, -1)

        q_s = self._split_heads(self.W_Q(Q), batch_size, self.d_k)
        k_s = self._split_heads(self.W_K(K), batch_size, self.d_k)
        v_s = self._split_heads(self.W_V(V), batch_size, self.d_v)

        context = self.scaled_dot_product_attn(q_s, k_s, v_s, mask)
        merged = context.transpose(1, 2).contiguous()
        return merged.view(batch_size, -1, self.n_heads * self.d_v)

In [22]:
class NewsEncoder(nn.Module):
    """Encode one article from its title tokens, category and subcategory.

    The title goes through word embedding, dropout, a width-3 1-D convolution
    and attention pooling. Category and subcategory ids are embedded and
    projected to the same width. The available views are then merged by a
    second attention pooling.
    """

    def __init__(self, embedding_matrix, num_category, num_subcategory):
        super().__init__()
        # `embedding_matrix` is called like a module on the title ids in
        # forward(), i.e. it is the word-embedding layer, not a raw array.
        self.embedding_matrix = embedding_matrix
        self.drop_rate = 0.2
        self.num_words_title = 20
        self.use_category = True
        self.use_subcategory = True

        category_emb_dim = 100
        news_dim = 400
        news_query_vector_dim = 200
        word_embedding_dim = 300

        # +1 row because id 0 is reserved for padding / unknown.
        self.category_emb = nn.Embedding(num_category + 1, category_emb_dim, padding_idx=0)
        self.category_dense = nn.Linear(category_emb_dim, news_dim)
        self.subcategory_emb = nn.Embedding(num_subcategory + 1, category_emb_dim, padding_idx=0)
        self.subcategory_dense = nn.Linear(category_emb_dim, news_dim)
        self.final_attn = AttentionPooling(news_dim, news_query_vector_dim)
        self.cnn = nn.Conv1d(in_channels=word_embedding_dim,
                             out_channels=news_dim,
                             kernel_size=3,
                             padding=1)
        self.attn = AttentionPooling(news_dim, news_query_vector_dim)

    def forward(self, x, mask=None):
        '''
            x: batch_size, word_num -- the first num_words_title columns are
               title word ids, followed by one category column and one
               subcategory column (when those views are enabled)
            mask: batch_size, num_words_title -- optional mask handed to the
               title attention
            Return: batch_size, news_dim
        '''
        title_ids = x.narrow(-1, 0, self.num_words_title).long()
        word_vecs = F.dropout(self.embedding_matrix(title_ids),
                              p=self.drop_rate,
                              training=self.training)
        # Conv1d wants channels first, so swap in and out around the call.
        context_word_vecs = self.cnn(word_vecs.transpose(1, 2)).transpose(1, 2)
        views = [self.attn(context_word_vecs, mask)]

        column = self.num_words_title
        if self.use_category:
            category_ids = x.narrow(-1, column, 1).squeeze(dim=-1).long()
            views.append(self.category_dense(self.category_emb(category_ids)))
            column += 1
        if self.use_subcategory:
            subcategory_ids = x.narrow(-1, column, 1).squeeze(dim=-1).long()
            views.append(self.subcategory_dense(self.subcategory_emb(subcategory_ids)))

        if len(views) == 1:
            return views[0]
        return self.final_attn(torch.stack(views, dim=1))

In [23]:

class UserEncoder(nn.Module):
    """Pool the vectors of a user's clicked news into a single user vector.

    Two modes, selected by ``self.user_log_mask``:

    * ``True``  -- padded history slots are excluded through the attention mask.
    * ``False`` -- padded history slots are replaced by the learned ``pad_doc``
      vector and attention runs unmasked over the full history.
    """

    def __init__(self):
        super(UserEncoder, self).__init__()
        news_dim = 400
        user_query_vector_dim = 200
        # Kept for callers that read it; forward() takes the history length
        # from its input instead of assuming this value.
        self.user_log_length = 100
        self.user_log_mask = False
        self.attn = AttentionPooling(news_dim, user_query_vector_dim)
        # Assign the Parameter directly (dtype pinned to float32) so that it is
        # always registered with the module; wrapping it in `.type(...)` only
        # keeps it a Parameter when no dtype conversion happens.
        self.pad_doc = nn.Parameter(
            torch.empty(1, news_dim, dtype=torch.float32).uniform_(-1, 1))

    def forward(self, news_vecs, log_mask=None):
        '''
            news_vecs: batch_size, history_num, news_dim
            log_mask: batch_size, history_num (1 = real click, 0 = padding);
                may be None, in which case no slot is treated as padding
            Return: batch_size, news_dim
        '''
        if self.user_log_mask:
            return self.attn(news_vecs, log_mask)

        if log_mask is not None:
            batch_size, history_num = news_vecs.shape[0], news_vecs.shape[1]
            keep = log_mask.unsqueeze(dim=-1)
            padding_doc = self.pad_doc.unsqueeze(dim=0).expand(batch_size, history_num, -1)
            # Keep real clicks, substitute the learned vector for padded slots.
            news_vecs = news_vecs * keep + padding_doc * (1 - keep)
        return self.attn(news_vecs)

In [24]:
class NAML(torch.nn.Module):
    """Click model: scores candidate news by dot product with a user vector."""

    def __init__(self, embedding_matrix, num_category, num_subcategory, **kwargs):
        super().__init__()
        # The pretrained vectors stay trainable (freeze=False); row 0 is padding.
        word_embedding = nn.Embedding.from_pretrained(
            torch.from_numpy(embedding_matrix).float(),
            freeze=False,
            padding_idx=0)

        self.news_encoder = NewsEncoder(word_embedding, num_category, num_subcategory)
        self.user_encoder = UserEncoder()
        self.loss_fn = nn.CrossEntropyLoss()
        self.npratio = 4
        self.news_dim = 400
        self.user_log_length = 100

    def forward(self, history, history_mask, candidate, label):
        '''
            history: batch_size, history_length, features_per_news
            history_mask: batch_size, history_length
            candidate: batch_size, 1+K, features_per_news
            label: batch_size -- class index of the clicked candidate, as
                   consumed by nn.CrossEntropyLoss
            Return: (loss, score) with score of shape batch_size, 1+K
        '''
        num_words = history.shape[-1]

        # Encode candidates first, then the history (same call order as the
        # dropout draws depend on it during training).
        flat_candidates = candidate.reshape(-1, num_words)
        candidate_news_vecs = self.news_encoder(flat_candidates)
        candidate_news_vecs = candidate_news_vecs.reshape(-1, 1 + self.npratio, self.news_dim)

        flat_history = history.reshape(-1, num_words)
        history_news_vecs = self.news_encoder(flat_history)
        history_news_vecs = history_news_vecs.reshape(-1, self.user_log_length, self.news_dim)

        user_vec = self.user_encoder(history_news_vecs, history_mask)
        score = torch.bmm(candidate_news_vecs, user_vec.unsqueeze(dim=-1)).squeeze(dim=-1)
        return self.loss_fn(score, label), score

In [25]:
class DatasetTrain(IterableDataset):
    """Stream training samples from a negative-sampled behaviour file.

    Each line is tab separated; field 3 is the space-separated click history,
    field 4 the positive news id and field 5 the space-separated negative ids.
    """

    def __init__(self, filename, news_index, news_combined):
        # Zero-argument super() actually runs the parent initialiser;
        # `super(DatasetTrain).__init__()` only builds an unbound super object.
        super().__init__()
        self.filename = filename
        self.news_index = news_index
        self.news_combined = news_combined
        self.user_log_length = 100
        self.npratio = 4

    def trans_to_nindex(self, nids):
        """Map news ids to row indices; unknown ids map to 0 (the padding row)."""
        return [self.news_index.get(nid, 0) for nid in nids]

    def pad_to_fix_len(self, x, fix_length, padding_front=True, padding_value=0):
        """Keep the last ``fix_length`` items of ``x`` and pad up to that length.

        Returns:
            ``(padded_list, mask)`` where ``mask`` is a float32 array holding 1
            for real items and 0 for padding.
        """
        kept = x[-fix_length:]
        # Multiplying a list by a negative count yields [], so over-long input
        # simply gets no padding.
        padding = [padding_value] * (fix_length - len(x))
        ones = [1] * min(fix_length, len(x))
        zeros = [0] * (fix_length - len(x))
        if padding_front:
            pad_x, mask = padding + kept, zeros + ones
        else:
            pad_x, mask = kept + padding, ones + zeros
        return pad_x, np.array(mask, dtype='float32')

    def line_mapper(self, line):
        """Turn one line into ``(user_feature, log_mask, news_feature, label)``.

        ``label`` is the randomly chosen position (0..npratio) at which the
        positive article is inserted among the negatives.
        """
        line = line.strip().split('\t')
        click_docs = line[3].split()
        sess_pos = line[4].split()
        sess_neg = line[5].split()

        click_docs, log_mask = self.pad_to_fix_len(self.trans_to_nindex(click_docs), self.user_log_length)
        user_feature = self.news_combined[click_docs]

        pos = self.trans_to_nindex(sess_pos)
        neg = self.trans_to_nindex(sess_neg)

        label = random.randint(0, self.npratio)
        sample_news = neg[:label] + pos + neg[label:]
        news_feature = self.news_combined[sample_news]
        return user_feature, log_mask, news_feature, label

    def __iter__(self):
        # Generator + context manager: the file is closed when iteration ends
        # (or the iterator is discarded) instead of leaking one handle per epoch.
        with open(self.filename) as file_iter:
            yield from map(self.line_mapper, file_iter)


class DatasetTest(DatasetTrain):
    """Stream evaluation samples from a raw behaviour file.

    Each line is tab separated; field 3 is the space-separated click history
    and field 4 the space-separated impressions written as ``newsid-label``.
    Features are looked up in ``news_scoring`` (one row per news index).
    """

    def __init__(self, filename, news_index, news_scoring):
        # Run the parent initialiser for real (the previous
        # `super(DatasetTrain).__init__()` only built an unbound super object);
        # it stores filename / news_index and the default user_log_length.
        super().__init__(filename, news_index, news_scoring)
        self.news_scoring = news_scoring
        self.user_log_length = 100

    def line_mapper(self, line):
        """Turn one line into ``(user_feature, log_mask, news_feature, labels)``."""
        line = line.strip().split('\t')
        click_docs = line[3].split()
        click_docs, log_mask = self.pad_to_fix_len(self.trans_to_nindex(click_docs), self.user_log_length)
        user_feature = self.news_scoring[click_docs]

        # Split every "newsid-label" pair once and reuse both halves.
        impressions = [imp.split('-') for imp in line[4].split()]
        candidate_news = self.trans_to_nindex([imp[0] for imp in impressions])
        labels = np.array([int(imp[1]) for imp in impressions])
        news_feature = self.news_scoring[candidate_news]

        return user_feature, log_mask, news_feature, labels

    def __iter__(self):
        # Close the file once iteration finishes instead of leaking the handle.
        with open(self.filename) as file_iter:
            yield from map(self.line_mapper, file_iter)


class NewsDataset(Dataset):
    """Map-style dataset that serves the rows of an array-like ``data``."""

    def __init__(self, data):
        self.data = data

    def __len__(self):
        # The number of samples is the size of the first axis.
        num_rows = self.data.shape[0]
        return num_rows

    def __getitem__(self, idx):
        row = self.data[idx]
        return row

In [26]:

def acc(y_true, y_hat):
    """Fraction of rows whose highest-scoring index equals the target index.

    Args:
        y_true: tensor of target indices, one per row.
        y_hat: tensor of scores; the arg-max is taken over the last axis.

    Returns:
        A 0-dim float tensor.
    """
    predicted = y_hat.argmax(dim=-1)
    num_correct = (y_true == predicted).sum()
    return num_correct.data.float() * 1.0 / y_true.shape[0]

In [27]:
# train():
# Parse the training news file: per-article tokens plus the article index and
# the category / subcategory / word vocabularies built from it.
news, news_index, category_dict, subcategory_dict, word_dict = read_news(
    os.path.join(train_data_dir, 'news.tsv'), mode='train')

# Encode every article as integer rows, then join title ids, category id and
# subcategory id along the last axis into one feature row per article.
news_title, news_category, news_subcategory = get_doc_input(
    news, news_index, category_dict, subcategory_dict, word_dict)
news_combined = np.concatenate([x for x in [news_title, news_category, news_subcategory] if x is not None], axis=-1)
# Fill a (len(word_dict) + 1, word_embedding_dim) matrix from the embedding
# file; `have_word` lists the vocabulary words that were found in it.
embedding_matrix, have_word = load_matrix(glove_embedding_path,
                                                word_dict,
                                                word_embedding_dim)


51282it [00:05, 9537.24it/s]
100%|████████████████████████████████████████████████████████████████████████| 51282/51282 [00:00<00:00, 229934.33it/s]


In [28]:
# Vocabulary size: number of distinct title tokens kept by read_news.
len(word_dict)

37535

In [16]:
# Expected (len(news) + 1, num_words_title + 2): title ids + category + subcategory.
news_combined.shape

(51283, 22)

In [31]:
# Build the model from the pretrained word vectors and the vocabulary sizes.
model = NAML(embedding_matrix, len(category_dict), len(subcategory_dict))
optimizer = optim.Adam(model.parameters(), lr=0.0003)
# Move the parameters to the GPU (requires a CUDA device).
model = model.cuda()
# Make sure autograd is on and the model is in training mode (dropout active).
torch.set_grad_enabled(True)
model.train()

NAML(
  (news_encoder): NewsEncoder(
    (embedding_matrix): Embedding(12507, 300, padding_idx=0)
    (category_emb): Embedding(18, 100, padding_idx=0)
    (category_dense): Linear(in_features=100, out_features=400, bias=True)
    (subcategory_emb): Embedding(265, 100, padding_idx=0)
    (subcategory_dense): Linear(in_features=100, out_features=400, bias=True)
    (final_attn): AttentionPooling(
      (att_fc1): Linear(in_features=400, out_features=200, bias=True)
      (att_fc2): Linear(in_features=200, out_features=1, bias=True)
    )
    (cnn): Conv1d(300, 400, kernel_size=(3,), stride=(1,), padding=(1,))
    (attn): AttentionPooling(
      (att_fc1): Linear(in_features=400, out_features=200, bias=True)
      (att_fc2): Linear(in_features=200, out_features=1, bias=True)
    )
  )
  (user_encoder): UserEncoder(
    (attn): AttentionPooling(
      (att_fc1): Linear(in_features=400, out_features=200, bias=True)
      (att_fc2): Linear(in_features=200, out_features=1, bias=True)
    )
 

In [32]:
# Stream training samples from the negative-sampled behaviour file; the
# DataLoader batches them with its default collation.
dataset = DatasetTrain(processed_file_path, news_index, news_combined)
dataloader = DataLoader(dataset, batch_size=128)


In [33]:
# Train for 6 passes over the training file.
for ep in range(6):
    # Running totals for this epoch. Both are *sums over batches* (per-batch
    # loss and per-batch accuracy), not averages.
    loss = 0.0
    accuary = 0.0
    print("EPOCH: " + str(ep))
    for cnt, (log_ids, log_mask, input_ids, targets) in tqdm(enumerate(dataloader)):
        # Move the batch to the GPU the model lives on.
        log_ids = log_ids.cuda()
        log_mask = log_mask.cuda()
        input_ids = input_ids.cuda()
        targets = targets.cuda()

        # Forward pass returns the batch loss and the candidate scores.
        bz_loss, y_hat = model(log_ids, log_mask, input_ids, targets)
        loss += bz_loss.data.float()
        accuary += acc(targets, y_hat)
        # Standard optimisation step: clear old gradients, backprop, update.
        optimizer.zero_grad()
        bz_loss.backward()
        optimizer.step()
        # stop
    # Divide by the number of batches (cnt + 1) to read these as means.
    print(loss, accuary)

    #     if rank == 0 and cnt != 0 and cnt % args.save_steps == 0:
    #         ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}-{cnt}.pt')
    #         torch.save(
    #             {
    #                 'model_state_dict':
    #                     {'.'.join(k.split('.')[1:]): v for k, v in model.state_dict().items()}
    #                     if is_distributed else model.state_dict(),
    #                 'category_dict': category_dict,
    #                 'word_dict': word_dict,
    #                 'subcategory_dict': subcategory_dict
    #             }, ckpt_path)
    #         logging.info(f"Model saved to {ckpt_path}.")

    # logging.info('Training finish.')

    # if rank == 0:
    #     ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}.pt')
    #     torch.save(
    #         {
    #             'model_state_dict':
    #                 {'.'.join(k.split('.')[1:]): v for k, v in model.state_dict().items()}
    #                 if is_distributed else model.state_dict(),
    #             'category_dict': category_dict,
    #             'subcategory_dict': subcategory_dict,
    #             'word_dict': word_dict,
    #         }, ckpt_path)
    #     logging.info(f"Model saved to {ckpt_path}.")

EPOCH: 0


1847it [03:47,  8.12it/s]


tensor(2566.7004, device='cuda:0') tensor(797.2232, device='cuda:0')
EPOCH: 1


1847it [03:46,  8.17it/s]


tensor(2443.4343, device='cuda:0') tensor(860.4632, device='cuda:0')
EPOCH: 2


1847it [03:49,  8.06it/s]


tensor(2390.1062, device='cuda:0') tensor(887.2489, device='cuda:0')
EPOCH: 3


1847it [03:48,  8.09it/s]


tensor(2348.0330, device='cuda:0') tensor(907.5112, device='cuda:0')
EPOCH: 4


1847it [03:50,  8.03it/s]


tensor(2310.7275, device='cuda:0') tensor(926.6741, device='cuda:0')
EPOCH: 5


1847it [03:46,  8.16it/s]

tensor(2273.4028, device='cuda:0') tensor(940.6909, device='cuda:0')





In [34]:
# Directory holding the `news.tsv` / `behaviors.tsv` files used for evaluation.
test_data_dir = './data/MINDsmall_dev'

In [35]:
model.cuda()

# Switch to inference: evaluation mode (dropout off) and autograd disabled
# globally from here on.
model.eval()
torch.set_grad_enabled(False)
# NOTE: this rebinds `news`, `news_index` and `news_combined` to the evaluation
# articles; the training-time values are no longer available after this cell.
news, news_index = read_news(os.path.join(test_data_dir, 'news.tsv'), mode='test')
# The category / subcategory / word dictionaries from training are reused, so
# entries unseen during training are encoded as 0.
news_title, news_category, news_subcategory = get_doc_input(
    news, news_index, category_dict, subcategory_dict, word_dict)
news_combined = np.concatenate([x for x in [news_title, news_category, news_subcategory] if x is not None], axis=-1)


42416it [00:04, 10416.95it/s]
100%|████████████████████████████████████████████████████████████████████████| 42416/42416 [00:00<00:00, 237593.63it/s]


In [36]:

# Serve the encoded evaluation articles row by row, in index order (no
# shuffling), so encoder outputs line up with the news indices.
news_dataset = NewsDataset(news_combined)
news_dataloader = DataLoader(news_dataset, batch_size=128)

In [37]:
# Pre-compute one vector per evaluation article with the trained news encoder.
news_scoring = []
with torch.no_grad():
    for input_ids in tqdm(news_dataloader):
        input_ids = input_ids.cuda()
        news_vec = model.news_encoder(input_ids)
        # Bring the batch back to the CPU as a NumPy array before collecting it.
        news_vec = news_vec.to(torch.device("cpu")).detach().numpy()
        news_scoring.extend(news_vec)

# Stack the per-article vectors into one array indexed by news index.
news_scoring = np.array(news_scoring)

# doc_sim = 0
# for _ in tqdm(range(1000000)):
#     i = random.randrange(1, len(news_scoring))
#     j = random.randrange(1, len(news_scoring))
#     if i != j:
#         doc_sim += np.dot(news_scoring[i], news_scoring[j]) / (np.linalg.norm(news_scoring[i]) * np.linalg.norm(news_scoring[j]))

# Behaviour log that drives the evaluation.
data_file_path = os.path.join(test_data_dir, f'behaviors.tsv')

def collate_fn(tuple_list):
    """Collate evaluation samples into a batch.

    Args:
        tuple_list: list of ``(user_feature, log_mask, news_feature, labels)``
            samples. ``user_feature`` and ``log_mask`` must have the same shape
            across samples; ``news_feature`` and ``labels`` may vary in length.

    Returns:
        ``(log_vecs, log_mask, news_vecs, labels)`` where the first two are
        float32 CPU tensors stacked along a new batch axis and the last two are
        plain Python lists (one entry per sample), since the number of
        candidates differs between impressions.
    """
    # Build one contiguous float32 array first: handing a Python list of
    # ndarrays to the tensor constructor converts element by element and is
    # very slow for large histories.
    log_vecs = torch.from_numpy(np.asarray([x[0] for x in tuple_list], dtype=np.float32))
    log_mask = torch.from_numpy(np.asarray([x[1] for x in tuple_list], dtype=np.float32))
    news_vecs = [x[2] for x in tuple_list]
    labels = [x[3] for x in tuple_list]
    return (log_vecs, log_mask, news_vecs, labels)

# NOTE: `dataset` / `dataloader` are rebound here from the training objects to
# the evaluation ones; the custom collate_fn keeps variable-length candidate
# lists as Python lists.
dataset = DatasetTest(data_file_path, news_index, news_scoring)
dataloader = DataLoader(dataset, batch_size=128, collate_fn=collate_fn)

100%|███████████████████████████████████████████████████████████████████████████████| 332/332 [00:00<00:00, 370.61it/s]


In [38]:
# Per-impression metric values collected by the evaluation loop. Re-running
# this cell resets them; re-running only the loop cell appends to them again.
AUC = []
MRR = []
nDCG5 = []
nDCG10 = []

def print_metrics(cnt, x):
    """Print the batch counter followed by the metric values on one line."""
    print(cnt, x, sep=' ')

def get_mean(arr):
    """Return the mean of every sequence in ``arr`` as a list."""
    means = []
    for values in arr:
        means.append(np.array(values).mean())
    return means

def get_sum(arr):
    """Return the sum of every sequence in ``arr`` as a list."""
    totals = []
    for values in arr:
        totals.append(np.array(values).sum())
    return totals


def dcg_score(y_true, y_score, k=10):
    """Discounted cumulative gain of the ``k`` highest-scored items.

    Items are ranked by descending ``y_score``; the item at 0-based rank ``i``
    contributes ``(2**relevance - 1) / log2(i + 2)``.
    """
    top_k = np.argsort(y_score)[::-1][:k]
    relevance = np.take(y_true, top_k)
    ranks = np.arange(len(relevance))
    return np.sum((2**relevance - 1) / np.log2(ranks + 2))


def ndcg_score(y_true, y_score, k=10):
    """DCG of the predicted ranking divided by the DCG of the ideal ranking.

    The ideal ranking is obtained by sorting on ``y_true`` itself. No guard is
    applied for an ideal DCG of zero (i.e. no relevant item).
    """
    ideal_dcg = dcg_score(y_true, y_true, k)
    predicted_dcg = dcg_score(y_true, y_score, k)
    return predicted_dcg / ideal_dcg


def mrr_score(y_true, y_score):
    """Mean reciprocal rank of the relevant items.

    Items are ranked by descending ``y_score``; the reciprocal ranks of the
    relevant items are summed and divided by the total relevance.
    """
    ranked = np.take(y_true, np.argsort(y_score)[::-1])
    positions = np.arange(len(ranked)) + 1
    return np.sum(ranked / positions) / np.sum(ranked)


def ctr_score(y_true, y_score, k=1):
    """Mean label of the ``k`` items with the highest ``y_score``."""
    top_k = np.argsort(y_score)[::-1][:k]
    return np.mean(np.take(y_true, top_k))

In [39]:
# Score every evaluation impression and accumulate ranking metrics.
for cnt, (log_vecs, log_mask, news_vecs, labels) in enumerate(dataloader):
    log_vecs = log_vecs.cuda()
    log_mask = log_mask.cuda()

    # One user vector per impression, brought back to the CPU as NumPy.
    user_vecs = model.user_encoder(log_vecs, log_mask).to(torch.device("cpu")).detach().numpy()

    for user_vec, news_vec, label in zip(user_vecs, news_vecs, labels):
        # Skip impressions whose labels are all 0 or all 1: the ranking
        # metrics below need both classes to be present.
        if label.mean() == 0 or label.mean() == 1:
            continue

        # Candidate score = dot product of candidate vector and user vector.
        score = np.dot(news_vec, user_vec)
        auc = roc_auc_score(label, score)
        mrr = mrr_score(label, score)
        ndcg5 = ndcg_score(label, score, k=5)
        ndcg10 = ndcg_score(label, score, k=10)

        AUC.append(auc)
        MRR.append(mrr)
        nDCG5.append(ndcg5)
        nDCG10.append(ndcg10)

    # Running means so far, reported every 100 batches.
    if cnt % 100 == 0:
        print_metrics(cnt, get_mean([AUC, MRR, nDCG5, nDCG10]))

# Final means over all scored impressions (`cnt` is the last batch index).
print_metrics(cnt, get_mean([AUC, MRR, nDCG5, nDCG10]))

0 [0.6583311161846661, 0.28823356044382076, 0.30162660835532784, 0.3770027802180992]
100 [0.663075304847043, 0.3198690653649036, 0.3531824126277258, 0.41437885925414053]
200 [0.6612801711189159, 0.32131603397821135, 0.3540195760368032, 0.4159903165664601]
300 [0.6603789272031168, 0.31893979938771344, 0.3512724441808343, 0.4140787979606762]
400 [0.6599928270580613, 0.31718024348412643, 0.34940130085014703, 0.41200881140997625]
500 [0.6593289509983885, 0.31680712138703115, 0.3490334193262708, 0.4119601314507803]
571 [0.6591725605952566, 0.31704146764496266, 0.34940791521193026, 0.41236519077303857]
