In [None]:
import numpy as np
import os
import random
import pandas as pd
import pickle
import ast
import dgl
import argparse
from sklearn.neighbors import NearestNeighbors

import pickle
import argparse

import torch
import torch.nn as nn
#import torchtext
import torchtext.legacy as torchtext

from torch.utils.data import DataLoader
import tqdm

import layers
import sampler as sampler_module
import evaluation

from scipy import spatial
from sklearn.neighbors import NearestNeighbors


from numpy import dot
from numpy.linalg import norm

In [None]:
def seed_everything(seed: int = 42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and every CUDA device), exports ``PYTHONHASHSEED`` and puts
    cuDNN into deterministic mode.

    :seed: integer seed applied to all generators
    """
    random.seed(seed)
    np.random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the running process has already fixed its hash seed.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # Seed all CUDA devices, not just the current one (the notebook uses cuda:1).
    torch.cuda.manual_seed_all(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # Benchmark mode lets cuDNN pick kernels by timing them, which can differ
    # from run to run; it has to be off for reproducible results.
    torch.backends.cudnn.benchmark = False  # type: ignore

In [None]:
# Fix every RNG seed once, before any data loading or sampling runs.
seed_everything(225)

In [None]:
def check_param_num(model):
    '''
    Count the scalar parameters of a model.

    :model: pytorch model object (anything exposing ``parameters()``)
    :return: int, total number of elements over all parameter tensors
    '''
    # numel() counts every element; summing shape[0] would ignore all but the
    # first dimension and fail on 0-dim parameters.
    return sum(parameter.numel() for parameter in model.parameters())

def node_to_item(nodes, id_dict, cateogry_dict):
    '''
    Translate graph node ids into real item ids via two chained lookups.

    :nodes: iterable of node ids
    :id_dict: {node id: item category id}
    :cateogry_dict: {item category id: real item id}
    :return: list of real item ids, in the same order as `nodes`
    '''
    # First hop: node id -> item category id (fully resolved before the second hop).
    category_ids = list(map(id_dict.__getitem__, nodes))
    # Second hop: item category id -> real item id.
    return list(map(cateogry_dict.__getitem__, category_ids))

def get_blocks(seeds, item_ntype, textset, sampler):
    '''
    Ask `sampler` for the blocks of every seed batch.

    :seeds: iterable of seed batches
    :item_ntype: item node type, forwarded to ``sampler.get_block``
    :textset: text dataset, forwarded to ``sampler.get_block``
    :sampler: object exposing ``get_block(seed, item_ntype, textset)``
    :return: list with one ``get_block`` result per seed batch, in order
    '''
    return [sampler.get_block(seed_batch, item_ntype, textset) for seed_batch in seeds]

def get_all_emb(gnn, seed_array, textset, item_ntype, neighbor_sampler, batch_size, device='cuda'):
    '''
    Compute the representation of every seed node with `gnn`, batch by batch.

    :gnn: model exposing ``get_repr(blocks)``; moved to `device` and set to eval mode
    :seed_array: only its first dimension is used, as the number of seeds (0..n-1)
    :textset: text dataset forwarded to the sampler
    :item_ntype: item node type forwarded to the sampler
    :neighbor_sampler: sampler handed to ``get_blocks``
    :batch_size: number of seeds per batch
    :device: device on which blocks and model are placed
    :return: tensor with the per-batch representations concatenated along dim 0
    '''
    seed_batches = torch.arange(seed_array.shape[0]).split(batch_size)
    batched_blocks = get_blocks(seed_batches, item_ntype, textset, neighbor_sampler)
    gnn = gnn.to(device)
    gnn.eval()
    embeddings = []
    with torch.no_grad():
        for blocks in batched_blocks:
            # Move the blocks of this batch in place before the forward pass.
            for idx, block in enumerate(blocks):
                blocks[idx] = block.to(device)
            embeddings.append(gnn.get_repr(blocks))
        return torch.cat(embeddings, 0)
    
def item_by_user_batch(graph, user_ntype, item_ntype, user_to_item_etype, weight, batch_size, k):
    '''
    Collect, per user, the item node ids of up to `k` top-ranked interactions.

    :graph: DGL heterograph containing the `user_to_item_etype` edges
    :user_ntype: unused; kept so existing callers keep working
    :item_ntype: unused; kept so existing callers keep working
    :user_to_item_etype: edge type going from users to items
    :weight: name of the edge feature handed to ``dgl.sampling.select_topk``
             (the notebook passes the timestamp column)
    :batch_size: unused; kept so existing callers keep working
    :k: maximum number of interactions kept per user
    :return: list of numpy arrays of item node ids, one per user that has at
             least one outgoing edge, ordered by ascending user node id.
             Users without edges are absent, so a list position equals the
             user node id only when every user has an interaction.
    '''
    graph_slice = graph.edge_type_subgraph([user_to_item_etype])
    latest_interactions = dgl.sampling.select_topk(graph_slice, k, weight, edge_dir='out')
    user, latest_items = latest_interactions.all_edges(form='uv', order='srcdst')
    # user, latest_items: up to k entries per user

    grouped = pd.DataFrame({'user': user.numpy(), 'item': latest_items.numpy()}).groupby('user')
    # groupby sorts its keys, so iteration follows ascending user id.
    return [group['item'].values for _, group in grouped]

def prec(recommendations, ground_truth):
    '''
    Fraction of users with at least one relevant item among their recommendations.

    :recommendations: (n_users, K) integer array of recommended item indices
    :ground_truth: (n_users, n_items) matrix, non-zero where an item is relevant
    :return: mean over users of "any recommended item is relevant"
    '''
    num_users, _ = ground_truth.shape
    top_k = recommendations.shape[1]
    # Pair every recommended item with the row of the user it was recommended to.
    rows = np.arange(num_users).repeat(top_k)
    cols = recommendations.flatten()
    hits = ground_truth[rows, cols].reshape((num_users, top_k))
    return hits.any(axis=1).mean()

class LatestNNRecommender(object):
    """Nearest-neighbour recommender seeded by each user's top-ranked interaction.

    For every user the single interaction with the largest `timestamp` edge
    feature is selected, and the K items whose embeddings score highest
    (dot product) against that item are recommended.
    """

    def __init__(self, user_ntype, item_ntype, user_to_item_etype, timestamp, batch_size):
        self.user_ntype = user_ntype
        self.item_ntype = item_ntype
        self.user_to_item_etype = user_to_item_etype
        self.batch_size = batch_size
        # Name of the edge feature used to rank a user's interactions.
        self.timestamp = timestamp

    def recommend(self, full_graph, K, h_user, h_item):
        """
        Return a (n_user, K) matrix of recommended items for each user

        :full_graph: DGL heterograph holding the user-to-item edges
        :K: number of items recommended per user
        :h_user: unused; kept for interface compatibility
        :h_item: (n_items, dim) item embedding tensor
        """
        graph_slice = full_graph.edge_type_subgraph([self.user_to_item_etype])
        n_users = full_graph.number_of_nodes(self.user_ntype)
        # Exactly ONE edge per user is needed here: the assert below and the
        # `latest_items[user_batch]` lookup both rely on one row per user.
        # Requesting K edges breaks them as soon as a user has more than one
        # interaction.
        latest_interactions = dgl.sampling.select_topk(graph_slice, 1, self.timestamp, edge_dir='out')
        user, latest_items = latest_interactions.all_edges(form='uv', order='srcdst')
        # each user should have at least one "latest" interaction
        assert torch.equal(user, torch.arange(n_users))

        recommended_batches = []
        user_batches = torch.arange(n_users).split(self.batch_size)
        for user_batch in user_batches:
            latest_item_batch = latest_items[user_batch]
            # Dot-product score of each user's latest item against every item.
            dist = h_item[latest_item_batch] @ h_item.t()

            # Masking of already-interacted items is intentionally disabled:
            # for i, u in enumerate(user_batch.tolist()):
            #     interacted_items = full_graph.successors(u, etype=self.user_to_item_etype)
            #     dist[i, interacted_items] = -np.inf
            recommended_batches.append(dist.topk(K, 1)[1])

        recommendations = torch.cat(recommended_batches, 0)
        return recommendations


def evaluate_nn(dataset, h_item, k, batch_size):
    '''
    Score nearest-neighbour recommendations against the validation matrix.

    :dataset: dict providing 'train-graph', 'val-matrix', 'user-type',
              'item-type', 'user-to-item-type' and 'timestamp-edge-column'
    :h_item: item embedding tensor
    :k: number of recommendations per user
    :batch_size: number of users scored per batch
    :return: value of ``prec`` for the recommendations on the validation matrix
    '''
    # Only the entries actually used are read, so a dataset without test
    # matrix or item texts can still be evaluated.
    g = dataset['train-graph']
    val_matrix = dataset['val-matrix'].tocsr()

    rec_engine = LatestNNRecommender(
        dataset['user-type'],
        dataset['item-type'],
        dataset['user-to-item-type'],
        dataset['timestamp-edge-column'],
        batch_size)

    recommendations = rec_engine.recommend(g, k, None, h_item).cpu().numpy()
    return prec(recommendations, val_matrix)

class PinSAGEModel(nn.Module):
    """Item-to-item model: feature projector + SAGE network + pair scorer.

    The three sub-modules come from the project's ``layers`` module.
    """

    def __init__(self, full_graph, ntype, textsets, hidden_dims, n_layers):
        super().__init__()

        self.proj = layers.LinearProjector(full_graph, ntype, textsets, hidden_dims)
        self.sage = layers.SAGENet(hidden_dims, n_layers)
        self.scorer = layers.ItemToItemScorer(full_graph, ntype)

    def forward(self, pos_graph, neg_graph, blocks):
        """Return ``max(0, neg_score - pos_score + 1)`` for the sampled pairs."""
        item_repr = self.get_repr(blocks)
        positive = self.scorer(pos_graph, item_repr)
        negative = self.scorer(neg_graph, item_repr)
        margin = negative - positive + 1
        return margin.clamp(min=0)

    def get_repr(self, blocks):
        """Projected features of the output nodes plus the SAGE output for them."""
        src_features = self.proj(blocks[0].srcdata)
        dst_features = self.proj(blocks[-1].dstdata)
        return dst_features + self.sage(blocks, src_features)
        
def load_model(data_dict, device, lr, hidden_dims, num_layers, save_path):
    '''
    Build a PinSAGEModel and restore its weights from a checkpoint.

    :data_dict: dict providing 'graph', 'item_ntype' and 'textset'
    :device: device the model and the checkpoint tensors are placed on
    :lr: unused (no optimizer is needed for inference); kept for compatibility
    :hidden_dims: hidden dimension of the model
    :num_layers: number of SAGE layers
    :save_path: checkpoint path without the '.pt' suffix
    :return: the model with the 'model_state_dict' weights loaded
    '''
    gnn = PinSAGEModel(data_dict['graph'], data_dict['item_ntype'], data_dict['textset'], hidden_dims, num_layers).to(device)
    # Map the checkpoint onto the requested device rather than a hard-coded
    # "cuda:1", so loading also works on CPU-only or single-GPU machines.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    checkpoint = torch.load(save_path + '.pt', map_location=device)
    gnn.load_state_dict(checkpoint['model_state_dict'])

    return gnn

def prepare_dataset(data_dict):
    '''
    Attach id features to the graph and build the torchtext dataset of item texts.

    :data_dict: dict providing 'graph', 'item_texts', 'user_ntype' and 'item_ntype'
    :return: the same dict, with 'graph' updated and 'textset' added
             (None when there are no item texts)
    '''
    graph = data_dict['graph']
    texts_by_field = data_dict['item_texts']
    user_ntype = data_dict['user_ntype']
    item_ntype = data_dict['item_ntype']

    # Consecutive ids stored as node features for both node types.
    graph.nodes[user_ntype].data['id'] = torch.arange(graph.number_of_nodes(user_ntype))
    graph.nodes[item_ntype].data['id'] = torch.arange(graph.number_of_nodes(item_ntype))
    data_dict['graph'] = graph

    # No textual features: nothing to tokenise.
    if len(texts_by_field) == 0:
        data_dict['textset'] = None
        return data_dict

    # One torchtext field per text column.
    fields = {
        key: torchtext.data.Field(include_lengths=True, lower=True, batch_first=True)
        for key, _ in texts_by_field.items()
    }
    # One example per item node, carrying that node's value of every text column.
    examples = [
        torchtext.data.Example.fromlist(
            [texts_by_field[key][node] for key in texts_by_field.keys()],
            [(key, fields[key]) for key in texts_by_field.keys()])
        for node in range(graph.number_of_nodes(item_ntype))
    ]

    textset = torchtext.data.Dataset(examples, fields)
    for key, field in fields.items():
        field.build_vocab(getattr(textset, key))
    data_dict['textset'] = textset

    return data_dict

def prepare_dataloader(data_dict, batch_size, 
                       random_walk_length, random_walk_restart_prob, 
                       num_random_walks, num_neighbors, num_layers, num_workers):
    '''
    Build the training/test data loaders and the neighbour sampler.

    :data_dict: dict providing 'graph', 'user_ntype', 'item_ntype' and 'textset'
    :batch_size: batch size of the batch sampler and of the test loader
    :random_walk_length, random_walk_restart_prob, num_random_walks,
     num_neighbors, num_layers: forwarded to ``sampler_module.NeighborSampler``
    :num_workers: worker count of both data loaders
    :return: (iterator over the training loader, test loader, neighbour sampler)
    '''
    graph = data_dict['graph']
    user_ntype, item_ntype = data_dict['user_ntype'], data_dict['item_ntype']
    textset = data_dict['textset']

    # Samplers from the project's sampler module.
    train_batches = sampler_module.ItemToItemBatchSampler(
        graph, user_ntype, item_ntype, batch_size)
    neighbor_sampler = sampler_module.NeighborSampler(
        graph, user_ntype, item_ntype,
        random_walk_length, random_walk_restart_prob,
        num_random_walks, num_neighbors, num_layers)
    collator = sampler_module.PinSAGECollator(neighbor_sampler, graph, item_ntype, textset)

    train_loader = DataLoader(
        train_batches,
        collate_fn=collator.collate_train,
        num_workers=num_workers)

    # The test loader walks over every item node id in order.
    test_loader = DataLoader(
        torch.arange(graph.number_of_nodes(item_ntype)),
        batch_size=batch_size,
        collate_fn=collator.collate_test,
        num_workers=num_workers)

    return iter(train_loader), test_loader, neighbor_sampler

In [None]:
# --- Run configuration ---------------------------------------------------
# Pickled dataset bundle read by the loading cell.
dataset_path = "./graph_data/kdata_entire9.pkl"
# Requested device; the setup cell falls back to CPU when CUDA is unavailable.
device="cuda:1"
# Checkpoint path prefix; load_model appends '.pt'.
save_path="./model_first/model_20epoch"
# Random-walk neighbour sampling parameters (forwarded to sampler_module.NeighborSampler).
random_walk_length = 2
random_walk_restart_prob = 0.5
num_random_walks = 10
num_neighbors = 10
# Model size; presumably has to match the checkpoint being loaded — TODO confirm.
num_layers = 6
hidden_dims = 1024
batch_size = 64
# NOTE(review): batches_per_epoch, num_epochs, eval_epochs and save_epochs are
# not referenced anywhere in the visible notebook — presumably leftovers from
# the training script.
batches_per_epoch = 10000
num_epochs = 500
num_workers = 0
# Forwarded to load_model.
lr = 3e-5
eval_epochs = 10
save_epochs = 10
# Number of interactions kept per user by item_by_user_batch.
k = 10 

In [None]:
# Load the pickled dataset bundle.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# point dataset_path at files from a trusted source.
with open(dataset_path, 'rb') as f:
    dataset = pickle.load(f)

In [None]:
# Re-key the dataset bundle (hyphenated keys) into the underscore-keyed dict
# consumed by prepare_dataset / prepare_dataloader / load_model.
data_dict = {
        'graph': dataset['train-graph'],
        # No validation/test matrices are used in this notebook.
        'val_matrix': None,
        'test_matrix': None,
        'item_texts': dataset['item-texts'],
        # Looked up by user id in the evaluation cells to get a user's test labels.
        'testset': dataset['testset'], 
        'user_ntype': dataset['user-type'],
        'item_ntype': dataset['item-type'],
        'user_to_item_etype': dataset['user-to-item-type'],
        'timestamp': dataset['timestamp-edge-column'],
        # Lookup tables used by the evaluation cells to recover real user / item ids.
        'user_category': dataset['user-category'], 
        'item_category': dataset['item-category']
    }

In [None]:
# Resolve the compute device, falling back to CPU without CUDA.
device = torch.device(str(device) if torch.cuda.is_available() else 'cpu')
if device.type != 'cpu':
    print ('Current cuda device ', torch.cuda.current_device()) # check
else:
    print('Current using CPUs')

# Dataset, data loaders and neighbour sampler
data_dict = prepare_dataset(data_dict)
dataloader_it, dataloader_test, neighbor_sampler = prepare_dataloader(
    data_dict, batch_size,
    random_walk_length, random_walk_restart_prob,
    num_random_walks, num_neighbors, num_layers, num_workers)

# Model restored from the checkpoint
gnn = load_model(data_dict, device, lr, hidden_dims, num_layers, save_path)

# Frequently used entries of data_dict
g = data_dict['graph']
item_ntype, user_ntype = data_dict['item_ntype'], data_dict['user_ntype']
user_to_item_etype = data_dict['user_to_item_etype']
timestamp = data_dict['timestamp']

# user node id -> value of the 'userID' node feature
nid_uid_dict = dict(enumerate(list(g.ndata['userID'].values())[0].numpy()))
# item node id ('id' feature) -> value of the 'item_id' node feature
nid_wid_dict = dict(zip(g.ndata['id']['item'].tolist(), g.ndata['item_id']['item'].tolist()))

# Embeddings of every item node, and each user's top-k interacted item nodes
gnn = gnn.to(device)
h_item = get_all_emb(gnn, g.ndata['id'][item_ntype], data_dict['textset'], item_ntype, neighbor_sampler, batch_size, device)
item_batch = item_by_user_batch(g, user_ntype, item_ntype, user_to_item_etype, timestamp, batch_size, k)

In [None]:
# Ratings table ordered from highest to lowest "rate", with a fresh 0..n-1 index.
rate_data = (
    pd.read_csv("KData/rate_data.csv", index_col=0)
    .sort_values(by="rate", ascending=False)
    .reset_index(drop=True)
)
rate_data

In [None]:
# Spot check: order one user's test labels by rating.
# The user is chosen explicitly so the cell runs on a fresh kernel instead of
# depending on `user_id` / `label` left behind by an evaluation loop.
example_node = 0  # user node id to inspect; change to look at another user
user_id = data_dict['user_category'][nid_uid_dict[example_node]]
label = data_dict['testset'][user_id]

# rate_data is sorted by descending "rate", so filtering it keeps that order.
sort_item = rate_data.loc[(rate_data["user"] == user_id), "item"].values.tolist()
label_set = set(label)
label = [x for x in sort_item if x in label_set]

In [None]:
# Show the distinct item ids currently held in `label`.
set(label)

### KNN 방법으로 했을 시

In [None]:
# Recall@k with a cosine KNN index over the item embeddings.
k_num = 10

users = []
mean_recall = []  # per-user recall, one entry per evaluated user (0.0 on a miss)
recalls = 0       # total number of recovered labels (pooled recall numerator)
num_labels = 0    # total number of labels (pooled recall denominator)

model = NearestNeighbors(n_neighbors=k_num, metric='cosine')
model.fit(h_item.detach().cpu().numpy())
for i, nodes in tqdm.tqdm(enumerate(item_batch)):
    # Resolve the real user id and that user's held-out test labels.
    category = nid_uid_dict[i]
    user_id = data_dict['user_category'][category]
    label = data_dict['testset'][user_id]
    users.append(user_id)
    if len(label) == 0:
        continue  # recall is undefined without labels

    # Query the index with the centroid of the user's interacted-item embeddings.
    h_center = torch.mean(h_item[list(nodes)], axis=0)
    _, topk = model.kneighbors(h_center.detach().cpu().numpy().reshape(1, -1))
    topk = topk[0]

    # NOTE(review): `topk` holds row indices of h_item (item node ids) while
    # other cells compare `label` against ids mapped through node_to_item;
    # confirm both live in the same id space.
    tp = [x for x in label if x in topk]

    # Every user counts, hits or not; skipping the misses would inflate recall.
    mean_recall.append(len(tp) / len(label))
    recalls += len(tp)
    num_labels += len(label)

# First line: mean of per-user recall. Second line: pooled recall over all labels.
recall = np.mean(np.array(mean_recall))
recall2 = recalls / num_labels if num_labels else float('nan')
print(f'\tRecall@{k_num}:{recall}')
print(f'\tRecall@{k_num}:{recall2}')

In [None]:
# Hitrate@k with a cosine KNN index; only the first test label of a user is checked.
k_num = 500
users = []
hitrates = 0
counts_n = 0

model = NearestNeighbors(n_neighbors=k_num, metric='cosine')
model.fit(h_item.detach().cpu().numpy())
for i, nodes in tqdm.tqdm(enumerate(item_batch)):
    # Resolve the real user id and that user's held-out test labels.
    category = nid_uid_dict[i]
    user_id = data_dict['user_category'][category]
    label = data_dict['testset'][user_id]
    users.append(user_id)

    # Drop the input nodes whose real item id is one of the test labels.
    item = evaluation.node_to_item(nodes, nid_wid_dict, data_dict['item_category'])
    label_idx = [pos for pos, real_id in enumerate(item) if real_id in label]
    nodes = [node for pos, node in enumerate(nodes) if pos not in label_idx]

    # Query the index with the centroid of the remaining item embeddings.
    h_nodes = h_item[nodes]
    h_center = torch.mean(h_nodes, axis=0)
    _, topk = model.kneighbors(h_center.detach().cpu().numpy().reshape(1, -1))
    topk = topk[0]

    # NOTE(review): `topk` holds h_item row indices while `label` is compared
    # with node_to_item output above; confirm the id spaces agree.
    label = [list(label)[0]]
    tp = [x for x in label if x in topk]

    # Every user is counted; a user scores 1 when the label was retrieved.
    hitrates += 1 if tp else 0
    counts_n += 1

hitrate = hitrates / counts_n
print(f'\tHitrate@{k_num}:{hitrate}')

In [None]:
# Reciprocal-rank score with a cosine KNN index.
k_num = 500
users = []
mrr_score = []
q_value = 0  # number of users with at least one hit

model = NearestNeighbors(n_neighbors=k_num, metric='cosine')
model.fit(h_item.detach().cpu().numpy())
for i, nodes in tqdm.tqdm(enumerate(item_batch)):
    # Resolve the real user id and that user's held-out test labels.
    category = nid_uid_dict[i]
    user_id = data_dict['user_category'][category]
    label = list(data_dict['testset'][user_id])
    users.append(user_id)

    # Query the index with the centroid of the user's interacted-item embeddings.
    h_center = torch.mean(h_item[list(nodes)], axis=0)
    _, topk = model.kneighbors(h_center.detach().cpu().numpy().reshape(1, -1))
    topk = topk[0].tolist()

    # Position inside `label` of every retrieved item that is a test label,
    # in retrieval order. Rebuilt per user, so a miss never reuses the
    # previous user's values.
    parents_value = [label.index(x) for x in topk if x in label]
    if not parents_value:
        continue

    # Ranks are 1-based: position 0 scores 1.0 instead of dividing by zero.
    # NOTE(review): the rank is the hit's position within `label`, not within
    # `topk`, and users without hits are left out of the average; both differ
    # from the textbook MRR — confirm this is intended.
    mrr_score.append(1 / (parents_value[0] + 1))
    q_value += 1

mrr = np.round(np.array(mrr_score).sum() / q_value, 3)
print(f"mrr@{k_num} : {mrr}")

In [None]:
# Reciprocal-rank score where each user's test labels are ordered by rating.
k_num = 10
users = []
# Start from a clean slate; without this the scores pile up on top of
# whatever an earlier MRR cell left in these variables.
mrr_score = []
q_value = 0  # number of users with at least one hit

model = NearestNeighbors(n_neighbors=k_num, metric='cosine')
model.fit(h_item.detach().cpu().numpy())
for i, nodes in tqdm.tqdm(enumerate(item_batch)):
    # Resolve the real user id and that user's held-out test labels.
    category = nid_uid_dict[i]
    user_id = data_dict['user_category'][category]
    label = data_dict['testset'][user_id]
    users.append(user_id)

    # Keep the labels in the order of the user's rows in rate_data
    # (sorted by descending "rate" when it was loaded).
    label_set = set(label)
    sort_item = rate_data.loc[(rate_data["user"] == user_id), "item"].values.tolist()
    label = [x for x in sort_item if x in label_set]

    # Query the index with the centroid of the user's interacted-item embeddings.
    h_center = torch.mean(h_item[list(nodes)], axis=0)
    _, topk = model.kneighbors(h_center.detach().cpu().numpy().reshape(1, -1))
    topk = topk[0].tolist()

    # Position inside the rating-ordered `label` of every retrieved label,
    # in retrieval order.
    parents_value = [label.index(x) for x in topk if x in label]
    if not parents_value:
        continue

    # 1-based rank of the first retrieved label.
    # NOTE(review): users without hits are left out of the average, unlike
    # the textbook MRR — confirm this is intended.
    mrr_score.append(1 / (parents_value[0] + 1))
    q_value += 1

mrr = np.round(np.array(mrr_score).sum() / q_value, 3)
print(f"mrr@{k_num} : {mrr}")

### KDtree 방법으로 했을 시 

In [None]:
# Recall@k with a KD-tree (Euclidean distance) over the item embeddings.
k_num = 10

users = []
mean_recall = []  # per-user recall, one entry per evaluated user (0.0 on a miss)
recalls = 0       # total number of recovered labels (pooled recall numerator)
num_labels = 0    # total number of labels (pooled recall denominator)

tree = spatial.KDTree(h_item.tolist())
for i, nodes in tqdm.tqdm(enumerate(item_batch)):
    # Resolve the real user id and that user's held-out test labels.
    category = nid_uid_dict[i]
    user_id = data_dict['user_category'][category]
    label = data_dict['testset'][user_id]
    users.append(user_id)
    if len(label) == 0:
        continue  # recall is undefined without labels

    # Query the tree with the centroid of the user's interacted-item embeddings.
    h_center = torch.mean(h_item[list(nodes)], axis=0)
    # query() returns a scalar index for k == 1; normalise to a 1-D array.
    topk = np.atleast_1d(tree.query(h_center.tolist(), k_num)[1])

    # NOTE(review): `topk` holds row indices of h_item (item node ids) while
    # other cells compare `label` against ids mapped through node_to_item;
    # confirm both live in the same id space.
    tp = [x for x in label if x in topk]

    # Every user counts, hits or not; skipping the misses would inflate recall.
    mean_recall.append(len(tp) / len(label))
    recalls += len(tp)
    num_labels += len(label)

# First line: mean of per-user recall. Second line: pooled recall over all labels.
recall = np.mean(np.array(mean_recall))
recall2 = recalls / num_labels if num_labels else float('nan')
print(f'\tRecall@{k_num}:{recall}')
print(f'\tRecall@{k_num}:{recall2}')

In [None]:
# Hitrate@k with a KD-tree (Euclidean distance) over the item embeddings.
k_num = 500
users = []
hitrates = 0
counts_n = 0

tree = spatial.KDTree(h_item.tolist())
for i, nodes in tqdm.tqdm(enumerate(item_batch)):

    # Resolve the real user id and that user's held-out test labels.
    category = nid_uid_dict[i]
    user_id = data_dict['user_category'][category]
    label = data_dict['testset'][user_id]
    users.append(user_id)

    # Real item ids of the input nodes, and the positions that are test labels.
    item = evaluation.node_to_item(nodes, nid_wid_dict, data_dict['item_category'])
    label_idx = [pos for pos, real_id in enumerate(item) if real_id in label]

    # Recommend from the centroid of the input nodes that are not test labels.
    nodes = [node for pos, node in enumerate(nodes) if pos not in label_idx]
    h_nodes = h_item[nodes]
    h_center = torch.mean(h_nodes, axis=0)
    # query() returns a scalar index for k == 1, hence the wrapping list.
    topk = [tree.query(h_center.tolist(), 1)[1]] if k_num == 1 else tree.query(h_center.tolist(), k_num)[1]

    # NOTE(review): `topk` holds h_item row indices while `label` is compared
    # with node_to_item output above; confirm the id spaces agree.
    tp = [x for x in label if x in topk]

    # Every user is counted; a user scores 1 when at least one label was retrieved.
    hitrates += 1 if tp else 0
    counts_n += 1

hitrate = hitrates / counts_n
print(f'\tHitrate@{k_num}:{hitrate}')

# pair data 검사

In [None]:
# Load the pair table; the first CSV column becomes the index.
# The next cell reads its 'item_id' and 'pos_pair' columns.
df = pd.read_csv("./output/pair_df.csv", index_col=0)
df

In [None]:
# One row per item_id: the 'pos_pair' strings of all its rows joined with commas.
ddf = df.groupby(['item_id'])['pos_pair'].apply(','.join).reset_index()

In [None]:
def strTolst(x):
    """Parse the text form of `x` as a Python literal.

    :x: any object; its ``str()`` is handed to ``ast.literal_eval``
    :return: the parsed literal, or [] (after printing the error) on failure
    """
    try:
        parsed = ast.literal_eval(str(x))
    except Exception as e:
        print(e)
        parsed = []
    return parsed

# Turn every joined 'pos_pair' string into the Python literal it spells out.
ddf['pos_pair'] = ddf["pos_pair"].apply(strTolst)
ddf

In [None]:
def _flatten_pairs(value):
    """Concatenate a sequence of lists into one list; return anything else unchanged."""
    try:
        return sum(value, [])
    except TypeError:
        # Already flat (e.g. a list of ints) or not a sequence of lists.
        return value


# Assign the whole column at once: element-wise `ddf["pos_pair"][i] = ...` is
# chained indexing, which pandas warns about and may apply to a copy.
ddf["pos_pair"] = ddf["pos_pair"].apply(_flatten_pairs)

In [None]:
def ramdom_sampling_list(x, seed=225):
    """Draw 10 elements from `x` without replacement, reproducibly.

    :x: sequence to sample from
    :seed: seed of the private generator used for this draw
    :return: list of 10 sampled elements, or [] (after printing the error)
             when `x` has fewer than 10 elements or cannot be sampled
    """
    # A private generator gives the same draw as seeding the global one, but
    # leaves the global `random` state (set by seed_everything) untouched.
    rng = random.Random(seed)
    try:
        return rng.sample(x, 10)
    except (ValueError, TypeError) as e:
        print(e)
        return []

# Copy of ddf whose 'pos_pair' lists are replaced by 10-element samples (seed 329).
sample_df = ddf.assign(
    pos_pair=ddf["pos_pair"].apply(lambda pairs: ramdom_sampling_list(pairs, 329))
)

In [None]:
# For every item, record which of its positive pairs are among its k nearest
# neighbours (cosine distance) in embedding space.
k_num = 10

model = NearestNeighbors(n_neighbors=k_num, metric='cosine')
model.fit(h_item.detach().cpu().numpy())

data_form = ddf.copy()
matched = []
# ddf has one row per item_id (it comes from a groupby), so walking the rows
# is equivalent to looking each item up again, without the repeated scans.
item_rows = zip(data_form["item_id"].tolist(), data_form["pos_pair"].tolist())
for itm, label in tqdm.tqdm(item_rows, total=len(data_form)):
    # NOTE(review): item_id is used directly as a row index of h_item; confirm
    # that pair_df item ids are item node ids.
    h_nodes = h_item[itm]
    _, topk = model.kneighbors(h_nodes.detach().cpu().numpy().reshape(1, -1))
    topk = topk[0]
    tp = [x for x in label if x in topk]
    matched.append(str(tp))

# Build the string column in one go; writing strings row by row into a column
# initialised with the integer 0 clashes with its dtype.
data_form["labels"] = matched

In [None]:
# Pair-data recall / hit rate with a cosine KNN index, printing every item's
# labels, neighbours and matches for inspection.
k_num = 10

users = []
mean_recall = []  # per-item recall, one entry per item with labels (0.0 on a miss)
recalls = 0
num_labels = 0
hitrates = 0
counts_n = 0

data_form = ddf.copy()

model = NearestNeighbors(n_neighbors=k_num, metric='cosine')
model.fit(h_item.detach().cpu().numpy())

# ddf has one row per item_id (it comes from a groupby), so walking the rows
# is equivalent to looking each item up again, without the repeated scans.
item_rows = zip(data_form["item_id"].tolist(), data_form["pos_pair"].tolist())
for itm, label in tqdm.tqdm(item_rows, total=len(data_form)):
    # NOTE(review): item_id is used directly as a row index of h_item; confirm
    # that pair_df item ids are item node ids.
    h_nodes = h_item[itm]
    _, topk = model.kneighbors(h_nodes.detach().cpu().numpy().reshape(1, -1))
    topk = topk[0]
    tp = [x for x in label if x in topk]
    print(label, topk, "", tp, "", "", sep="\n")

    # Hit rate: every item counts, scoring 1 when at least one pair was found.
    counts_n += 1
    hitrates += 1 if tp else 0
    # Recall: items without hits contribute 0 rather than being skipped, which
    # would inflate the mean. Items without labels have no defined recall.
    if len(label):
        mean_recall.append(len(tp) / len(label))
        recalls += len(tp)
        num_labels += len(label)

hitrate = hitrates / counts_n
recall = np.mean(np.array(mean_recall))
print(f'\tRecall:{recall}\tHitrate:{hitrate}')

In [None]:
# Pair-data recall / hit rate with a cosine KNN index.
k_num = 10

users = []
mean_recall = []  # per-item recall, one entry per item with labels (0.0 on a miss)
recalls = 0
num_labels = 0
hitrates = 0
counts_n = 0

data_form = ddf.copy()

model = NearestNeighbors(n_neighbors=k_num, metric='cosine')
model.fit(h_item.detach().cpu().numpy())

# ddf has one row per item_id (it comes from a groupby), so walking the rows
# is equivalent to looking each item up again, without the repeated scans.
item_rows = zip(data_form["item_id"].tolist(), data_form["pos_pair"].tolist())
for itm, label in tqdm.tqdm(item_rows, total=len(data_form)):
    # NOTE(review): item_id is used directly as a row index of h_item; confirm
    # that pair_df item ids are item node ids.
    h_nodes = h_item[itm]
    _, topk = model.kneighbors(h_nodes.detach().cpu().numpy().reshape(1, -1))
    topk = topk[0]
    tp = [x for x in label if x in topk]

    # Hit rate: every item counts, scoring 1 when at least one pair was found.
    counts_n += 1
    hitrates += 1 if tp else 0
    # Recall: items without hits contribute 0 rather than being skipped, which
    # would inflate the mean. Items without labels have no defined recall.
    if len(label):
        mean_recall.append(len(tp) / len(label))
        recalls += len(tp)
        num_labels += len(label)

hitrate = hitrates / counts_n
recall = np.mean(np.array(mean_recall))
print(f'\tRecall:{recall}\tHitrate:{hitrate}')

In [None]:
# Pair-data recall / hit rate with a KD-tree (Euclidean distance).
k_num = 10

users = []
mean_recall = []  # per-item recall, one entry per item with labels (0.0 on a miss)
recalls = 0       # total number of recovered pairs (pooled recall numerator)
num_labels = 0    # total number of pairs (pooled recall denominator)
hitrates = 0
counts_n = 0

tree = spatial.KDTree(h_item.tolist())

# ddf has one row per item_id (it comes from a groupby), so walking the rows
# is equivalent to looking each item up again, without the repeated scans.
item_rows = zip(ddf["item_id"].tolist(), ddf["pos_pair"].tolist())
for itm, label in tqdm.tqdm(item_rows, total=len(ddf)):
    # NOTE(review): item_id is used directly as a row index of h_item; confirm
    # that pair_df item ids are item node ids.
    h_nodes = h_item[itm]
    # query() returns a scalar index for k == 1; normalise to a 1-D array.
    topk = np.atleast_1d(tree.query(h_nodes.tolist(), k_num)[1])
    tp = [x for x in label if x in topk]

    # Hit rate: every item counts, scoring 1 when at least one pair was found.
    counts_n += 1
    hitrates += 1 if tp else 0
    # Recall: items without hits still add their labels to the denominators;
    # leaving them out would inflate both recall figures.
    if len(label):
        mean_recall.append(len(tp) / len(label))
        recalls += len(tp)
        num_labels += len(label)

hitrate = hitrates / counts_n
recall = recalls / num_labels if num_labels else float('nan')
# Mean of per-item recall first, then pooled recall and hit rate.
print(np.mean(np.array(mean_recall)))
print(f'\tRecall:{recall}\tHitrate:{hitrate}')