<a href="https://colab.research.google.com/github/KevenLi8888/KRED/blob/test/model_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Environment Setup

In [3]:
# Mount Google Drive inside the Colab VM so that files kept on Drive can be
# read through the local filesystem. `google.colab` only exists on Colab.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Secure the GPU resource: put a tiny tensor on the CUDA device straight away.
# Moving a tensor to 'cuda' raises if no CUDA device is usable, so a runtime
# without a GPU is detected here rather than further down the notebook.
import torch

placeholder = torch.tensor([500.0, 500.0, 500.0]).to('cuda')

In [4]:
!git clone -b test https://github.com/KevenLi8888/KRED.git

Cloning into 'KRED'...
remote: Enumerating objects: 261, done.[K
remote: Counting objects: 100% (261/261), done.[K
remote: Compressing objects: 100% (132/132), done.[K
remote: Total 261 (delta 148), reused 226 (delta 126), pack-reused 0[K
Receiving objects: 100% (261/261), 44.15 MiB | 25.88 MiB/s, done.
Resolving deltas: 100% (148/148), done.


In [5]:
import os

# Make the cloned repository the working directory; the working directory is
# printed before and after the change so the switch is visible in the output.
REPO_DIR = '/content/KRED'

print(os.getcwd())
os.chdir(REPO_DIR)
print(os.getcwd())

/content
/content/KRED


In [8]:
!cp /content/drive/MyDrive/Developers/kg.zip ./data/kg
!cp /content/drive/MyDrive/Developers/checkpoint.pt ./out

In [6]:
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence-transformers-2.2.0.tar.gz (79 kB)
[K     |████████████████████████████████| 79 kB 5.2 MB/s 
[?25hCollecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 15.2 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 54.7 MB/s 
[?25hCollecting huggingface-hub
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 7.0 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 71.1 MB/s 
[?25hCollecting tokenizers!=0.11.3,>=0.10.1
  Downloading tokenizers-0.11.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 38.6 M

In [9]:
!lscpu | grep -E '^Thread|^Core|^Socket|^CPU\('

CPU(s):              2
Thread(s) per core:  2
Core(s) per socket:  1
Socket(s):           1


In [10]:
# Report the GPU attached to this runtime (model, driver/CUDA version, memory usage).
!nvidia-smi

Fri Feb 25 08:11:36 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P0    27W /  70W |   1322MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Dataset Preparation

In [11]:
import os
from utils.util import *
from train_test import *

# Which MIND dataset variant to use. Options: demo, small, large
MIND_type = 'small'
data_path = "./data/"

# Local directories that hold each resource.
train_dir = os.path.join(data_path, 'train')
valid_dir = os.path.join(data_path, 'valid')
kg_root_dir = os.path.join(data_path, 'kg')
wikidata_graph_dir = os.path.join(data_path, 'kg/wikidata-graph')

# Files the rest of the notebook expects to find on disk.
train_news_file = os.path.join(train_dir, 'news.tsv')
train_behaviors_file = os.path.join(train_dir, 'behaviors.tsv')
valid_news_file = os.path.join(valid_dir, 'news.tsv')
valid_behaviors_file = os.path.join(valid_dir, 'behaviors.tsv')
knowledge_graph_file = os.path.join(wikidata_graph_dir, 'wikidata-graph.tsv')
entity_embedding_file = os.path.join(wikidata_graph_dir, 'entity2vecd100.vec')
relation_embedding_file = os.path.join(wikidata_graph_dir, 'relation2vecd100.vec')

mind_url, mind_train_dataset, mind_dev_dataset, _ = get_mind_data_set(MIND_type)

kg_url = "https://kredkg.blob.core.windows.net/wikidatakg/"

# Each entry: (file whose presence means the resource is already available,
#              base URL, local target directory, remote archive name).
# NOTE(review): download_deeprec_resources presumably downloads and unpacks the
# archive into the target directory — confirm in utils.util.
required_downloads = [
    (train_news_file, mind_url, train_dir, mind_train_dataset),
    (valid_news_file, mind_url, valid_dir, mind_dev_dataset),
    (knowledge_graph_file, kg_url, kg_root_dir, "kg.zip"),
]
for marker_file, source_url, target_dir, archive_name in required_downloads:
    if os.path.exists(marker_file):
        continue  # already present locally, nothing to fetch
    download_deeprec_resources(source_url, target_dir, archive_name)

100%|██████████| 51.7k/51.7k [00:01<00:00, 41.9kKB/s]
100%|██████████| 30.2k/30.2k [00:01<00:00, 19.2kKB/s]


## Load Config

In [12]:
import sys
import os
# An empty entry on sys.path makes Python search the current working directory
# for modules, so `parse_config` is importable from the repository root.
sys.path.append('')
# Reset argv so the argparse-based config loading below does not see the
# arguments the Jupyter kernel process was launched with; only the parser
# defaults apply.
# NOTE(review): assumes ConfigParser.from_args parses sys.argv — confirm in parse_config.
sys.argv = ['']

import argparse
from parse_config import ConfigParser

parser = argparse.ArgumentParser(description='KRED')


parser.add_argument('-c', '--config', default="./config.json", type=str,
                    help='config file path (default: ./config.json)')
parser.add_argument('-r', '--resume', default=None, type=str,
                    help='path to latest checkpoint (default: None)')
parser.add_argument('-d', '--device', default=None, type=str,
                    help='indices of GPUs to enable (default: all)')

# Build the project configuration object from the parser defined above.
config = ConfigParser.from_args(parser)

## Set Hyperparameters

In [13]:
# Values chosen for this run.
epochs = 10
batch_size = 256
train_type = "single_task"
task = "user2item" # task should be within: user2item, item2item, vert_classify, pop_predict

# Write the values above into the loaded configuration:
# (config section, key inside that section, value to store).
config_overrides = (
    ('trainer', 'epochs', epochs),
    ('data_loader', 'batch_size', batch_size),
    ('trainer', 'training_type', train_type),
    ('trainer', 'task', task),
)
for section_name, option_name, option_value in config_overrides:
    config[section_name][option_name] = option_value

## Load Test Data and Model

In [15]:
# Load the test data for the user2item task and restore the saved model object.
# NOTE(review): torch.load unpickles arbitrary Python objects — only load
# checkpoint files from a source you trust.
CHECKPOINT_PATH = './out/checkpoint.pt'

test_data = get_user2item_test_data(config)
model = torch.load(CHECKPOINT_PATH)
# Put the module in evaluation mode; being the last expression of the cell,
# the returned module is also displayed.
model.eval()

KREDModel(
  (news_embedding): News_embedding(
    (kgat): KGAT(
      (attention_layer1): Linear(in_features=300, out_features=128, bias=True)
      (attention_layer2): Linear(in_features=128, out_features=1, bias=True)
      (softmax): Softmax(dim=-1)
      (relu): ReLU(inplace=True)
      (convolve_layer): Linear(in_features=200, out_features=100, bias=True)
    )
    (final_embedding1): Linear(in_features=868, out_features=128, bias=True)
    (final_embedding2): Linear(in_features=128, out_features=100, bias=True)
    (relu): ReLU(inplace=True)
    (sigmoid): Sigmoid()
    (tanh): Tanh()
    (title_embeddings): Embedding(1000, 100)
    (type_embeddings): Embedding(100, 100)
    (entity_num_embeddings): Embedding(100, 100)
    (attention_embedding_layer1): Linear(in_features=868, out_features=128, bias=True)
    (attention_embedding_layer2): Linear(in_features=128, out_features=1, bias=True)
    (softmax): Softmax(dim=-2)
  )
  (user_modeling): User_modeling(
    (news_embedding): N

In [22]:
import torch
from tqdm import tqdm
from utils.metrics import *

# Number of leading test samples to score in this quick check.
NUM_EVAL_SAMPLES = 256

eval_batch_size = config['data_loader']['batch_size']
# Never evaluate more samples than the test set actually contains.
num_eval = min(NUM_EVAL_SAMPLES, len(test_data['label']))

y_pred = []
start_list = list(range(0, num_eval, eval_batch_size)) # range(start, stop[, step])
# Inference only: with autograd disabled no computation graph is recorded for the forward passes.
with torch.no_grad():
    for start in tqdm(start_list):
        # Clamp the batch end to the evaluated subset so that y_pred and truth
        # always have the same length, even when the batch size does not
        # divide the number of evaluated samples.
        end = min(start + eval_batch_size, num_eval)
        # The test data is keyed by 'item1' / 'item2' (not 'user_id' / 'news_id'),
        # following the key names declared in utils.util's load_data_mind.
        out = model(test_data['item1'][start:end], test_data['item2'][start:end], eval_batch_size)[0]
        y_pred.extend(out.detach().cpu().numpy())
# Ground-truth labels for exactly the samples that were scored above.
truth = test_data['label'][0:num_eval]
score = evaluate(y_pred, truth, test_data, config['trainer']['task'])

100%|██████████| 1/1 [00:01<00:00,  1.97s/it]

auc score:0.7197786998616874
ndcg score:0.5517282238681521



