In [2]:
import pickle
import argparse
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchtext.legacy import data
import dgl
import tqdm

import layers
import sampler as sampler_module
import evaluation
from model import *

In [3]:
from dataclasses import dataclass

@dataclass
class TrainArgs:
    """Settings bundle passed as the second argument to ``train(dataset, args)``.

    Only ``output_model_path`` is required; every other field has a default.
    The per-field notes below are inferred from the field names, because the
    code that consumes them is not part of this notebook -- confirm against
    ``train`` before relying on them.
    """

    # Destination for the trained model (presumably written by ``train``).
    output_model_path: str

    # Random-walk neighbour sampling settings (presumably used by the sampler).
    random_walk_length: int = 2
    random_walk_restart_prob: float = 0.5
    num_random_walks: int = 10
    num_neighbors: int = 5

    # Model size.
    num_layers: int = 2
    hidden_dims: int = 16

    # Optimisation / run settings.
    batch_size: int = 64
    device: str = 'cpu'
    num_epochs: int = 1
    batches_per_epoch: int = 10000
    num_workers: int = 0
    lr: float = 3e-5

    # Evaluation settings: ``k`` is presumably the top-k cutoff and
    # ``n_latest_items`` the history length for "Rec by N latest items" -- TODO confirm.
    k: int = 10
    n_latest_items: int = 10

ML-1m

In [3]:
# Load the pickled ML-1m dataset (indexed by string keys such as 'train-graph' below).
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open('data/data_ml.pkl', 'rb') as pkl_file:
    dataset = pickle.load(pkl_file)

In [4]:
# Attach two node features to the training graph for each node type:
#   'adj'    -- the dense adjacency matrix of the corresponding edge type
#   'degree' -- the row sums of that matrix
train_graph = dataset['train-graph']

user_adj = train_graph.adj(scipy_fmt='coo', etype=dataset['user-to-item-type']).toarray()
train_graph.nodes['user'].data['adj'] = torch.FloatTensor(user_adj)
train_graph.nodes['user'].data['degree'] = torch.LongTensor(user_adj.sum(axis=1))

item_adj = train_graph.adj(scipy_fmt='coo', etype=dataset['item-to-user-type']).toarray()
train_graph.nodes['movie'].data['adj'] = torch.FloatTensor(item_adj)
train_graph.nodes['movie'].data['degree'] = torch.LongTensor(item_adj.sum(axis=1))

# The dense NumPy matrices are not needed once their tensors are stored on the
# graph; drop them so they do not stay resident in the kernel during training.
del user_adj, item_adj

In [5]:
# Training configuration for the ML-1m run.
args = TrainArgs(
    output_model_path='models_gen_features/model_ml',
    device='cuda',
    # model
    num_layers=1,
    hidden_dims=50,
    num_neighbors=6,
    # optimisation
    lr=0.0008057,
    batch_size=64,
    num_epochs=2,
    batches_per_epoch=10000,
    # evaluation
    k=20,
    n_latest_items=10,
)

In [6]:
# Train on the ML-1m dataset loaded above using the settings in `args`.
# `train` is not defined in this notebook; presumably it is provided by
# `from model import *` -- confirm.
train(dataset, args)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [05:43<00:00, 29.07it/s]


Rec by latest item (0.08772350993377676, 0.07912824644171329, 0.2901599306733959) 
 Rec by N latest items (0.09483443708609389, 0.08203526719824232, 0.311434122070666)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [05:34<00:00, 29.86it/s]


Rec by latest item (0.09400662251655742, 0.07879238970342181, 0.29620124163425915) 
 Rec by N latest items (0.09908940397351068, 0.08154589990666435, 0.2926982094505844)
RESULT
Epoch 1, Rec by latest item: PR@1: 0.11456953642384106|REC@1: 0.0046649356547495795|NDCG@1: 0.017218543046357615
Epoch 1, Rec by 10 latest items: PR@1: 0.11241721854304636|REC@1: 0.00467687176325284|NDCG@1: 0.012913907284768211
Epoch 1, Rec by latest item: PR@5: 0.10910596026490109|REC@5: 0.022983010795197018|NDCG@5: 0.09699215640546625
Epoch 1, Rec by 10 latest items: PR@5: 0.1099337748344378|REC@5: 0.022421286392241946|NDCG@5: 0.10111698672342864
Epoch 1, Rec by latest item: PR@10: 0.1037417218543075|REC@10: 0.04394383829454656|NDCG@10: 0.18324194912458058
Epoch 1, Rec by 10 latest items: PR@10: 0.1067715231788107|REC@10: 0.044338759262031645|NDCG@10: 0.17911162919504287
Epoch 1, Rec by latest item: PR@15: 0.09814569536423827|REC@15: 0.0618238893394605|NDCG@15: 0.24271703633237848
Epoch 1, Rec by 10 latest ite

Ta Feng

In [3]:
# Load the pickled Ta Feng dataset (indexed by string keys such as 'train-graph' below).
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open('data/tafeng.pkl', 'rb') as pkl_file:
    dataset = pickle.load(pkl_file)

In [4]:
# For each node type of the training graph, store the dense adjacency matrix of
# the paired edge type as the 'adj' feature and its row sums as 'degree'.
train_graph = dataset['train-graph']
for node_type, etype_key in (('customer', 'user-to-item-type'),
                             ('product', 'item-to-user-type')):
    dense_adj = train_graph.adj(scipy_fmt='coo', etype=dataset[etype_key]).toarray()
    train_graph.nodes[node_type].data['adj'] = torch.FloatTensor(dense_adj)
    train_graph.nodes[node_type].data['degree'] = torch.LongTensor(dense_adj.sum(axis=1))

# Drop the temporaries; the dense matrix is not needed once stored on the graph.
del train_graph, node_type, etype_key, dense_adj

In [5]:
# Training configuration for the Ta Feng run.
args = TrainArgs(
    output_model_path='models_gen_features/model_tafeng',
    device='cuda',
    # model
    num_layers=2,
    hidden_dims=68,
    num_neighbors=6,
    # optimisation
    lr=0.000135177,
    batch_size=64,
    num_epochs=7,
    batches_per_epoch=10000,
    # evaluation
    k=20,
    n_latest_items=10,
)

In [None]:
# Train on the Ta Feng dataset loaded above using the settings in `args`.
# `train` is not defined in this notebook; presumably it is provided by
# `from model import *` -- confirm.
# NOTE(review): the saved output for this cell stops at the first batch and the
# cell has no execution count, so this run appears not to have finished --
# re-run before relying on any Ta Feng results.
train(dataset, args)

  0%|                                                                                                                                  | 1/10000 [00:05<15:36:40,  5.62s/it]

Amazon Video Games

In [4]:
# Load the pickled Amazon Video Games dataset (indexed by string keys such as 'train-graph' below).
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open('data/amazon.pkl', 'rb') as pkl_file:
    dataset = pickle.load(pkl_file)

In [5]:
# For each node type of the training graph, store the dense adjacency matrix of
# the paired edge type as the 'adj' feature and its row sums as 'degree'.
train_graph = dataset['train-graph']
for node_type, etype_key in (('customer', 'user-to-item-type'),
                             ('product', 'item-to-user-type')):
    dense_adj = train_graph.adj(scipy_fmt='coo', etype=dataset[etype_key]).toarray()
    train_graph.nodes[node_type].data['adj'] = torch.FloatTensor(dense_adj)
    train_graph.nodes[node_type].data['degree'] = torch.LongTensor(dense_adj.sum(axis=1))

# Drop the temporaries; the dense matrix is not needed once stored on the graph.
del train_graph, node_type, etype_key, dense_adj

In [8]:
# Training configuration for the Amazon Video Games run.
# The output path is dataset-specific so this run cannot clobber the model
# saved by the Ta Feng run (the path was previously '.../model_tafeng').
args = TrainArgs(
    output_model_path='models_gen_features/model_amazon',
    device='cuda',
    # model
    num_layers=1,
    hidden_dims=131,
    num_neighbors=11,
    # optimisation
    lr=0.00066586,
    batch_size=64,
    num_epochs=2,
    batches_per_epoch=10000,
    # evaluation
    k=20,
    n_latest_items=10,
)

In [9]:
# Train on the Amazon Video Games dataset loaded above using the settings in `args`.
# `train` is not defined in this notebook; presumably it is provided by
# `from model import *` -- confirm.
train(dataset, args)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [14:53<00:00, 11.20it/s]


Rec by latest item (0.005675969912905612, 0.04060238224080806, 0.004385516014173432) 
 Rec by N latest items (0.005576999208234203, 0.039191777760323905, 0.003306749583319696)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [15:22<00:00, 10.84it/s]


Rec by latest item (0.005584422011084555, 0.03970387366588802, 0.0038871441991331374) 
 Rec by N latest items (0.005574524940617414, 0.039169891650685476, 0.003736355034419339)
RESULT
Epoch 1, Rec by latest item: PR@1: 0.007125890736342043|REC@1: 0.002509207274646705|NDCG@1: 0.0
Epoch 1, Rec by 10 latest items: PR@1: 0.006383610451306413|REC@1: 0.0020656207191242816|NDCG@1: 0.0
Epoch 1, Rec by latest item: PR@5: 0.00733372921615204|REC@5: 0.013552521244464455|NDCG@5: 0.0006295540405407765
Epoch 1, Rec by 10 latest items: PR@5: 0.007076405384006361|REC@5: 0.01241376622068771|NDCG@5: 0.0006429134401433299
Epoch 1, Rec by latest item: PR@10: 0.006299485352335634|REC@10: 0.0227514676501216|NDCG@10: 0.0016114708310950192
Epoch 1, Rec by 10 latest items: PR@10: 0.006373713380839195|REC@10: 0.022263986558265697|NDCG@10: 0.001718093444287966
Epoch 1, Rec by latest item: PR@15: 0.005826075481657415|REC@15: 0.031020675426303188|NDCG@15: 0.002545825755458548
Epoch 1, Rec by 10 latest items: PR@15