In [1]:
import pickle
import argparse
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F 
from torch.utils.data import DataLoader
import torchtext
import dgl
import tqdm
import evaluation
from torchtext.legacy import data
from dgl.nn import GATConv
import optuna
from model import *
from sampler import *
from layers import *

In [2]:
from dataclasses import dataclass

@dataclass
class TrainArgs:
    """Run-wide settings shared by the cells of this notebook.

    Only ``model_output_path`` is required; every other field has a default.
    The cells shown in this notebook read ``device``, ``batch_size``,
    ``batches_per_epoch``, ``num_workers`` and ``k``. The remaining fields are
    not read by any visible cell (several have per-trial counterparts sampled
    inside ``objective``) -- presumably they mirror a stand-alone training
    script; confirm before removing any of them.
    """

    # --- required -----------------------------------------------------------
    model_output_path: str

    # --- random-walk settings (not read by the visible cells) ---------------
    random_walk_length: int = 2
    random_walk_restart_prob: float = 0.5
    num_random_walks: int = 10

    # --- model / sampling shape (the objective samples its own values) ------
    num_neighbors: int = 3
    num_layers: int = 2
    num_heads: int = 2
    hidden_dims: int = 16

    # --- training loop -------------------------------------------------------
    batch_size: int = 64            # passed to the batch sampler and the DataLoaders
    device: str = "cuda"            # handed to torch.device(...)
    num_epochs: int = 1
    batches_per_epoch: int = 20000  # number of training batches drawn per epoch
    num_workers: int = 0            # DataLoader worker processes
    lr: float = 3e-5

    # --- evaluation ----------------------------------------------------------
    k: int = 10                     # forwarded to evaluation.evaluate_nn
    agg_att: str = "mean"
        
# Shared settings for the runs below; any field not listed keeps its TrainArgs default.
args = TrainArgs(
    model_output_path='model',
    num_epochs=3,
    hidden_dims=64,
    batches_per_epoch=10000,
    k=20,
    num_neighbors=7,
)

ML-1m

In [3]:
# Load the pickled dataset bundle for this section.
# NOTE: unpickling can execute arbitrary code -- only load files you created or trust.
with open('data/data_ml.pkl', mode='rb') as f:
    dataset = pickle.load(f)

In [4]:
# Unpack the entries of the loaded dataset bundle that later cells refer to.
g = dataset['train-graph']
val_matrix = dataset['val-matrix'].tocsr()
test_matrix = dataset['test-matrix'].tocsr()
item_texts = dataset['item-texts']
user_ntype = dataset['user-type']
item_ntype = dataset['item-type']
user_to_item_etype = dataset['user-to-item-type']
item_to_user_etype = dataset['item-to-user-type']
timestamp = dataset['timestamp-edge-column']
device = torch.device(args.device)

# Assign user and movie IDs and use them as features (to learn an individual trainable
# embedding for each entity)
g.nodes[user_ntype].data['id'] = torch.arange(g.number_of_nodes(user_ntype))
g.nodes[item_ntype].data['id'] = torch.arange(g.number_of_nodes(item_ntype))

# Prepare torchtext dataset and vocabulary
if item_texts is not None:
    text_keys = list(item_texts.keys())
    # One text field per item-text key.
    fields = {
        key: data.Field(include_lengths=True, lower=True, batch_first=True)
        for key in text_keys
    }
    # The (name, field) spec is the same for every item, so build it once
    # instead of once per item.
    field_spec = [(key, fields[key]) for key in text_keys]
    # One Example per item node, in node-ID order.
    examples = [
        data.Example.fromlist([item_texts[key][i] for key in text_keys], field_spec)
        for i in range(g.number_of_nodes(item_ntype))
    ]
    textset = data.Dataset(examples, fields)
    for key, field in fields.items():
        # The vocabulary is built from the item texts alone. The original author left
        # an alternative with pretrained vectors disabled:
        #   field.build_vocab(getattr(textset, key), vectors='fasttext.simple.300d')
        field.build_vocab(getattr(textset, key))
else:
    textset = None

In [5]:
def objective(trial):
    """Train one PinSAGE configuration sampled by Optuna and return its score.

    The number of layers, attention heads, hidden size, learning rate,
    neighbour count and attention aggregation are drawn from ``trial``.
    Everything else is read from notebook-level names: ``g``, ``dataset``,
    ``textset``, ``device``, ``args`` and the node/edge type names.

    Args:
        trial: the Optuna trial used to draw hyperparameter values.

    Returns:
        ``metrics[0][2]`` from ``evaluation.evaluate_nn`` computed after the
        last epoch. NOTE(review): which metric that index selects is defined
        in ``evaluation`` -- confirm there.
    """
    # 2. Suggest values of the hyperparameters using a trial object.
    n_layers = trial.suggest_int('n_layers', 1, 3)
    n_heads = trial.suggest_int('n_heads', 1, 3)
    hidden_dims = trial.suggest_int('hidden_dims', 32, 128)
    # The epoch count is not tuned (a 'num_epochs' suggestion over 3..10 was left
    # disabled by the original author); see the fixed epoch loop below.
    learning_rate = trial.suggest_float("learning_rate_init", 1e-5, 1e-3)
    num_neighbors = trial.suggest_int('num_neighbors', 1, 15)
    agg_att = trial.suggest_categorical("agg_att", ["mean", "concat"])

    # Samplers and loaders are rebuilt for every trial because they take the
    # sampled num_neighbors / n_layers.
    batch_sampler = UserToItemBatchSampler(g, user_ntype, item_ntype, args.batch_size)
    neighbor_sampler = NeighborSampler(g, user_ntype, item_ntype, num_neighbors, n_layers)
    collator = PinSAGECollator(neighbor_sampler, g, item_ntype, user_ntype, textset)
    dataloader = DataLoader(
        batch_sampler,
        collate_fn=collator.collate_train,
        num_workers=args.num_workers)

    dataloader_test_items = DataLoader(
        torch.arange(g.number_of_nodes(item_ntype)),
        batch_size=args.batch_size,
        collate_fn=collator.collate_items,
        num_workers=args.num_workers)

    dataloader_test_users = DataLoader(
        torch.arange(g.number_of_nodes(user_ntype)),
        batch_size=args.batch_size,
        collate_fn=collator.collate_users,
        num_workers=args.num_workers)

    # A single iterator is shared by all epochs.
    # Assumes the batch sampler yields batches indefinitely -- TODO confirm;
    # otherwise next() raises StopIteration mid-epoch.
    dataloader_it = iter(dataloader)

    model = PinSAGEModel(g, item_ntype, user_ntype,
                         user_to_item_etype, item_to_user_etype,
                         textset, None,
                         hidden_dims, n_layers, n_heads, agg_att).to(device)

    opt = torch.optim.Adam(model.parameters(), lr=learning_rate)

    def _embed_all(loader):
        # Run the model over every batch of blocks from `loader` and stack the outputs.
        batches = []
        for blocks in loader:
            blocks = [block.to(device) for block in blocks]
            batches.append(model.get_repr(blocks))
        return torch.cat(batches, 0)

    # NOTE(review): every trial trains for a fixed 2 epochs; args.num_epochs is not
    # consulted here.
    for epoch_id in range(2):
        model.train()
        for batch_id in tqdm.trange(args.batches_per_epoch):
            pos_graph, neg_graph, blocks = next(dataloader_it)
            # Copy to the configured device
            blocks = [block.to(device) for block in blocks]
            pos_graph = pos_graph.to(device)
            neg_graph = neg_graph.to(device)

            pos_res, neg_res = model(pos_graph, neg_graph, blocks)
            # Use the same device the model and graphs were moved to, rather than a
            # hard-coded 'cuda', so the loss follows args.device.
            loss = warp_loss(pos_res, neg_res,
                             num_labels=g.number_of_nodes(item_ntype),
                             device=device)
            opt.zero_grad()
            loss.backward()
            opt.step()

        # Evaluation runs after every epoch; only the last epoch's metrics are returned.
        model.eval()
        with torch.no_grad():
            h_item = _embed_all(dataloader_test_items)
            h_user = _embed_all(dataloader_test_users)
            metrics = evaluation.evaluate_nn(dataset, h_item, h_user, args.k, args.batch_size)

    return metrics[0][2]

# 3. Create a study object and optimize the objective function.
# The objective's return value is to be maximised.
study = optuna.create_study(direction='maximize')
# A single trial is requested.
# NOTE(review): timeout=10 is far shorter than one trial (the recorded run below took
# about 20 minutes and still finished), so it presumably only stops further trials
# from starting -- confirm against the Optuna documentation.
study.optimize(
    objective,
    n_trials=1,
    timeout=10,
)

[32m[I 2022-05-29 15:09:30,597][0m A new study created in memory with name: no-name-91d01028-84bf-4a0a-9fd0-95d4862e6c84[0m
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [09:11<00:00, 18.12it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [09:56<00:00, 16.76it/s]
[32m[I 2022-05-29 15:29:19,010][0m Trial 0 finished with value: 0.30409112359971147 and parameters: {'n_layers': 1, 'n_heads': 3, 'hidden_dims': 60, 'learning_rate_init': 0.0005080637474804812, 'num_neighbors': 2, 'agg_att': 'mean'}. Best is trial 0 with value: 0.30409112359971147.[0m


Ta Feng

In [3]:
# Load the pickled dataset bundle for this section.
# NOTE: unpickling can execute arbitrary code -- only load files you created or trust.
with open('data/tafeng.pkl', 'rb') as f:
    dataset = pickle.load(f)

# Unpack the entries of the loaded dataset bundle that later cells refer to.
g = dataset['train-graph']
val_matrix = dataset['val-matrix'].tocsr()
test_matrix = dataset['test-matrix'].tocsr()
item_texts = dataset['item-texts']
user_ntype = dataset['user-type']
item_ntype = dataset['item-type']
user_to_item_etype = dataset['user-to-item-type']
item_to_user_etype = dataset['item-to-user-type']
# Overwrite the timestamp column name inside the bundle itself; `dataset` is later
# passed whole to evaluation.evaluate_nn.
# NOTE(review): presumably the pickled value is missing or wrong for this dataset -- confirm.
dataset['timestamp-edge-column'] = 'TRANSACTION_DT'
timestamp = dataset['timestamp-edge-column']
device = torch.device(args.device)

# Assign user and item IDs and use them as features (to learn an individual trainable
# embedding for each entity)
g.nodes[user_ntype].data['id'] = torch.arange(g.number_of_nodes(user_ntype))
g.nodes[item_ntype].data['id'] = torch.arange(g.number_of_nodes(item_ntype))

# Prepare torchtext dataset and vocabulary
if item_texts is not None:
    text_keys = list(item_texts.keys())
    # One text field per item-text key.
    fields = {
        key: data.Field(include_lengths=True, lower=True, batch_first=True)
        for key in text_keys
    }
    # The (name, field) spec is the same for every item, so build it once
    # instead of once per item.
    field_spec = [(key, fields[key]) for key in text_keys]
    # One Example per item node, in node-ID order.
    examples = [
        data.Example.fromlist([item_texts[key][i] for key in text_keys], field_spec)
        for i in range(g.number_of_nodes(item_ntype))
    ]
    textset = data.Dataset(examples, fields)
    for key, field in fields.items():
        # The vocabulary is built from the item texts alone. The original author left
        # an alternative with pretrained vectors disabled:
        #   field.build_vocab(getattr(textset, key), vectors='fasttext.simple.300d')
        field.build_vocab(getattr(textset, key))
else:
    textset = None

# Settings for this section; any field not listed keeps its TrainArgs default.
args = TrainArgs(
    model_output_path='models/model_tafeng_optuna',
    num_epochs=3,
    hidden_dims=64,
    batches_per_epoch=10000,
    k=20,
    num_neighbors=7,
)

In [5]:
def objective(trial):
    """Train one PinSAGE configuration sampled by Optuna and return its score.

    The number of layers, attention heads, hidden size, learning rate,
    neighbour count and attention aggregation are drawn from ``trial``.
    Everything else is read from notebook-level names: ``g``, ``dataset``,
    ``textset``, ``device``, ``args`` and the node/edge type names.

    Args:
        trial: the Optuna trial used to draw hyperparameter values.

    Returns:
        ``metrics[0][2]`` from ``evaluation.evaluate_nn`` computed after the
        last epoch. NOTE(review): which metric that index selects is defined
        in ``evaluation`` -- confirm there.
    """
    # 2. Suggest values of the hyperparameters using a trial object.
    n_layers = trial.suggest_int('n_layers', 1, 3)
    n_heads = trial.suggest_int('n_heads', 1, 3)
    hidden_dims = trial.suggest_int('hidden_dims', 16, 64)
    learning_rate = trial.suggest_float("learning_rate_init", 1e-5, 1e-4)
    num_neighbors = trial.suggest_int('num_neighbors', 1, 15)
    agg_att = trial.suggest_categorical("agg_att", ["mean", "concat"])

    # Samplers and loaders are rebuilt for every trial because they take the
    # sampled num_neighbors / n_layers.
    batch_sampler = UserToItemBatchSampler(g, user_ntype, item_ntype, args.batch_size)
    neighbor_sampler = NeighborSampler(g, user_ntype, item_ntype, num_neighbors, n_layers)
    collator = PinSAGECollator(neighbor_sampler, g, item_ntype, user_ntype, textset)
    dataloader = DataLoader(
        batch_sampler,
        collate_fn=collator.collate_train,
        num_workers=args.num_workers)

    dataloader_test_items = DataLoader(
        torch.arange(g.number_of_nodes(item_ntype)),
        batch_size=args.batch_size,
        collate_fn=collator.collate_items,
        num_workers=args.num_workers)

    dataloader_test_users = DataLoader(
        torch.arange(g.number_of_nodes(user_ntype)),
        batch_size=args.batch_size,
        collate_fn=collator.collate_users,
        num_workers=args.num_workers)

    # A single iterator is shared by all epochs.
    # Assumes the batch sampler yields batches indefinitely -- TODO confirm;
    # otherwise next() raises StopIteration mid-epoch.
    dataloader_it = iter(dataloader)

    model = PinSAGEModel(g, item_ntype, user_ntype,
                         user_to_item_etype, item_to_user_etype,
                         textset, None,
                         hidden_dims, n_layers, n_heads, agg_att).to(device)

    opt = torch.optim.Adam(model.parameters(), lr=learning_rate)

    def _embed_all(loader):
        # Run the model over every batch of blocks from `loader` and stack the outputs.
        batches = []
        for blocks in loader:
            blocks = [block.to(device) for block in blocks]
            batches.append(model.get_repr(blocks))
        return torch.cat(batches, 0)

    # NOTE(review): every trial trains for a fixed 2 epochs; args.num_epochs is not
    # consulted here.
    for epoch_id in range(2):
        model.train()
        for batch_id in tqdm.trange(args.batches_per_epoch):
            pos_graph, neg_graph, blocks = next(dataloader_it)
            # Copy to the configured device
            blocks = [block.to(device) for block in blocks]
            pos_graph = pos_graph.to(device)
            neg_graph = neg_graph.to(device)

            pos_res, neg_res = model(pos_graph, neg_graph, blocks)
            # Use the same device the model and graphs were moved to, rather than a
            # hard-coded 'cuda', so the loss follows args.device.
            loss = warp_loss(pos_res, neg_res,
                             num_labels=g.number_of_nodes(item_ntype),
                             device=device)
            opt.zero_grad()
            loss.backward()
            opt.step()

        # Evaluation runs after every epoch; only the last epoch's metrics are returned.
        model.eval()
        with torch.no_grad():
            h_item = _embed_all(dataloader_test_items)
            h_user = _embed_all(dataloader_test_users)
            metrics = evaluation.evaluate_nn(dataset, h_item, h_user, args.k, args.batch_size)

    return metrics[0][2]

# 3. Create a study object and optimize the objective function.
# The objective's return value is to be maximised.
study = optuna.create_study(direction='maximize')
# A single trial is requested.
# NOTE(review): timeout=10 is far shorter than one trial (the recorded run below took
# about 37 minutes and still finished), so it presumably only stops further trials
# from starting -- confirm against the Optuna documentation.
study.optimize(
    objective,
    n_trials=1,
    timeout=10,
)

[32m[I 2022-05-29 15:59:54,979][0m A new study created in memory with name: no-name-23160220-0af7-4347-9f4b-9490f2e16850[0m
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [15:24<00:00, 10.82it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [16:32<00:00, 10.07it/s]
[32m[I 2022-05-29 16:36:56,293][0m Trial 0 finished with value: 0.00425680151852227 and parameters: {'n_layers': 3, 'n_heads': 3, 'hidden_dims': 50, 'learning_rate_init': 2.6397421055106022e-05, 'num_neighbors': 12, 'agg_att': 'mean'}. Best is trial 0 with value: 0.00425680151852227.[0m


Amazon Video Games

In [6]:
# Load the pickled dataset bundle for this section.
# NOTE: unpickling can execute arbitrary code -- only load files you created or trust.
with open('data/amazon.pkl', 'rb') as f:
    dataset = pickle.load(f)

# Unpack the entries of the loaded dataset bundle that later cells refer to.
g = dataset['train-graph']
val_matrix = dataset['val-matrix'].tocsr()
test_matrix = dataset['test-matrix'].tocsr()
item_texts = dataset['item-texts']
user_ntype = dataset['user-type']
item_ntype = dataset['item-type']
user_to_item_etype = dataset['user-to-item-type']
item_to_user_etype = dataset['item-to-user-type']
timestamp = dataset['timestamp-edge-column']
device = torch.device(args.device)

# Assign user and item IDs and use them as features (to learn an individual trainable
# embedding for each entity)
g.nodes[user_ntype].data['id'] = torch.arange(g.number_of_nodes(user_ntype))
g.nodes[item_ntype].data['id'] = torch.arange(g.number_of_nodes(item_ntype))

# Prepare torchtext dataset and vocabulary
if item_texts is not None:
    text_keys = list(item_texts.keys())
    # One text field per item-text key.
    fields = {
        key: data.Field(include_lengths=True, lower=True, batch_first=True)
        for key in text_keys
    }
    # The (name, field) spec is the same for every item, so build it once
    # instead of once per item.
    field_spec = [(key, fields[key]) for key in text_keys]
    # One Example per item node, in node-ID order.
    examples = [
        data.Example.fromlist([item_texts[key][i] for key in text_keys], field_spec)
        for i in range(g.number_of_nodes(item_ntype))
    ]
    textset = data.Dataset(examples, fields)
    for key, field in fields.items():
        # The vocabulary is built from the item texts alone. The original author left
        # an alternative with pretrained vectors disabled:
        #   field.build_vocab(getattr(textset, key), vectors='fasttext.simple.300d')
        field.build_vocab(getattr(textset, key))
else:
    textset = None

# Settings for this section; any field not listed keeps its TrainArgs default.
args = TrainArgs(
    model_output_path='models/model_amazon_optuna',
    num_epochs=3,
    hidden_dims=64,
    batches_per_epoch=10000,
    k=20,
    num_neighbors=7,
)

In [7]:
def objective(trial):
    """Train one PinSAGE configuration sampled by Optuna and return its score.

    The number of layers, attention heads, hidden size, learning rate,
    neighbour count and attention aggregation are drawn from ``trial``.
    Everything else is read from notebook-level names: ``g``, ``dataset``,
    ``textset``, ``device``, ``args`` and the node/edge type names.

    Args:
        trial: the Optuna trial used to draw hyperparameter values.

    Returns:
        ``metrics[0][2]`` from ``evaluation.evaluate_nn`` computed after the
        last epoch. NOTE(review): which metric that index selects is defined
        in ``evaluation`` -- confirm there.
    """
    # 2. Suggest values of the hyperparameters using a trial object.
    n_layers = trial.suggest_int('n_layers', 1, 3)
    n_heads = trial.suggest_int('n_heads', 1, 3)
    hidden_dims = trial.suggest_int('hidden_dims', 32, 150)
    # The epoch count is not tuned (a 'num_epochs' suggestion over 3..10 was left
    # disabled by the original author); see the fixed epoch loop below.
    learning_rate = trial.suggest_float("learning_rate_init", 1e-5, 1e-3)
    num_neighbors = trial.suggest_int('num_neighbors', 1, 15)
    agg_att = trial.suggest_categorical("agg_att", ["mean", "concat"])

    # Samplers and loaders are rebuilt for every trial because they take the
    # sampled num_neighbors / n_layers.
    batch_sampler = UserToItemBatchSampler(g, user_ntype, item_ntype, args.batch_size)
    neighbor_sampler = NeighborSampler(g, user_ntype, item_ntype, num_neighbors, n_layers)
    collator = PinSAGECollator(neighbor_sampler, g, item_ntype, user_ntype, textset)
    dataloader = DataLoader(
        batch_sampler,
        collate_fn=collator.collate_train,
        num_workers=args.num_workers)

    dataloader_test_items = DataLoader(
        torch.arange(g.number_of_nodes(item_ntype)),
        batch_size=args.batch_size,
        collate_fn=collator.collate_items,
        num_workers=args.num_workers)

    dataloader_test_users = DataLoader(
        torch.arange(g.number_of_nodes(user_ntype)),
        batch_size=args.batch_size,
        collate_fn=collator.collate_users,
        num_workers=args.num_workers)

    # A single iterator is shared by all epochs.
    # Assumes the batch sampler yields batches indefinitely -- TODO confirm;
    # otherwise next() raises StopIteration mid-epoch.
    dataloader_it = iter(dataloader)

    model = PinSAGEModel(g, item_ntype, user_ntype,
                         user_to_item_etype, item_to_user_etype,
                         textset, None,
                         hidden_dims, n_layers, n_heads, agg_att).to(device)

    opt = torch.optim.Adam(model.parameters(), lr=learning_rate)

    def _embed_all(loader):
        # Run the model over every batch of blocks from `loader` and stack the outputs.
        batches = []
        for blocks in loader:
            blocks = [block.to(device) for block in blocks]
            batches.append(model.get_repr(blocks))
        return torch.cat(batches, 0)

    # NOTE(review): every trial trains for a fixed 2 epochs; args.num_epochs is not
    # consulted here.
    for epoch_id in range(2):
        model.train()
        for batch_id in tqdm.trange(args.batches_per_epoch):
            pos_graph, neg_graph, blocks = next(dataloader_it)
            # Copy to the configured device
            blocks = [block.to(device) for block in blocks]
            pos_graph = pos_graph.to(device)
            neg_graph = neg_graph.to(device)

            pos_res, neg_res = model(pos_graph, neg_graph, blocks)
            # Use the same device the model and graphs were moved to, rather than a
            # hard-coded 'cuda', so the loss follows args.device.
            loss = warp_loss(pos_res, neg_res,
                             num_labels=g.number_of_nodes(item_ntype),
                             device=device)
            opt.zero_grad()
            loss.backward()
            opt.step()

        # Evaluation runs after every epoch; only the last epoch's metrics are returned.
        model.eval()
        with torch.no_grad():
            h_item = _embed_all(dataloader_test_items)
            h_user = _embed_all(dataloader_test_users)
            metrics = evaluation.evaluate_nn(dataset, h_item, h_user, args.k, args.batch_size)

    return metrics[0][2]

# 3. Create a study object and optimize the objective function.
# The objective's return value is to be maximised.
study = optuna.create_study(direction='maximize')
# A single trial is requested.
# NOTE(review): timeout=10 is far shorter than one trial (the recorded run below took
# about 13 minutes and still finished), so it presumably only stops further trials
# from starting -- confirm against the Optuna documentation.
study.optimize(
    objective,
    n_trials=1,
    timeout=10,
)

[32m[I 2022-05-29 16:37:51,098][0m A new study created in memory with name: no-name-6df787d4-baea-44b3-8930-a7d7458a8093[0m
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [06:14<00:00, 26.68it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [05:24<00:00, 30.83it/s]
[32m[I 2022-05-29 16:51:22,055][0m Trial 0 finished with value: 0.012539248013373904 and parameters: {'n_layers': 1, 'n_heads': 2, 'hidden_dims': 50, 'learning_rate_init': 0.00023437332472167507, 'num_neighbors': 6, 'agg_att': 'mean'}. Best is trial 0 with value: 0.012539248013373904.[0m
