In [1]:
import sys

In [2]:
!{sys.executable} -m pip install torch
!{sys.executable} -m pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.2+cu102.html
!{sys.executable} -m pip install torch-sparse -f https://data.pyg.org/whl/torch-1.10.2+cu102.html
!{sys.executable} -m pip install torch-geometric
!{sys.executable} -m pip install ray
!{sys.executable} -m pip install "ray[tune]"
!{sys.executable} -m pip install -Uqq ipdb

Looking in links: https://data.pyg.org/whl/torch-1.10.2+cu102.html
Looking in links: https://data.pyg.org/whl/torch-1.10.2+cu102.html




In [3]:
# Root of the project checkout; it has to be on sys.path so that the
# `algorithms.*` packages can be imported from this notebook.
module_path = "/home/ec2-user/SageMaker/sb-rec-system"
already_importable = module_path in sys.path
if not already_importable:
    # Register the path only once, so re-running the cell does not add duplicates.
    sys.path += [module_path]
    
from algorithms.srgnn_benchmark.srgnn_light.model import *
import matplotlib.pyplot as plt

In [7]:
def _evaluate(model, loader, device, top_k=20):
    """Score `model` on every batch of `loader` and return ranking metrics.

    Args:
        model: module called as ``model(batch)`` that returns one score per item
            for every sample (the same output that is fed to CrossEntropyLoss
            during training).
        loader: iterable of batches exposing ``.to(device)`` and ``.y``.
        device: device the batches are moved to before the forward pass.
        top_k: size of the ranked list used for the hit-rate and MRR metrics.

    Returns:
        Tuple ``(top1_accuracy, hit_rate_at_k, mrr_at_k)`` averaged over all
        samples seen; all three are 0.0 when the loader yields no samples.
    """
    model.eval()
    top1_hits = 0
    topk_hits = 0
    reciprocal_rank_sum = 0.0
    num_samples = 0

    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            scores = model(batch)
            # assumes batch.y holds class indices of shape (N,), as required by the
            # CrossEntropyLoss call in train() -- TODO confirm
            target = batch.y

            k = min(top_k, scores.size(1))
            # topk returns the entries sorted by score, so column 0 is the best guess.
            _, ranked_items = scores.topk(k, dim=1)
            matches = ranked_items == target.unsqueeze(1)

            top1_hits += int(matches[:, 0].sum())
            # Each target appears at most once per row; its column is its 0-based rank.
            _, hit_ranks = matches.nonzero(as_tuple=True)
            topk_hits += int(hit_ranks.numel())
            reciprocal_rank_sum += float((1.0 / (hit_ranks.float() + 1.0)).sum())
            num_samples += int(target.size(0))

    if num_samples == 0:
        return 0.0, 0.0, 0.0
    return (top1_hits / num_samples,
            topk_hits / num_samples,
            reciprocal_rank_sum / num_samples)


def train(config):
    """Train an SRGNN model on the diginetica dataset and evaluate it every epoch.

    Args:
        config: dict with the keys
            ``hidden_dim``, ``num_items``, ``model_type``, ``K``, ``normalize``
            (forwarded to ``SRGNN``), ``lr`` and ``l2_penalty`` (Adam learning
            rate and weight decay), ``step`` and ``weight_decay`` (StepLR
            ``step_size`` and ``gamma``), ``batch_size`` and ``epochs``.
            Optional: ``top_k`` (default 20), the cut-off used for the
            top-k accuracy and MRR metrics.

    Returns:
        Tuple ``(test_accs, top_k_accs, top_k_mrrs, losses, best_model,
        best_acc, test_loader)``. The first four are lists with one entry per
        epoch, ``best_model`` is a copy of the model from the epoch with the
        highest top-1 accuracy (``None`` if no epoch ran), ``best_acc`` is that
        accuracy and ``test_loader`` is the loader over the 'test' split.

    Side effects:
        Writes the state dict of the model after the last epoch to
        ``<checkpoint_dir>/srgnn_light_diginetica``.
    """
    import copy

    data_root = '/home/ec2-user/SageMaker/sb-rec-system/algorithms/srgnn_benchmark/data/diginetica'
    checkpoint_dir = '/home/ec2-user/SageMaker/sb-rec-system/algorithms/srgnn_benchmark/trained_models'
    top_k = config.get("top_k", 20)

    model = SRGNN(config["hidden_dim"], config["num_items"], config["model_type"], config["K"], config["normalize"])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    model.to(device)

    criterion = nn.CrossEntropyLoss()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config["lr"],
                                 weight_decay=config["l2_penalty"])

    # Note: config["weight_decay"] is the LR decay factor (gamma), not the L2 penalty.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=config["step"],
                                                gamma=config["weight_decay"])

    train_dataset = GraphDataset(data_root, 'train')
    # NOTE(review): the training data is iterated in a fixed order (shuffle=False);
    # kept as in the original -- confirm this is intended.
    train_loader = pyg_data.DataLoader(train_dataset,
                                       batch_size=config["batch_size"],
                                       shuffle=False,
                                       drop_last=False)

    val_dataset = GraphDataset(data_root, 'test')
    val_loader = pyg_data.DataLoader(val_dataset,
                                     batch_size=config["batch_size"],
                                     shuffle=False,
                                     drop_last=False)

    losses = []
    test_accs = []
    top_k_accs = []
    top_k_mrrs = []

    best_acc = 0
    best_model = None

    for epoch in range(config["epochs"]):
        total_loss = 0
        model.train()
        for batch in tqdm(train_loader):
            # The batch must live on the same device as the model.
            batch = batch.to(device)
            optimizer.zero_grad()

            pred = model(batch)
            label = batch.y
            loss = criterion(pred, label)

            loss.backward()
            optimizer.step()
            # Weight the batch mean by its size so the epoch loss is a per-sample mean.
            total_loss += loss.item() * batch.num_graphs

        total_loss /= len(train_loader.dataset)
        losses.append(total_loss)

        scheduler.step()

        # Evaluate on the held-out split after every epoch.
        acc, top_k_acc, top_k_mrr = _evaluate(model, val_loader, device, top_k)
        test_accs.append(acc)
        top_k_accs.append(top_k_acc)
        top_k_mrrs.append(top_k_mrr)

        if best_model is None or acc > best_acc:
            best_acc = acc
            best_model = copy.deepcopy(model)

    # Persist the weights of the final epoch. When the model was wrapped in
    # nn.DataParallel the saved parameter names carry a "module." prefix.
    os.makedirs(checkpoint_dir, exist_ok=True)
    path = os.path.join(checkpoint_dir, "srgnn_light_diginetica")
    torch.save(model.state_dict(), path)

    return test_accs, top_k_accs, top_k_mrrs, losses, best_model, best_acc, val_loader

In [9]:
# Hyper-parameters for the diginetica run ("from hyper" per the original note,
# presumably the hyper-parameter search -- confirm).
config = {
    # Adam optimiser: L2 penalty (passed as weight_decay) and learning rate.
    "l2_penalty": 1e-05,
    "lr": 0.003268958436232341,
    # Training schedule.
    "epochs": 5,
    "batch_size": 128,
    # Model size.
    "hidden_dim": 50,
    # StepLR scheduler: 'step' is the step_size, 'weight_decay' is the decay factor (gamma).
    "step": 1,
    "weight_decay": 0.1,
    # Remaining SRGNN constructor arguments.
    "K": 1,
    "num_items": 43098,
    "model_type": "lightgcn",
    "normalize": True,
}

In [5]:
# Config for the yoochoose datasets (yoochoose1_64 and yoochoose1_4); uncomment the line below to use it.
# config = {'l2_penalty': 1e-05, 'lr': 0.0033144396293236108, 'epochs': 5, 'batch_size': 128, 'hidden_dim': 125, 'step': 3, 'weight_decay': 0.1, 'K': 1, 'num_items': 37484, 'model_type': 'lightgcn', 'normalize': True}

In [10]:
# Run training with the active config; the trailing semicolon keeps the notebook
# from echoing whatever train() returns as the cell output.
train(config);

100%|██████████| 5621/5621 [10:59<00:00,  8.53it/s]
100%|██████████| 5621/5621 [11:43<00:00,  7.99it/s]
100%|██████████| 5621/5621 [11:45<00:00,  7.96it/s]
100%|██████████| 5621/5621 [11:44<00:00,  7.98it/s]
100%|██████████| 5621/5621 [11:47<00:00,  7.94it/s]


In [None]:
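# Best config settings from the paper.
# NOTE: train() also reads config["K"], config["model_type"] and config["normalize"],
# which are missing below -- add them before uncommenting.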
# config={
#     'l2_penalty': 1e-05, 
#     'lr': 0.001, 
#     'epochs': 10, 
#     'batch_size': 100, 
#     'hidden_dim': 100, 
#     'step': 3, 
#     'weight_decay': 0.1,
#     'num_items': 43098, 
# }

In [None]:
# Train and collect the per-epoch metrics.
# NOTE(review): this cell requires train() to return these seven values.
test_accs, top_k_accs, top_k_mrrs, losses, best_model, best_acc, test_loader = train(config)

# Report the best value of every metric (the MRR is summarised by its maximum,
# like the other metrics, instead of printing the whole per-epoch list).
print(f'test_accs : {max(test_accs)}, top_k_accs : {max(top_k_accs)}, top_k_mrrs : {max(top_k_mrrs)}')
print("Maximum test set accuracy: {0}".format(max(test_accs)))
print("Minimum loss: {0}".format(min(losses)))

# Plot the training loss and the test accuracy per epoch on an explicit figure
# so the same object is saved and shown.
fig, ax = plt.subplots()
ax.plot(losses, label="training loss")
ax.plot(test_accs, label="test accuracy")
ax.set_xlabel("epoch")
ax.set_title("Training loss and test accuracy per epoch")
ax.legend()
# Save before plt.show(), which may clear the figure on some backends.
fig.savefig('train_loss_test_acc_diginetica_srgnn_light.pdf')
plt.show()