In [16]:
import sys
!{sys.executable} -m pip install torch
!{sys.executable} -m pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.2+cu102.html
!{sys.executable} -m pip install torch-sparse -f https://data.pyg.org/whl/torch-1.10.2+cu102.html
!{sys.executable} -m pip install torch-geometric
!{sys.executable} -m pip install ray
!{sys.executable} -m pip install "ray[tune]"
!{sys.executable} -m pip install -Uqq ipdb

Looking in links: https://data.pyg.org/whl/torch-1.10.2+cu102.html
Looking in links: https://data.pyg.org/whl/torch-1.10.2+cu102.html




In [17]:
import pickle
import os
import pandas as pd
import sys
from itertools import chain
from collections import Counter

module_path = "/home/ec2-user/SageMaker/sb-rec-system"
if module_path not in sys.path:
    sys.path.append(module_path)

from algorithms.srgnn_benchmark.srgnn_light.model import *

In [18]:
def test(loader, test_model, dataset, train_data, latencies_out_file, pred_out_file):
    """Evaluate ``test_model`` on ``loader`` and persist predictions, latencies and metrics.

    For every batch the model scores are computed on CPU, the 20 highest
    scoring item indices per row are taken as recommendations, and each
    recommendation list is fed to the metric objects together with the
    batch's ``y_next`` entry for that row. Metric results are printed and
    appended to ``results/{dataset}.csv``.

    Args:
        loader: Iterable of batches. Each batch must support ``.to('cpu')``
            and expose ``y_next`` (indexable per row; an entry is either an
            ``int`` or a sequence of items).
        test_model: Model object with ``.eval()`` that, when called on a
            batch, returns a 2-D score tensor (rows x items).
        dataset: Name used to build the results file path
            ``results/{dataset}.csv``.
        train_data: Training DataFrame handed to ``Coverage`` and
            ``Popularity`` as ``training_df``.
        latencies_out_file: Path passed to ``LatencyWriter``.
        pred_out_file: Path passed to ``PredictionsWriter``.

    Returns:
        None. Results are printed and written to disk.
    """
    eval_k = 20  # cut-off used for top-k, the predictions writer and the "@20" header

    test_model.eval()

    metrics = [
        NDCG(), MAP(), Precision(), Recall(), HitRate(), MRR(),
        Coverage(training_df=train_data),
        Popularity(training_df=train_data),
    ]

    predictions_writer = PredictionsWriter(outputfilename=pred_out_file, evaluation_n=eval_k)
    latency_writer = LatencyWriter(latencies_out_file)
    prediction_sw = SimpleStopwatch()

    # try/finally so both writers are closed even if evaluation fails midway.
    try:
        for data in loader:
            # Evaluation runs on CPU (this was initially cuda).
            data = data.to('cpu')
            with torch.no_grad():
                prediction_sw.start()
                score = test_model(data)
                # The arg-max is kept inside the timed window so recorded
                # latencies stay comparable with earlier runs.
                score.max(dim=1)
                next_items = data.y_next
                prediction_sw.stop(0)

            top_k_indices = score.topk(eval_k)[1].cpu().detach().numpy()

            for row, top_k_pred in enumerate(top_k_indices):
                # Series indexed by the recommended item ids; all values are 0.0.
                recommendations = pd.Series(0.0, index=top_k_pred)

                target = next_items[row]
                n_item = [target] if isinstance(target, int) else target

                predictions_writer.appendline(recommendations, n_item)
                for metric in metrics:
                    metric.add(recommendations, np.array(n_item))

        for position, latency in prediction_sw.get_prediction_latencies_in_micros():
            latency_writer.append_line(position, latency)
    finally:
        try:
            predictions_writer.close()
        finally:
            latency_writer.close()

    scores = []
    for metric in metrics:
        metric_name, value = metric.result()
        formatted = "%.4f" % value
        scores.append(formatted)
        print(metric_name, formatted)

    # One column name per metric, in the same order as `metrics`.
    header = ['NDCG@20', 'MAP@20', 'Precision@20', 'Recall@20',
              'HitRate@20', 'MRR@20', 'Coverage@20', 'Popularity@20']
    _append_metrics_row(f'results/{dataset}.csv', header, scores)


def _append_metrics_row(results_path, header, row):
    """Append ``row`` to the CSV at ``results_path``, writing ``header`` only for a new/empty file."""
    import os  # local import keeps this cell runnable on its own

    parent = os.path.dirname(results_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    write_header = True
    needs_newline = False
    if os.path.exists(results_path) and os.path.getsize(results_path) > 0:
        write_header = False
        # A file left without a trailing newline would otherwise get the new
        # row glued onto its last line.
        with open(results_path, 'rb') as existing:
            existing.seek(-1, os.SEEK_END)
            needs_newline = existing.read(1) != b'\n'

    with open(results_path, 'a') as f:
        if needs_newline:
            f.write('\n')
        if write_header:
            f.write(",".join(str(item) for item in header) + '\n')
        f.write(",".join(str(item) for item in row) + '\n')

In [4]:
# Hyper-parameter set for the yoochoose model. The evaluation cell reads
# batch_size, hidden_dim, num_items, model_type, K and normalize from it.
yoochoose = dict(
    l2_penalty=1e-05,
    lr=0.0033144396293236108,
    epochs=5,
    batch_size=1,
    hidden_dim=125,
    step=3,
    weight_decay=0.1,
    K=1,
    num_items=37484,
    model_type='lightgcn',
    normalize=True,
)

In [5]:
# Hyper-parameter set for the diginetica model; same key layout as the other
# dataset configurations in this notebook.
diginetica = dict(
    l2_penalty=1e-05,
    lr=0.003268958436232341,
    epochs=5,
    batch_size=1,
    hidden_dim=50,
    step=1,
    weight_decay=0.1,
    K=1,
    num_items=43098,
    model_type='lightgcn',
    normalize=True,
)

In [19]:
# Hyper-parameter set for the PostNL model. num_items starts as None and is
# supplied per data slice by the evaluation loop.
postnl = dict(
    l2_penalty=1e-10,
    lr=0.010683499200047174,
    epochs=5,
    batch_size=1,
    hidden_dim=100,
    step=3,
    weight_decay=0.1,
    K=1,
    num_items=None,
    model_type='lightgcn',
    normalize=True,
)

In [20]:
# Evaluate the trained srgnn_light model on every PostNL data slice.
data_dir = '/home/ec2-user/SageMaker/sb-rec-system/data/prepared/srgnn/'
model_dir = '/home/ec2-user/SageMaker/sb-rec-system/algorithms/srgnn_benchmark/trained_models'

# num_items passed to the model for slice 0, 1 and 2 respectively.
items = [463, 488, 522]

# Iterate over `items` directly so the slice count cannot drift from the list,
# and avoid naming the loop variable `slice`, which would shadow the builtin.
for slice_idx, nitems in enumerate(items):

    dataset = f'srgnn_light_postnl_{slice_idx}'

    # Build a per-slice copy instead of calling postnl.update(...): mutating
    # the shared dict would leak the last slice's num_items into later cells.
    config = {**postnl, "num_items": nitems}

    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # The context manager closes the file handle deterministically.
    with open(data_dir + f'item_views_tra_seqs.{slice_idx}.txt', 'rb') as seq_file:
        train_seqs = pickle.load(seq_file)
    # Flatten the loaded sequences into a single-column frame of item ids.
    train_data = pd.DataFrame(list(chain.from_iterable(train_seqs)), columns=['ItemId'])

    latencies_out_file = f'latencies/latencies_{dataset}.txt'
    pred_out_file = f'predictions/predictions_{dataset}.txt'

    test_dataset = GraphDataset(data_dir, f'item_views_test.{slice_idx}')

    test_loader = pyg_data.DataLoader(test_dataset,
                                      batch_size=config["batch_size"],
                                      shuffle=False,
                                      drop_last=False)

    best_model = SRGNN(config["hidden_dim"], config["num_items"], config["model_type"],
                       config["K"], config["normalize"])
    # The checkpoint file is named after the dataset identifier.
    path = os.path.join(model_dir, dataset)
    # map_location forces the weights onto CPU, where test() evaluates.
    best_model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))
    test(test_loader, best_model, dataset, train_data, latencies_out_file, pred_out_file)

NDCG@20:  0.4799
MAP@20:  0.0481
Precision@20:  0.0811
Recall@20:  0.6966
HitRate@20:  0.7642
MRR@20:  0.3563
Coverage@20:  0.9459
Popularity@20:  0.2370
NDCG@20:  0.4338
MAP@20:  0.0438
Precision@20:  0.0736
Recall@20:  0.6402
HitRate@20:  0.7029
MRR@20:  0.3249
Coverage@20:  0.9589
Popularity@20:  0.2241
NDCG@20:  0.4507
MAP@20:  0.0452
Precision@20:  0.0753
Recall@20:  0.6666
HitRate@20:  0.7417
MRR@20:  0.3362
Coverage@20:  0.9482
Popularity@20:  0.0932


In [8]:
# Evaluate the trained srgnn_light model on the yoochoose1_64 dataset.

# model directory
dataset = 'yoochoose1_64'
model_dir = '/home/ec2-user/SageMaker/sb-rec-system/algorithms/srgnn_benchmark/trained_models'
data_dir = f'/home/ec2-user/SageMaker/sb-rec-system/algorithms/srgnn_benchmark/data/{dataset}'

# best hyper
config = yoochoose

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
# The context manager closes the file handle deterministically.
with open(data_dir + '/raw/all_train_seq.txt', 'rb') as seq_file:
    train_seqs = pickle.load(seq_file)
# Flatten the loaded sequences into a single-column frame of item ids.
train_data = pd.DataFrame(list(chain.from_iterable(train_seqs)), columns=['ItemId'])

latencies_out_file = f'latencies/latencies_{dataset}.txt'
pred_out_file = f'predictions/predictions_{dataset}.txt'

test_dataset = GraphDataset(data_dir, 'test')

test_loader = pyg_data.DataLoader(test_dataset,
                                  batch_size=config["batch_size"],
                                  shuffle=False,
                                  drop_last=False)

best_model = SRGNN(config["hidden_dim"], config["num_items"], config["model_type"],
                   config["K"], config["normalize"])
path = os.path.join(model_dir, f"srgnn_light_{dataset}")
# map_location forces the weights onto CPU, matching the PostNL evaluation
# cell and test(), which moves every batch to CPU. Without it a checkpoint
# saved from a CUDA device cannot be loaded on a CPU-only host.
best_model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))
test(test_loader, best_model, dataset, train_data, latencies_out_file, pred_out_file)

KeyboardInterrupt: 