# imports

In [None]:
import numpy as np
import pandas as pd
import os.path
from os import path
import tensorflow as tf
from argparse import ArgumentParser
from batch import get_batch_test_data
from evaluate import evaluate
import heapq
from NAIS import NAIS # change to import NAIS
import json
from dataset import DataSet 

#utils

In [None]:
# Attribute-style container used to turn a JSON dict into an args object.
class obj:
    """Plain object whose instance attributes are copied from a mapping."""

    def __init__(self, dict_):
        # Write straight into the instance namespace so every key of
        # ``dict_`` becomes an attribute with the corresponding value.
        vars(self).update(dict_)

def dict2obj(d):
    """Return ``d`` converted to ``obj`` instances via a JSON round-trip.

    ``d`` is serialised with ``json.dumps`` and parsed back with ``obj`` as
    the ``object_hook``, so every JSON object in the payload (nested ones
    included) is returned as an ``obj`` with attribute access. ``d`` must
    therefore be JSON-serialisable.
    """
    serialized = json.dumps(d)
    return json.loads(serialized, object_hook=obj)

In [None]:
def predict_and_get_results():
    """Evaluate HR, NDCG and MRR at ``topN`` for every user and save the rankings.

    Reads the notebook-level globals ``dataset``, ``nais``, ``args`` and
    ``topN``. For each ``batch_id`` in ``range(dataset.num_users)`` the test
    batch is scored with ``nais.predict``, the ``topN`` highest-scoring items
    are ranked, and ``evaluate`` is called on that ranking and the batch's
    ``test_item``.

    Side effects:
        * prints the metrics averaged over all users;
        * writes ``predictions.csv`` into ``args.save_pred_path`` (created if
          missing) with one row per user and the columns ``user``,
          ``ranked_item`` (the batch's ``test_item``) and ``predicted_list``.
    """
    hits, ndcgs, mrrs = [], [], []
    test_rank_list_data = []
    for batch_id in range(dataset.num_users):
        user_input, item_input, test_item, n_u = get_batch_test_data(
            batch_id=batch_id, dataset=dataset)
        predictions = nais.predict(user_input=user_input,
                                   item_input=item_input,
                                   num_idx=n_u)
        # Map each candidate item to its score; for a repeated item the
        # last occurrence wins.
        map_item_score = {item: predictions[i]
                          for i, item in enumerate(item_input)}

        rank_list = heapq.nlargest(topN, map_item_score, key=map_item_score.get)
        test_rank_list_data.append([batch_id, test_item, rank_list])
        hit, ndcg, mrr = evaluate(rank_list, test_item)
        hits.append(hit)
        ndcgs.append(ndcg)
        mrrs.append(mrr)

    test_hr = np.array(hits).mean()
    test_ndcg = np.array(ndcgs).mean()
    test_mrr = np.array(mrrs).mean()
    print(f'data_set_name:{args.data_set_name}, model: {args.checkpoint_name}, HR@{topN}: {test_hr}, NDCG@{topN}: {test_ndcg}, MRR@{topN}: {test_mrr}')

    test_rank_list_data_df = pd.DataFrame(
        test_rank_list_data,
        columns=['user', 'ranked_item', 'predicted_list'])
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(args.save_pred_path, exist_ok=True)
    test_rank_list_data_df.to_csv(
        os.path.join(args.save_pred_path, 'predictions.csv'), index=False)


# MovieLens

In [None]:
# Settings for the MovieLens-1M model with 32-dimensional embeddings.
# `path`/`data_set_name` locate the dataset, `checkpoint_name` and
# `save_pred_path` identify the run and its output folder, `lr` configures the
# optimizer; the remaining keys are handed to NAIS through `args`.
args = dict2obj({
    'path': '/data',
    'data_set_name': 'ml-1m',
    'topN': 20,
    'checkpoint_name': 'NAIS_1624547999',
    'save_pred_path': '/predictions/NAIS/ml-1m/32-prod',
    'epochs': 60,
    'num_neg': 4,
    'pretrain': 1,
    'embedding_size': 32,
    'attention_factor': 16,
    'algorithm': 'prod',
    'lr': 0.01,
    'beta': 0.5,
    'regs': '(1e-7, 1e-7, 1e-5, 1e-7, 1e-7)',
    'verbose': 1,
    'out': 1,
})
# Ranking cut-off read as a global by predict_and_get_results().
topN = args.topN

In [None]:
# Load the dataset selected by the current `args`; it is reused by every
# model evaluated below until `dataset` is reassigned.
dataset = DataSet(path=args.path,
                  data_set_name=args.data_set_name)

already load the trainList...


## 32 embedding_size

In [None]:
# Build the model for the current `args` and restore its saved weights.
nais = NAIS(num_users=dataset.num_users,
            num_items=dataset.num_items,
            args=args)
# The checkpoint tracks both the model and an Adagrad optimizer
# (presumably mirroring the objects saved at training time -- confirm).
optimizer = tf.keras.optimizers.Adagrad(learning_rate=args.lr,
                                        initial_accumulator_value=1e-8)
checkpoint = tf.train.Checkpoint(model=nais,
                                 optimizer=optimizer)
checkpoint_dir = '/NAIS_pretrain/ml-1m/32'
manager = tf.train.CheckpointManager(checkpoint,
                                     directory=checkpoint_dir,
                                     # f-string so the run name is actually interpolated
                                     checkpoint_name=f'{args.checkpoint_name}.ckpt',
                                     max_to_keep=1)
latest_checkpoint = manager.latest_checkpoint
if latest_checkpoint is None:
    # restore(None) is a silent no-op and would leave the model untrained.
    raise FileNotFoundError(f'No checkpoint found in {checkpoint_dir!r}')
checkpoint.restore(latest_checkpoint)

In [None]:
# Evaluate the restored model: prints the mean HR/NDCG/MRR@topN and writes
# predictions.csv under args.save_pred_path.
predict_and_get_results()

data_set_name:ml-1m, model: NAIS_1624547999, HR@20: 0.8370860927152318, NDCG@20: 0.45807190314259716, MRR@20: 0.34767224905658956


## 16 embedding_size

In [None]:
# Settings for the MovieLens-1M model with 16-dimensional embeddings.
# `checkpoint_name` and `save_pred_path` identify the run and its output
# folder, `lr` configures the optimizer; the remaining keys are handed to NAIS
# through `args`.
args = dict2obj({
    'path': '/data',
    'data_set_name': 'ml-1m',
    'topN': 20,
    'checkpoint_name': 'NAIS_1624541449',
    'save_pred_path': '/predictions/NAIS/ml-1m/16-prod',
    'epochs': 60,
    'num_neg': 4,
    'pretrain': 1,
    'embedding_size': 16,
    'attention_factor': 16,
    'algorithm': 'prod',
    'lr': 0.01,
    'beta': 0.5,
    'regs': '(1e-7, 1e-7, 1e-5, 1e-7, 1e-7)',
    'verbose': 1,
    'out': 1,
})
# Ranking cut-off read as a global by predict_and_get_results().
topN = args.topN

In [None]:
# Build the model for the current `args` and restore its saved weights.
nais = NAIS(num_users=dataset.num_users,
            num_items=dataset.num_items,
            args=args)
# The checkpoint tracks both the model and an Adagrad optimizer
# (presumably mirroring the objects saved at training time -- confirm).
optimizer = tf.keras.optimizers.Adagrad(learning_rate=args.lr,
                                        initial_accumulator_value=1e-8)
checkpoint = tf.train.Checkpoint(model=nais,
                                 optimizer=optimizer)
checkpoint_dir = '/NAIS_pretrain/ml-1m/16'
manager = tf.train.CheckpointManager(checkpoint,
                                     directory=checkpoint_dir,
                                     # f-string so the run name is actually interpolated
                                     checkpoint_name=f'{args.checkpoint_name}.ckpt',
                                     max_to_keep=1)
latest_checkpoint = manager.latest_checkpoint
if latest_checkpoint is None:
    # restore(None) is a silent no-op and would leave the model untrained.
    raise FileNotFoundError(f'No checkpoint found in {checkpoint_dir!r}')
checkpoint.restore(latest_checkpoint)

In [None]:
# Evaluate the restored model: prints the mean HR/NDCG/MRR@topN and writes
# predictions.csv under args.save_pred_path.
predict_and_get_results()

## 64 embedding_size

In [None]:
# Settings for the MovieLens-1M model with 64-dimensional embeddings.
# `checkpoint_name` and `save_pred_path` identify the run and its output
# folder, `lr` configures the optimizer; the remaining keys are handed to NAIS
# through `args`.
args = {
    'path': '/data',
    'data_set_name': 'ml-1m',
    'topN': 20,
    'checkpoint_name': 'NAIS_1624807076',
    'save_pred_path': '/predictions/NAIS/ml-1m/64-prod',
    'epochs': 60,
    'num_neg': 4,
    'pretrain': 1,
    'embedding_size': 64,
    'attention_factor': 64,
    'algorithm': 'prod',
    'lr': 0.01,
    'beta': 0.5,
    'regs': '(1e-7, 1e-7, 1e-5, 1e-7, 1e-7)',
    'verbose': 1,
    'out': 1,
}

args = dict2obj(args)
# Refresh the global cut-off like every other configuration cell does, so the
# evaluation never silently reuses a stale topN from an earlier cell.
topN = args.topN

In [None]:
# Build the model for the current `args` and restore its saved weights.
nais = NAIS(num_users=dataset.num_users,
            num_items=dataset.num_items,
            args=args)
# The checkpoint tracks both the model and an Adagrad optimizer
# (presumably mirroring the objects saved at training time -- confirm).
optimizer = tf.keras.optimizers.Adagrad(learning_rate=args.lr,
                                        initial_accumulator_value=1e-8)
checkpoint = tf.train.Checkpoint(model=nais,
                                 optimizer=optimizer)
checkpoint_dir = '/NAIS_pretrain/ml-1m/64'
manager = tf.train.CheckpointManager(checkpoint,
                                     directory=checkpoint_dir,
                                     # f-string so the run name is actually interpolated
                                     checkpoint_name=f'{args.checkpoint_name}.ckpt',
                                     max_to_keep=1)
latest_checkpoint = manager.latest_checkpoint
if latest_checkpoint is None:
    # restore(None) is a silent no-op and would leave the model untrained.
    raise FileNotFoundError(f'No checkpoint found in {checkpoint_dir!r}')
checkpoint.restore(latest_checkpoint)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f5bd90aad90>

In [None]:
# Evaluate the restored model: prints the mean HR/NDCG/MRR@topN and writes
# predictions.csv under args.save_pred_path.
predict_and_get_results()

data_set_name:ml-1m, model: NAIS_1624807076, HR@20: 0.8375827814569536, NDCG@20: 0.45769023349891197, MRR@20: 0.34734078727893825


# pinterest

In [None]:
# Settings for the Pinterest-20 model with 16-dimensional embeddings.
# `path`/`data_set_name` locate the dataset, `checkpoint_name` and
# `save_pred_path` identify the run and its output folder, `lr` configures the
# optimizer; the remaining keys are handed to NAIS through `args`.
args = dict2obj({
    # NOTE(review): relative 'data' here, while every other configuration in
    # this notebook uses '/data' -- confirm which one is intended.
    'path': 'data',
    'data_set_name': 'pinterest-20',
    'topN': 20,
    'checkpoint_name': 'NAIS_1624607273',
    'save_pred_path': '/predictions/NAIS/pinterest-20/16-prod',
    'epochs': 60,
    'num_neg': 4,
    'pretrain': 1,
    'embedding_size': 16,
    'attention_factor': 16,
    'algorithm': 'prod',
    'lr': 0.01,
    'beta': 0.5,
    'regs': '(1e-7, 1e-7, 1e-5, 1e-7, 1e-7)',
    'verbose': 1,
    'out': 1,
})
# Ranking cut-off read as a global by predict_and_get_results().
topN = args.topN

In [None]:
# Load the dataset selected by the current `args`; it replaces the previously
# loaded `dataset` for every model evaluated below.
dataset = DataSet(path=args.path,
                  data_set_name=args.data_set_name)

already load the trainList...


## 16 embedding_size

In [None]:
# Build the model for the current `args` and restore its saved weights.
nais = NAIS(num_users=dataset.num_users,
            num_items=dataset.num_items,
            args=args)
# The checkpoint tracks both the model and an Adagrad optimizer
# (presumably mirroring the objects saved at training time -- confirm).
optimizer = tf.keras.optimizers.Adagrad(learning_rate=args.lr,
                                        initial_accumulator_value=1e-8)
checkpoint = tf.train.Checkpoint(model=nais,
                                 optimizer=optimizer)
checkpoint_dir = '/NAIS_pretrain/pinterest-20/16'
manager = tf.train.CheckpointManager(checkpoint,
                                     directory=checkpoint_dir,
                                     # f-string so the run name is actually interpolated
                                     checkpoint_name=f'{args.checkpoint_name}.ckpt',
                                     max_to_keep=1)
latest_checkpoint = manager.latest_checkpoint
if latest_checkpoint is None:
    # restore(None) is a silent no-op and would leave the model untrained.
    raise FileNotFoundError(f'No checkpoint found in {checkpoint_dir!r}')
checkpoint.restore(latest_checkpoint)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f5be5999e90>

In [None]:
# Evaluate the restored model: prints the mean HR/NDCG/MRR@topN and writes
# predictions.csv under args.save_pred_path.
predict_and_get_results()

data_set_name:pinterest-20, model: NAIS_1624607273, HR@20: 0.964194466088028, NDCG@20: 0.5796277153608185, MRR@20: 0.46368349984519325


## 32 embedding_size

In [None]:
# Settings for the Pinterest-20 model with 32-dimensional embeddings.
# `checkpoint_name` and `save_pred_path` identify the run and its output
# folder, `lr` configures the optimizer; the remaining keys are handed to NAIS
# through `args`.
args = dict2obj({
    'path': '/data',
    'data_set_name': 'pinterest-20',
    'topN': 20,
    'checkpoint_name': 'NAIS_1625224145',
    'save_pred_path': '/predictions/NAIS/pinterest-20/32-prod',
    'epochs': 60,
    'num_neg': 4,
    'pretrain': 1,
    'embedding_size': 32,
    'attention_factor': 32,
    'algorithm': 'prod',
    'lr': 0.01,
    'beta': 0.5,
    'regs': '(1e-7, 1e-7, 1e-5, 1e-7, 1e-7)',
    'verbose': 1,
    'out': 1,
})
# Ranking cut-off read as a global by predict_and_get_results().
topN = args.topN

In [None]:
# Build the model for the current `args` and restore its saved weights.
nais = NAIS(num_users=dataset.num_users,
            num_items=dataset.num_items,
            args=args)
# The checkpoint tracks both the model and an Adagrad optimizer
# (presumably mirroring the objects saved at training time -- confirm).
optimizer = tf.keras.optimizers.Adagrad(learning_rate=args.lr,
                                        initial_accumulator_value=1e-8)
checkpoint = tf.train.Checkpoint(model=nais,
                                 optimizer=optimizer)
checkpoint_dir = '/NAIS_pretrain/pinterest-20/32'
manager = tf.train.CheckpointManager(checkpoint,
                                     directory=checkpoint_dir,
                                     # f-string so the run name is actually interpolated
                                     checkpoint_name=f'{args.checkpoint_name}.ckpt',
                                     max_to_keep=1)
latest_checkpoint = manager.latest_checkpoint
if latest_checkpoint is None:
    # restore(None) is a silent no-op and would leave the model untrained.
    raise FileNotFoundError(f'No checkpoint found in {checkpoint_dir!r}')
checkpoint.restore(latest_checkpoint)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f5a9b896e50>

In [None]:
# Evaluate the restored model: prints the mean HR/NDCG/MRR@topN and writes
# predictions.csv under args.save_pred_path.
predict_and_get_results()

data_set_name:pinterest-20, model: NAIS_1625224145, HR@20: 0.9656984434740066, NDCG@20: 0.5812947137794424, MRR@20: 0.46525380984964393


## 64 embedding_size

In [None]:
# Settings for the Pinterest-20 model with 64-dimensional embeddings.
# `checkpoint_name` and `save_pred_path` identify the run and its output
# folder, `lr` configures the optimizer; the remaining keys are handed to NAIS
# through `args`.
args = dict2obj({
    'path': '/data',
    'data_set_name': 'pinterest-20',
    'topN': 20,
    'checkpoint_name': 'NAIS_1624604790',
    'save_pred_path': '/predictions/NAIS/pinterest-20/64-prod',
    'epochs': 60,
    'num_neg': 4,
    'pretrain': 1,
    'embedding_size': 64,
    'attention_factor': 64,
    'algorithm': 'prod',
    'lr': 0.01,
    'beta': 0.5,
    'regs': '(1e-7, 1e-7, 1e-5, 1e-7, 1e-7)',
    'verbose': 1,
    'out': 1,
})
# Ranking cut-off read as a global by predict_and_get_results().
topN = args.topN

In [None]:
# Build the model for the current `args` and restore its saved weights.
nais = NAIS(num_users=dataset.num_users,
            num_items=dataset.num_items,
            args=args)
# The checkpoint tracks both the model and an Adagrad optimizer
# (presumably mirroring the objects saved at training time -- confirm).
optimizer = tf.keras.optimizers.Adagrad(learning_rate=args.lr,
                                        initial_accumulator_value=1e-8)
checkpoint = tf.train.Checkpoint(model=nais,
                                 optimizer=optimizer)
checkpoint_dir = '/NAIS_pretrain/pinterest-20/64'
manager = tf.train.CheckpointManager(checkpoint,
                                     directory=checkpoint_dir,
                                     # f-string so the run name is actually interpolated
                                     checkpoint_name=f'{args.checkpoint_name}.ckpt',
                                     max_to_keep=1)
latest_checkpoint = manager.latest_checkpoint
if latest_checkpoint is None:
    # restore(None) is a silent no-op and would leave the model untrained.
    raise FileNotFoundError(f'No checkpoint found in {checkpoint_dir!r}')
checkpoint.restore(latest_checkpoint)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f5bd90b2950>

In [None]:
# Evaluate the restored model: prints the mean HR/NDCG/MRR@topN and writes
# predictions.csv under args.save_pred_path.
predict_and_get_results()

data_set_name:pinterest-20, model: NAIS_1624604790, HR@20: 0.9658977657781724, NDCG@20: 0.582161199884665, MRR@20: 0.46655216974303615
