# ABOUT 
- this notebook loads a previously saved SAR model, evaluates its precision@k on the Modcloth test split, and generates recommendations for a sample user

In [1]:
from recommenders.models.sar import SAR
from recommenders.evaluation.python_evaluation import precision_at_k

import pandas as pd
import os
from scipy.sparse import save_npz, load_npz
from numpy import save, load 
import json
import yaml

### Read data

In [3]:
# Directory holding the pre-split interaction tables. Kept in one place so the
# notebook can be pointed at another copy of the data by editing a single line.
DATA_DIR = r"C:\Users\tanch\Documents\NTU\NTU Year 4\FYP - GNN\data\RecSys Datasets"
train_path = os.path.join(DATA_DIR, "train.pkl")
test_path = os.path.join(DATA_DIR, "test.pkl")

# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source.
train = pd.read_pickle(train_path)
test = pd.read_pickle(test_path)

In [5]:
# Number of rows in the train and test splits, shown as a (train, test) pair.
tuple(map(len, (train, test)))

(10565, 3589)

### Load model

In [3]:
# Read the SAR settings (column names, similarity type, top-k, ...) from sar.yml.
with open("sar.yml", "r") as f:
    try:
        config = yaml.safe_load(f)
    except yaml.YAMLError as exc:
        # Show the parse error, then stop: continuing would leave `config`
        # undefined (or stale from an earlier run of this cell), and the
        # following cells would fail in a much less obvious way.
        print(exc)
        raise

In [8]:
# Similarity types: ["cooccurrence" | "cosine" | "inclusion index" | "jaccard" | "lift" | "mutual information" | "lexicographers mutual information"]
# Collect the constructor arguments first: column names and the similarity
# type come from `config`, the remaining options are fixed for this notebook.
sar_params = dict(
    col_user=config["COL_USER"],
    col_item=config["COL_ITEM"],
    col_rating=config["COL_RATING"],
    similarity_type=config["SIMILARITY_TYPE"],
    time_decay_coefficient=30,
    timedecay_formula=False,
    normalize=False,
)
model_sar = SAR(**sar_params)

In [9]:
def sar_load(model, directory):
    """Restore saved SAR state from ``directory`` onto ``model``.

    Four files are read from ``directory`` and assigned as attributes:

    - ``sar_user_affinity.npz``   -> ``model.user_affinity`` (via ``load_npz``)
    - ``sar_item_similarity.npy`` -> ``model.item_similarity`` (via ``load``)
    - ``sar_index2item.json``     -> ``model.index2item`` (keys cast to ``int``)
    - ``sar_user2index.json``     -> ``model.user2index`` (used as parsed)

    Args:
        model: Object to populate; it is modified in place.
        directory: Folder containing the four files listed above.

    Returns:
        None.
    """
    def _in_dir(filename):
        # Full path of a file inside the model directory.
        return os.path.join(directory, filename)

    model.user_affinity = load_npz(_in_dir("sar_user_affinity.npz"))
    model.item_similarity = load(_in_dir("sar_item_similarity.npy"))

    with open(_in_dir("sar_index2item.json"), "r") as fh:
        raw_index2item = json.load(fh)
    # JSON object keys are always strings, so turn the indices back into ints.
    model.index2item = {int(idx): item for idx, item in raw_index2item.items()}

    with open(_in_dir("sar_user2index.json"), "r") as fh:
        model.user2index = json.load(fh)

In [10]:
# Fill model_sar with the state files stored in the saved-model folder.
sar_load(
    model=model_sar,
    directory=r"C:\Users\tanch\Documents\NTU\NTU Year 4\FYP - GNN\Recommender API\models",
)

In [14]:
# Recommend TOP_K items for the users in the test split. Note that `top_k`
# here is the resulting recommendations table, not the integer k.
top_k = model_sar.recommend_k_items(
    test,
    top_k=config["TOP_K"],
    remove_seen=True,
)

# Score those recommendations against the test split with precision@k.
eval_precision = precision_at_k(
    test,
    top_k,
    col_user=config["COL_USER"],
    col_item=config["COL_ITEM"],
    col_rating=config["COL_RATING"],
    k=config["TOP_K"],
)
eval_precision

0.04263681592039801

## Make a prediction

In [15]:
# Recommend TOP_K items for one user. The user column is named from `config`,
# like the model's other column settings, so this cell keeps working if
# COL_USER changes.
single_user = pd.DataFrame({config['COL_USER']: ["999697"]})
model_sar.recommend_k_items(single_user, top_k=config['TOP_K'], remove_seen=True)

Unnamed: 0,user_id,item_id,prediction
0,999697,645822,0.278955
1,999697,401773,0.211416
2,999697,412737,0.181482
3,999697,406357,0.17857
4,999697,175771,0.154103
5,999697,200824,0.148545
6,999697,693560,0.139601
7,999697,654585,0.123006
8,999697,486643,0.118607
9,999697,391519,0.117674


{'COL_USER': 'user_id', 'COL_ITEM': 'item_id', 'COL_RATING': 'rating', 'SIMILARITY_TYPE': 'jaccard', 'TOP_K': 10, 'MODEL_DIR': './models'}
