### Load Libraries

In [1]:
# set the environment path to find Recommenders
import sys
import itertools
import logging
import os
import pandas as pd

from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.sar import SAR

print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))

System version: 3.8.16 (default, Mar  2 2023, 03:18:16) [MSC v.1916 64 bit (AMD64)]
Pandas version: 1.5.3


### Load Dataset

In [2]:
# Load the dataset from csv file
df1 = pd.read_csv('rats.csv')
df2 = pd.read_csv('icat.csv')
merged_df = pd.merge(df1, df2, on='itemId')

# create new dataframe
df3 = pd.DataFrame(merged_df)
df3['Category'] = df3['Category'].astype('category').cat.codes
df3.head()

Unnamed: 0,userId,itemId,rating,ItemName,Category,Quality
0,13,0,1.533462,Restaurant Fake,5,1.647351
1,15,0,1.283205,Restaurant Fake,5,1.647351
2,17,0,1.506836,Restaurant Fake,5,1.647351
3,19,0,1.260289,Restaurant Fake,5,1.647351
4,23,0,3.203168,Restaurant Fake,5,1.647351


In [6]:
# Model parameters
TOP_K = 10
header = {
    "col_user": "userId",
    "col_item": "itemId",
    "col_rating": "rating",
    "col_timestamp": "Category",
    "col_prediction": "Prediction",
}

### Split Data

In [4]:
train, test = python_stratified_split(df3, ratio=0.75, col_user=header["col_user"], col_item=header["col_item"], seed=42)

# SAR Model

In [7]:
# set log level to INFO
logging.basicConfig(level=logging.DEBUG, 
                    format='%(asctime)s %(levelname)-8s %(message)s')

model = SAR(
    similarity_type="jaccard", 
    time_decay_coefficient=30, 
    time_now=None, 
    timedecay_formula=True, 
    **header
)

### Model Training

In [8]:
model.fit(train)

2023-05-14 02:42:26,654 INFO     Collecting user affinity matrix
2023-05-14 02:42:26,669 INFO     Calculating time-decayed affinities
2023-05-14 02:42:27,163 INFO     Creating index columns
2023-05-14 02:42:28,332 INFO     Building user affinity sparse matrix
2023-05-14 02:42:28,362 INFO     Calculating item co-occurrence
2023-05-14 02:42:28,450 INFO     Calculating item similarity
2023-05-14 02:42:28,452 INFO     Using jaccard based similarity
2023-05-14 02:42:28,456 INFO     Done training


In [9]:
# top k items to recommend
top_k = model.recommend_k_items(test, top_k=TOP_K, remove_seen=True)

2023-05-14 02:42:29,374 INFO     Calculating recommendation scores
2023-05-14 02:42:29,434 INFO     Removing seen items


## Top 10 Item Id and Name

In [10]:
top_k_with_titles = (top_k.join(df3[['itemId', 'ItemName']].drop_duplicates().set_index('itemId'), 
                                on='itemId', 
                                how='inner').sort_values(by=['userId', 'Prediction'], ascending=False))
display(top_k_with_titles.head(10))

Unnamed: 0,userId,itemId,Prediction,ItemName
693070,99999,2,0.621051,Random Shopping Mall
693071,99999,1,0.619378,Fiction Nightclub
693072,99999,0,0.61197,Restaurant Fake
693073,99999,3,0.604664,Bogus Waterpark
693074,99999,4,0.599879,Unknown Nature Route
693075,99999,6,0.589236,Never Happened Festival
693076,99999,7,0.580514,False Tavern
693077,99999,8,0.552795,MakeBelieve Pub
693078,99999,9,0.551365,Another Sport Event
693079,99999,10,0.513836,Surprise Concert


In [11]:
# all ranking metrics have the same arguments
args = [test, top_k]
kwargs = dict(col_user='userId', 
              col_item='itemId', 
              col_rating='rating', 
              col_prediction='Prediction', 
              relevancy_method='top_k', 
              k=TOP_K)

eval_map = map_at_k(*args, **kwargs)
eval_ndcg = ndcg_at_k(*args, **kwargs)
eval_precision = precision_at_k(*args, **kwargs)
eval_recall = recall_at_k(*args, **kwargs)

### Results

In [13]:
print(f"Model: SAR",
      f"Top K:\t\t {TOP_K}",
      f"MAP:\t\t {eval_map:f}",
      f"NDCG:\t\t {eval_ndcg:f}",
      f"Precision@K:\t {eval_precision:f}",
      f"Recall@K:\t {eval_recall:f}", sep='\n')

Model: SAR
Top K:		 10
MAP:		 0.054901
NDCG:		 0.094697
Precision@K:	 0.026674
Recall@K:	 0.219833
