In [4]:
import sys
sys.path.append("../")

import pandas as pd

from utils.common.timer import Timer
from Datasets import Movielens
from Evaluation.data_split import split_data
from Evaluation.ranking_metrics import *
from WRMF.wrmf import *
from WRMF import wrmf_rec

# **1. Load Dataset**

In [12]:
import os
DATANAME = 'movielens100k'
if os.path.exists(f"{DATANAME}.csv.gz"):
    df_movielens = pd.read_parquet(f"{DATANAME}.csv.gz", compression='gzip')
else:
    df_movielens = Movielens.load_data()
    df_movielens.to_csv(f"{DATANAME}.csv.gz", compression='gzip', index=False)

100%|██████████| 4.81k/4.81k [00:01<00:00, 2.95kKB/s]


# **2. Split Data into Train/Test**

In [6]:
# train, test = split_data(df_movielens, split_strategy="temporal_global")
train, test = split_data(df_movielens, split_strategy="random_by_user", random_state=0)

print(train.shape, test.shape)

100%|██████████| 943/943 [00:01<00:00, 841.95it/s]


(79619, 4) (20352, 4)


# **3. WRMF Models**

In [7]:
factors = 128
models = [
    WRMF(train, weight_strategy="uniform_pos", alpha=0.01, k=32, 
         learning_rate=0.01),
    WRMF(train, weight_strategy="uniform_neg", alpha=0.1, lambda_u=0.1, 
         lambda_v=0.1, k=factors, learning_rate=0.01),
    WRMF(train, weight_strategy="user_oriented", alpha=0.007, lambda_u=0.1, 
         lambda_v=0.1, k=factors, learning_rate=0.01),
    WRMF(train, weight_strategy="item_oriented", alpha=0.001, k=factors, 
         learning_rate=0.01),
    WRMF(train, weight_strategy="item_popularity", alpha=0.001, c_0=500, 
         lambda_u=0.1, lambda_v=0.1, k=factors, learning_rate=0.0)
]

strategies = [
    "uniform_pos",
    "uniform_neg",
    "user_oriented",
    "item_oriented",
    "item_popularity"
]

maximum of weights=0.01, minimum=0.01
maximum of weights=0.1, minimum=0.1
maximum of weights=4.123, minimum=0.112
maximum of weights=0.9420000000000001, minimum=0.482
maximum of weights=0.3027277087538526, minimum=0.30087664168585826


# **4. Train Models and Test**

In [8]:
def generate_summary(data, strategy, k, train_time, ranking_metrics):
    summary = {"Data": data, "Strategy": strategy, "K": k, "Train time (s)": train_time}
    if ranking_metrics is None:
        ranking_metrics = {
            "Precision@k": np.nan,
            "Recall@k": np.nan,
            "NDCG@k": np.nan,
        }
    summary.update(ranking_metrics)
    return summary

In [9]:
cols = ["Data", "Strategy", "K", "Train time (s)","Precision@k", "Recall@k", "NDCG@k"]
df_result = pd.DataFrame(columns=cols)
k = 10


for strategy, model in zip(strategies, models):
    # 1. train
    with Timer() as train_time:
        model =  train_cornac(model, train)
    
    # 2. recommendation
    top_10 = wrmf_rec.recommend_top_k(model, train, k)
    
    # 3. evaluation - ranking metric@k
    eval_result = ranking_metrics(top_10, test)
    
    # 4. summary of evaluation results
    summary = generate_summary(DATANAME, strategy, k, train_time, eval_result)
    df_result.loc[df_result.shape[0] + 1] = summary

100%|██████████| 100/100 [00:06<00:00, 16.23it/s, loss=1.31e+5]


Learning completed!


100%|██████████| 100/100 [00:07<00:00, 13.31it/s, loss=2.78e+6]


Learning completed!


100%|██████████| 100/100 [00:07<00:00, 12.87it/s, loss=6.13e+6]


Learning completed!


100%|██████████| 100/100 [00:07<00:00, 12.89it/s, loss=6.33e+6]


Learning completed!


100%|██████████| 100/100 [00:07<00:00, 12.64it/s, loss=1.34e+7]


Learning completed!


# **5. Summary**

In [10]:
df_result

Unnamed: 0,Data,Strategy,K,Train time (s),Precision@k,Recall@k,NDCG@k
1,movielens100k,uniform_pos,10,9.6486,0.293425,0.181763,0.342781
2,movielens100k,uniform_neg,10,7.8639,0.331919,0.22015,0.390266
3,movielens100k,user_oriented,10,8.1153,0.369141,0.232154,0.435058
4,movielens100k,item_oriented,10,8.254,0.365748,0.23325,0.431009
5,movielens100k,item_popularity,10,8.2639,0.016543,0.007205,0.017686
