In [1]:
import sys
sys.path.append("../")

import pandas as pd

from utils.common.timer import Timer
from Datasets import Movielens
from Evaluation.data_split import split_data
from Evaluation.ranking_metrics import *
from WRMF.wrmf import *
from WRMF import wrmf_rec

FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


# **1. Load Dataset**

In [2]:
DATANAME = 'movielens100k'
df_movielens = Movielens.load_data()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.81k/4.81k [00:02<00:00, 1.92kKB/s]


# **2. Split Data into Train/Test**

In [3]:
# train, test = split_data(df_movielens, split_strategy="temporal_global")
train, test = split_data(df_movielens, split_strategy="random_by_user", random_state=0)

print(train.shape, test.shape)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 943/943 [00:04<00:00, 223.13it/s]

(79619, 4) (20352, 4)





# **3. WRMF Models**

In [4]:
factors = 128
models = [
    WRMF(train, weight_strategy="uniform_pos", alpha=0.01, k=32, 
         learning_rate=0.01),
    WRMF(train, weight_strategy="uniform_neg", alpha=0.1, lambda_u=0.1, 
         lambda_v=0.1, k=factors, learning_rate=0.01),
    WRMF(train, weight_strategy="user_oriented", alpha=0.007, lambda_u=0.1, 
         lambda_v=0.1, k=factors, learning_rate=0.01),
    WRMF(train, weight_strategy="item_oriented", alpha=0.001, k=factors, 
         learning_rate=0.01),
    WRMF(train, weight_strategy="item_popularity", alpha=0.001, c_0=500, 
         lambda_u=0.1, lambda_v=0.1, k=factors, learning_rate=0.0)
]

strategies = [
    "uniform_pos",
    "uniform_neg",
    "user_oriented",
    "item_oriented",
    "item_popularity"
]

maximum of weights=0.01, minimum=0.01
maximum of weights=0.1, minimum=0.1
maximum of weights=4.123, minimum=0.112
maximum of weights=0.9420000000000001, minimum=0.482
maximum of weights=0.3027277087538526, minimum=0.30087664168585826


# **4. Train Models and Test**

In [5]:
def generate_summary(data, strategy, k, train_time, ranking_metrics):
    summary = {"Data": data, "Strategy": strategy, "K": k, "Train time (s)": train_time}
    if ranking_metrics is None:
        ranking_metrics = {
            "Precision@k": np.nan,
            "Recall@k": np.nan,
            "NDCG@k": np.nan,
        }
    summary.update(ranking_metrics)
    return summary

In [7]:
cols = ["Data", "Strategy", "K", "Train time (s)","Precision@k", "Recall@k", "NDCG@k"]
df_result = pd.DataFrame(columns=cols)
k = 10


for strategy, model in zip(strategies, models):
    # 1. train
    with Timer() as train_time:
        model =  train_cornac(model, train)
    
    # 2. recommendation
    top_10 = wrmf_rec.recommend_top_k(model, train, k)
    
    # 3. evaluation - ranking metric@k
    eval_result = ranking_metrics(top_10, test)
    
    # 4. summary of evaluation results
    summary = generate_summary(DATANAME, strategy, k, train_time, eval_result)
    df_result.loc[df_result.shape[0] + 1] = summary

HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))


Learning completed!


HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))


Learning completed!


HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))


Learning completed!


HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))


Learning completed!


HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))


Learning completed!


# **5. Summary**

In [9]:
df_result

Unnamed: 0,Data,Strategy,K,Train time (s),Precision@k,Recall@k,NDCG@k
1,movielens100k,uniform_pos,10,7.5657,0.295864,0.183342,0.348125
2,movielens100k,uniform_neg,10,16.6732,0.331813,0.21896,0.388555
3,movielens100k,user_oriented,10,14.8902,0.363945,0.227079,0.42819
4,movielens100k,item_oriented,10,12.7446,0.367126,0.232437,0.428994
5,movielens100k,item_popularity,10,11.9179,0.013892,0.004979,0.01435
