In [20]:
import pandas as pd
from libreco.data import random_split, DatasetPure
from libreco.algorithms import FM, DeepFM # pure data,
from libreco.evaluation import evaluate


import tensorflow as tf

print(tf.__version__)

2.15.0


In [21]:
data = pd.read_parquet("../data/dicoding_user_item_rating.gzip")
data.rename(columns = 
    {
        "user_id":"user",
        "course_id":"item",
        "rating":"label"
    }, inplace= True
) 

In [35]:
data

Unnamed: 0,user,item,graduated_at,label
0,623699,14,2020-03-10 11:45:50,5
1,406371,14,2020-03-10 08:44:09,4
2,1946,14,2020-03-11 13:24:10,5
3,186713,14,2020-03-11 17:36:04,4
4,462580,14,2020-03-11 08:43:36,4
...,...,...,...,...
303951,3244648,615,2023-09-04 17:09:09,5
303952,3258243,615,2023-09-04 19:12:11,5
303953,3270793,615,2023-09-04 20:11:31,5
303954,2547435,615,2023-09-04 20:38:22,5


In [23]:
train_data, eval_data, test_data = random_split(data[[
    "user", "item", "label"
]], multi_ratios=[0.8, 0.1, 0.1])

In [24]:
# LibRecommender Datatype

train_data, data_info= DatasetPure.build_trainset(train_data)
eval_data = DatasetPure.build_evalset(eval_data)
test_data = DatasetPure.build_testset(test_data)

# Rating Task

## FM

In [25]:
tf.compat.v1.reset_default_graph()

fm = FM(
    task="rating",
    data_info=data_info,
    loss_type="cross_entropy",
    embed_size=16,
    n_epochs=10,
    lr=1e-3,
    batch_size=2048,
    num_neg=1,
)

In [26]:
fm.fit(
    train_data,
    neg_sampling=False, #for rating, this param is false else True
    verbose=0,
    eval_data=eval_data,
    metrics=["loss"],
    
)

# do final evaluation on test data
evaluate(
    model=fm,
    data=test_data,
    neg_sampling=False,
    metrics=["mae", "rmse"],

)


Training start time: [35m2023-12-28 14:50:36[0m
total params: [33m1,589,654[0m | embedding params: [33m1,589,604[0m | network params: [33m50[0m


  pairwise_term = tf.layers.batch_normalization(
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 584.88it/s]


{'mae': 0.2733981, 'rmse': 0.41698718}

## DeepFM

In [27]:
tf.compat.v1.reset_default_graph()

deepfm = DeepFM(
    task="rating",
    data_info=data_info,
    loss_type="cross_entropy",
    embed_size=16,
    n_epochs=10,
    lr=1e-3,
    batch_size=2048,
    num_neg=1,
    
)


In [30]:

deepfm.fit(
    train_data,
    neg_sampling=False, #for rating, this param is false else True
    verbose=0,
    eval_data=eval_data,
    metrics=["loss"],
    
)

# do final evaluation on test data
evaluate(
    model=deepfm,
    data=test_data,
    neg_sampling=False,
    metrics=["mae", "rmse"],

)

Training start time: [35m2023-12-28 14:51:41[0m


eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 253.08it/s]


{'mae': 0.1981104, 'rmse': 0.3776615}

# Ranking Task

In [44]:
data_ranking = data.copy()
data_ranking.label = data_ranking.label.apply(lambda x: 0 if x < 4.5 else 1)

In [45]:
data_ranking

Unnamed: 0,user,item,graduated_at,label
0,623699,14,2020-03-10 11:45:50,1
1,406371,14,2020-03-10 08:44:09,0
2,1946,14,2020-03-11 13:24:10,1
3,186713,14,2020-03-11 17:36:04,0
4,462580,14,2020-03-11 08:43:36,0
...,...,...,...,...
303951,3244648,615,2023-09-04 17:09:09,1
303952,3258243,615,2023-09-04 19:12:11,1
303953,3270793,615,2023-09-04 20:11:31,1
303954,2547435,615,2023-09-04 20:38:22,1


In [46]:
train_data, eval_data, test_data = random_split(data_ranking[[
    "user", "item", "label"
]], multi_ratios=[0.8, 0.1, 0.1])

In [47]:
# LibRecommender Datatype

train_data, data_info= DatasetPure.build_trainset(train_data)
eval_data = DatasetPure.build_evalset(eval_data)
test_data = DatasetPure.build_testset(test_data)

## FM

In [61]:
tf.compat.v1.reset_default_graph()

fm = FM(
    task="ranking",
    data_info=data_info,
    loss_type="cross_entropy",
    embed_size=64,
    n_epochs=100,
    lr=1e-3,
    batch_size=2048,
    num_neg=5,
)

In [63]:
fm.fit(
    train_data,
    neg_sampling=True, #for rating, this param is false else True
    verbose=0,
    eval_data=eval_data,
    metrics=["loss"],

)

# do final evaluation on test data
evaluate(
    model=fm,
    data=test_data,
    k=10,
    neg_sampling=True,
    metrics=["precision", "recall"],

)


Training start time: [35m2023-12-28 15:58:32[0m


eval_listwise: 100%|██████████| 135/135 [00:00<00:00, 149.61it/s]


{'precision': 0.10340928657521667, 'recall': 0.8175449528947779}