In [40]:
import numpy as np
import pandas as pd
from libreco.data import random_split, DatasetPure
from libreco.algorithms import NCF, FM, DeepFM # pure data, 
from libreco.evaluation import evaluate


import tensorflow as tf

# Print the TensorFlow version this notebook run is using.
print(tf.__version__)

2.14.0


In [41]:
# Read the rating table (stored as Parquet, despite the ".gzip" suffix in the
# file name) and rename its columns to the user / item / label names that the
# later cells select.
data = pd.read_parquet("../data/dicoding_user_item_rating.gzip").rename(
    columns={"user_id": "user", "course_id": "item", "rating": "label"}
)

In [42]:
# Show the renamed frame as this cell's output.
data

Unnamed: 0,user,item,graduated_at,label
0,623699,14,2020-03-10 11:45:50,5
1,406371,14,2020-03-10 08:44:09,4
2,1946,14,2020-03-11 13:24:10,5
3,186713,14,2020-03-11 17:36:04,4
4,462580,14,2020-03-11 08:43:36,4
...,...,...,...,...
303951,3244648,615,2023-09-04 17:09:09,5
303952,3258243,615,2023-09-04 19:12:11,5
303953,3270793,615,2023-09-04 20:11:31,5
303954,2547435,615,2023-09-04 20:38:22,5


In [43]:
# Keep only the user / item / label columns and split them into three parts
# with ratios 0.8 / 0.1 / 0.1 (train / eval / test).
train_data, eval_data, test_data = random_split(
    data[["user", "item", "label"]],
    multi_ratios=[0.8, 0.1, 0.1],
)

In [44]:
# Convert the three splits into LibRecommender dataset objects.
# The training split is built first; it also returns `data_info`, which the
# model constructors further down receive.
# NOTE(review): the split names are rebound to the converted objects here, so
# this cell presumably cannot be re-run on its own without re-running the
# split cell first -- confirm.
train_data, data_info = DatasetPure.build_trainset(train_data)
eval_data = DatasetPure.build_evalset(eval_data)
test_data = DatasetPure.build_testset(test_data)

In [45]:
# NCF
# Reset the default TensorFlow graph before building a new model.
tf.compat.v1.reset_default_graph()

# NOTE(review): loss_type and num_neg look like ranking-oriented settings;
# confirm whether they have any effect when task="rating".
ncf = NCF(
    task="rating",
    data_info=data_info,
    # model size and optimisation settings
    embed_size=16,
    n_epochs=10,
    lr=1e-3,
    batch_size=2048,
    # loss / sampling settings
    loss_type="cross_entropy",
    num_neg=1,
)

In [46]:
# Train NCF on the training split. Negative sampling stays off because this is
# a rating task (it would be switched on otherwise).
ncf.fit(
    train_data,
    neg_sampling=False,
    verbose=0,
    eval_data=eval_data,
    metrics=["loss"],
)

# Final evaluation on the test split; being the last expression of the cell,
# the returned MAE / RMSE values are displayed.
evaluate(
    model=ncf,
    data=test_data,
    neg_sampling=False,
    metrics=["mae", "rmse"],
)


Training start time: [35m2023-10-18 10:40:07[0m


  net = tf.layers.batch_normalization(net, training=is_training)
  net = tf.layers.batch_normalization(net, training=is_training)
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 147.48it/s]


{'mae': 0.1968647, 'rmse': 0.379914}

# FM

In [47]:
# Reset the default TensorFlow graph before building a new model.
tf.compat.v1.reset_default_graph()

# Same settings as the NCF model above, so the reported metrics are comparable.
# NOTE(review): loss_type and num_neg look like ranking-oriented settings;
# confirm whether they have any effect when task="rating".
fm = FM(
    task="rating",
    data_info=data_info,
    # model size and optimisation settings
    embed_size=16,
    n_epochs=10,
    lr=1e-3,
    batch_size=2048,
    # loss / sampling settings
    loss_type="cross_entropy",
    num_neg=1,
)

In [48]:
# Train FM on the training split. Negative sampling stays off because this is
# a rating task (it would be switched on otherwise).
fm.fit(
    train_data,
    neg_sampling=False,
    verbose=0,
    eval_data=eval_data,
    metrics=["loss"],
)

# Final evaluation on the test split; being the last expression of the cell,
# the returned MAE / RMSE values are displayed.
evaluate(
    model=fm,
    data=test_data,
    neg_sampling=False,
    metrics=["mae", "rmse"],
)


  pairwise_term = tf.layers.batch_normalization(


Training start time: [35m2023-10-18 10:40:45[0m
total params: [33m1,589,654[0m | embedding params: [33m1,589,604[0m | network params: [33m50[0m


eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 351.56it/s]


{'mae': 0.27339804, 'rmse': 0.41698718}

# DeepFM

In [49]:
# Reset the default TensorFlow graph before building a new model.
tf.compat.v1.reset_default_graph()

# Same settings as the NCF and FM models above, so the reported metrics are
# comparable.
# NOTE(review): loss_type and num_neg look like ranking-oriented settings;
# confirm whether they have any effect when task="rating".
deepfm = DeepFM(
    task="rating",
    data_info=data_info,
    # model size and optimisation settings
    embed_size=16,
    n_epochs=10,
    lr=1e-3,
    batch_size=2048,
    # loss / sampling settings
    loss_type="cross_entropy",
    num_neg=1,
)


In [50]:

# Train DeepFM on the training split. Negative sampling stays off because this
# is a rating task (it would be switched on otherwise).
deepfm.fit(
    train_data,
    neg_sampling=False,
    verbose=0,
    eval_data=eval_data,
    metrics=["loss"],
)

# Final evaluation on the test split; being the last expression of the cell,
# the returned MAE / RMSE values are displayed.
evaluate(
    model=deepfm,
    data=test_data,
    neg_sampling=False,
    metrics=["mae", "rmse"],
)

  net = tf.layers.batch_normalization(net, training=is_training)
  net = tf.layers.batch_normalization(net, training=is_training)


Training start time: [35m2023-10-18 10:41:13[0m
total params: [33m1,604,663[0m | embedding params: [33m1,589,828[0m | network params: [33m14,835[0m


eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 128.21it/s]


{'mae': 0.21199709, 'rmse': 0.38812432}