In [55]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from recommenders.datasets import movielens
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.utils.timer import Timer
from recommenders.evaluation.python_evaluation import rmse, mae
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error









In [28]:
# Tải dữ liệu MovieLens 100K
data = movielens.load_pandas_df(
    size="100k",
    header=["userID", "itemID", "rating", "timestamp"]
)

data = data[["userID", "itemID", "rating"]]

# Xem trước dữ liệu
data.head()

100%|██████████| 4.81k/4.81k [01:40<00:00, 47.9KB/s]


Unnamed: 0,userID,itemID,rating
0,196,242,3.0
1,186,302,3.0
2,22,377,1.0
3,244,51,2.0
4,166,346,1.0


In [36]:
data_path = "data_ncf.csv"
data = data.sort_values(by="userID") 
data.to_csv(data_path, index=False)

ncf_data = NCFDataset(data_path, seed=42)
train_loader = ncf_data.train_loader(256)
test_loader = ncf_data.test_loader(256)

INFO:recommenders.models.ncf.dataset:Indexing data_ncf.csv ...


In [16]:
train, test = train_test_split(data, test_size=0.2, random_state=42)

In [44]:
ncf_model = NCF(
    n_users=ncf_data.n_users,
    n_items=ncf_data.n_items,
    model_type="NeuMF",  # có thể chọn GMF, MLP, hoặc NeuMF
    batch_size=256,
    learning_rate=0.001,
    seed=42
)

ncf_model.fit(ncf_data)

INFO:recommenders.models.ncf.ncf_singlenode:Epoch 1 [22.27s]: train_loss = 0.377875 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 2 [20.45s]: train_loss = 0.306297 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 3 [12.27s]: train_loss = 0.289422 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 4 [15.73s]: train_loss = 0.276322 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 5 [13.91s]: train_loss = 0.266201 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 6 [13.80s]: train_loss = 0.259726 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 7 [14.78s]: train_loss = 0.254786 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 8 [13.98s]: train_loss = 0.250033 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 9 [13.40s]: train_loss = 0.246006 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [12.26s]: train_loss = 0.242178 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 11 [13.24s]: train_loss = 0.239438 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 12 [12.24s]: 

In [49]:
user_input = test["userID"].values
item_input = test["itemID"].values

predictions = []
for u, i in zip(user_input, item_input):
    pred = ncf_model.predict(u, i)
    predictions.append(pred)
predictions = np.array(predictions)

# Tạo DataFrame dự đoán
df_pred = test.copy()
df_pred["prediction"] = predictions

# Đánh giá
rmse_score = rmse(
    rating_true=test,
    rating_pred=df_pred,
    col_user="userID",
    col_item="itemID",
    col_rating="rating",
    col_prediction="prediction"
)
mae_score = mae(
    rating_true=test,
    rating_pred=df_pred,
    col_user="userID",
    col_item="itemID",
    col_rating="rating",
    col_prediction="prediction"
)

print(f"RMSE: {rmse_score}")
print(f"MAE: {mae_score}")

RMSE: 3.050107148367481
MAE: 2.841166837998649


In [51]:
user_id = data["userID"].iloc[0]
items_to_predict = data["itemID"].unique()

scores = []
for item in items_to_predict:
    score = ncf_model.predict(user_id, item)
    scores.append((item, score))

top_k = 5
top_items = sorted(scores, key=lambda x: x[1], reverse=True)[:top_k]

print(f"Top {top_k} items recommended for user {user_id}:")
for idx, (item, score) in enumerate(top_items, 1):
    print(f"{idx}. ItemID: {item}, Predicted rating: {score:.4f}")

Top 5 items recommended for user 1:
1. ItemID: 50, Predicted rating: 0.9889
2. ItemID: 174, Predicted rating: 0.9846
3. ItemID: 168, Predicted rating: 0.9806
4. ItemID: 181, Predicted rating: 0.9790
5. ItemID: 204, Predicted rating: 0.9626


In [54]:
import os
import numpy as np
import matplotlib.pyplot as plt

n_runs = 10
rmse_list = []
mae_list = []

for i in range(n_runs):
    # Tách train/test ngẫu nhiên mỗi lần
    train, test = train_test_split(data, test_size=0.2, random_state=42+i)
    train = train.sort_values(by="userID")
    train_path = f"train_{i}.csv"
    train.to_csv(train_path, index=False)
    ncf_data = NCFDataset(train_path, seed=42+i)
    ncf_model = NCF(
        n_users=ncf_data.n_users,
        n_items=ncf_data.n_items,
        model_type="NeuMF",
        batch_size=256,
        learning_rate=0.001,
        seed=42+i
    )
    ncf_model.fit(ncf_data)
    # Dự đoán trên test
    valid_users = set(train["userID"].unique())
    valid_items = set(train["itemID"].unique())
    predictions = []
    test_filtered = []
    for u, it, rating in zip(test["userID"], test["itemID"], test["rating"]):
        if u in valid_users and it in valid_items:
            pred = ncf_model.predict(u, it)
            predictions.append(pred)
            test_filtered.append((u, it, rating))
    df_pred = pd.DataFrame(test_filtered, columns=["userID", "itemID", "rating"])
    df_pred["prediction"] = predictions
    # Đánh giá
    rmse_score = rmse(
        rating_true=test,
        rating_pred=df_pred,
        col_user="userID",
        col_item="itemID",
        col_rating="rating",
        col_prediction="prediction"
    )
    mae_score = mae(
        rating_true=test,
        rating_pred=df_pred,
        col_user="userID",
        col_item="itemID",
        col_rating="rating",
        col_prediction="prediction"
    )
    rmse_list.append(rmse_score)
    mae_list.append(mae_score)
    print(f"Lan {i+1}: RMSE = {rmse_score:.4f} | MAE = {mae_score:.4f}")
    os.remove(train_path)

# Tổng kết
mean_rmse = np.mean(rmse_list)
mean_mae = np.mean(mae_list)
print(f"\nTrung binh sau 10 lan:")
print(f"RMSE trung binh: {mean_rmse:.4f}")
print(f"MAE trung binh: {mean_mae:.4f}")

# Vẽ biểu đồ
iterations = np.arange(1, n_runs+1)
plt.figure(figsize=(10, 6))
plt.plot(iterations, rmse_list, marker='o', label='RMSE')
plt.plot(iterations, mae_list, marker='s', label='MAE')
plt.xlabel('Lần lặp')
plt.ylabel('Giá trị')
plt.title('Biểu đồ RMSE và MAE qua các lần lặp')
plt.legend()
plt.grid(True)
plt.show()

INFO:recommenders.models.ncf.dataset:Indexing train_0.csv ...
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 1 [16.37s]: train_loss = 0.399318 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 2 [9.69s]: train_loss = 0.335035 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 3 [10.20s]: train_loss = 0.315766 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 4 [8.97s]: train_loss = 0.303451 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 5 [7.53s]: train_loss = 0.293628 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 6 [7.79s]: train_loss = 0.286000 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 7 [9.98s]: train_loss = 0.279550 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 8 [7.81s]: train_loss = 0.275205 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 9 [8.26s]: train_loss = 0.270304 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [10.98s]: train_loss = 0.266860 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 11 [15.08s]: train_loss = 0.263632 
INFO:rec

Lan 1: RMSE = 3.1903 | MAE = 2.9975


INFO:recommenders.models.ncf.ncf_singlenode:Epoch 1 [6.64s]: train_loss = 0.410919 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 2 [5.53s]: train_loss = 0.339418 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 3 [5.44s]: train_loss = 0.318454 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 4 [5.35s]: train_loss = 0.307119 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 5 [5.86s]: train_loss = 0.298113 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 6 [15.46s]: train_loss = 0.289441 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 7 [5.60s]: train_loss = 0.281349 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 8 [11.74s]: train_loss = 0.275456 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 9 [16.50s]: train_loss = 0.269951 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [13.95s]: train_loss = 0.266958 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 11 [5.66s]: train_loss = 0.263657 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 12 [5.95s]: train_lo

Lan 2: RMSE = 3.1660 | MAE = 2.9697


INFO:recommenders.models.ncf.dataset:Indexing train_2.csv ...
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 1 [23.63s]: train_loss = 0.398012 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 2 [8.54s]: train_loss = 0.331464 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 3 [6.82s]: train_loss = 0.312745 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 4 [5.90s]: train_loss = 0.302629 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 5 [5.84s]: train_loss = 0.293334 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 6 [6.15s]: train_loss = 0.284212 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 7 [8.53s]: train_loss = 0.276568 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 8 [5.97s]: train_loss = 0.271864 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 9 [5.94s]: train_loss = 0.268100 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [6.02s]: train_loss = 0.264632 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 11 [6.15s]: train_loss = 0.262793 
INFO:recomm

Lan 3: RMSE = 3.2091 | MAE = 3.0131


INFO:recommenders.models.ncf.dataset:Indexing train_3.csv ...
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 1 [11.93s]: train_loss = 0.430436 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 2 [18.41s]: train_loss = 0.355978 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 3 [12.34s]: train_loss = 0.333825 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 4 [25.70s]: train_loss = 0.316897 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 5 [20.85s]: train_loss = 0.307520 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 6 [7.90s]: train_loss = 0.300598 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 7 [6.60s]: train_loss = 0.293334 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 8 [8.37s]: train_loss = 0.287591 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 9 [10.20s]: train_loss = 0.281484 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [15.33s]: train_loss = 0.275847 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 11 [12.09s]: train_loss = 0.271208 
INFO

KeyboardInterrupt: 

In [None]:
import time
rmse_list = []
mae_list = []
mse_list = []
time_train = []
time_test = []

for each in range(1,11):
  train, test = train_test_split(data, test_size=0.2, random_state=42)
  # X_Train, X_Test, Y_Train, Y_Test = train_test_split(dulieu_X_shuffled, dulieu_Y_shuffled, test_size = 1/3, random_state = 125)

  st = time.time()
  ncf_model = NCF(
    n_users=ncf_data.n_users,
    n_items=ncf_data.n_items,
    model_type="NeuMF",  # có thể chọn GMF, MLP, hoặc NeuMF
    batch_size=256,
    learning_rate=0.001,
    seed=42
  )

  ncf_model.fit(ncf_data)
  
  et = time.time()-st
  time_train.append(et)

  st = time.time()
  Y_Pred = model.predict(X_Test)
  et = time.time()-st
  time_test.append(et)

  mse_score = mean_squared_error(Y_Test, Y_Pred)
  rmse_score = np.sqrt(mse_score)
  mae_score = mean_absolute_error(Y_Test, Y_Pred)

  mse_list.append(mse_score)
  rmse_list.append(rmse_score)
  mae_list.append(mae_score)

  print(f"Lan {i+1}: RMSE = {rmse_score:.4f} | MAE = {mae_score:.4f} | MSE = {mse_score:.4f}")
  
mean_rmse = np.mean(rmse_list)
mean_mae = np.mean(mae_list)
mean_mse = np.mean(mse_list)

print("Trung binh sau 10 lan: ")
print(f"RMSE trung binh: {mean_rmse:.4f}")
print(f"MAE trung binh: {mean_mae:.4f}")
print(f"MSE trung binh: {mean_mse:.4f}")
Ketqua_timeTrain = statistics.mean(time_train)
print("Trung binh thoi gian train: ", Ketqua_timeTrain)
Ketqua_timeTest = statistics.mean(time_test)
print("Trung binh thoi gian test: ", Ketqua_timeTest)


In [None]:
ncf_model = NCF(
    n_users=ncf_data.n_users,
    n_items=ncf_data.n_items,
    model_type="NeuMF",  # có thể chọn GMF, MLP, hoặc NeuMF
    batch_size=256,
    learning_rate=0.001,
    seed=42
)

ncf_model.fit(ncf_data)

INFO:recommenders.models.ncf.ncf_singlenode:Epoch 1 [22.27s]: train_loss = 0.377875 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 2 [20.45s]: train_loss = 0.306297 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 3 [12.27s]: train_loss = 0.289422 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 4 [15.73s]: train_loss = 0.276322 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 5 [13.91s]: train_loss = 0.266201 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 6 [13.80s]: train_loss = 0.259726 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 7 [14.78s]: train_loss = 0.254786 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 8 [13.98s]: train_loss = 0.250033 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 9 [13.40s]: train_loss = 0.246006 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [12.26s]: train_loss = 0.242178 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 11 [13.24s]: train_loss = 0.239438 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 12 [12.24s]: 