# Recommend dishes to users with the LightGCN model

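The goal is to recommend dishes to users based on user features, dish features, and the interactions between users and dishes. Note: the cells below build the datasets with `DatasetPure` (imported as "pure data"), so verify whether the feature columns are actually used by the model.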

In [None]:
pip install -U LibRecommender



In [None]:
# import essential libraries
import pandas as pd
import numpy as np

### Read dataset

In [None]:
# Load the raw interaction log from CSV; the `user` column is read as text.
data = pd.read_csv(
    './dataset.csv',
    sep=",",
    encoding='utf-8',
    dtype={'user': str},
)

In [None]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,user,item,gender,birthday,item_name,item_price,label,category_id,category_name,time
0,581702741,31,Male,1978-12-15,Dừa tươi,35000,4,9,Nước trái cây,2023-06-14 22:54:26
1,250860901,3,Female,2000-07-14,Đậu hũ chiên giòn,30000,3,1,Khai vị,2024-04-13 04:26:03
2,164718485,40,Male,1998-07-28,Trà vải cam sả,40000,1,12,Trà,2024-04-03 07:08:50
3,155591228,3,Female,2003-11-17,Đậu hũ chiên giòn,30000,2,1,Khai vị,2023-01-29 00:29:19
4,438370458,12,Male,1977-07-26,Nấm xào chay,70000,5,3,Món xào,2024-05-06 07:28:24


In [None]:
# Show the column labels of the loaded frame.
data.columns

Index(['user', 'item', 'gender', 'birthday', 'item_name', 'item_price',
       'label', 'category_id', 'category_name', 'time'],
      dtype='object')

### Data Cleaning
Convert the `birthday` column into an `age` column (age in completed years), then drop `birthday`.

In [None]:
from datetime import datetime

def calculate_age(birthday, today=None):
    """Return the age in completed years for a single birthday.

    Parameters
    ----------
    birthday : str or datetime-like
        Date of birth; parsed with ``pd.to_datetime``.
    today : str or datetime-like, optional
        Reference date the age is measured against. When omitted, the current
        local date (``datetime.today()``) is used, which matches the previous
        behaviour. Pass a fixed date to make the derived ages reproducible
        across runs.

    Returns
    -------
    int
        ``reference.year - birth.year``, reduced by one when the birthday has
        not yet occurred in the reference year.
    """
    reference = datetime.today() if today is None else pd.to_datetime(today)
    birth_date = pd.to_datetime(birthday)
    # The tuple comparison is True (counts as 1) while the birthday is still
    # ahead in the reference year, so one year is subtracted in that case.
    birthday_still_ahead = (reference.month, reference.day) < (birth_date.month, birth_date.day)
    return reference.year - birth_date.year - birthday_still_ahead

# Derive `age` from `birthday`, then drop `birthday`.
# The guard keeps this cell re-runnable: once `birthday` has been dropped, an
# unguarded second run would fail with a KeyError on data['birthday'].
if 'birthday' in data.columns:
    data = (
        data
        .assign(age=data['birthday'].apply(calculate_age))
        .drop(columns=['birthday'])
    )

# Preview the cleaned frame (the last expression is the cell output).
data.head()

Unnamed: 0,user,item,gender,item_name,item_price,label,category_id,category_name,time,age
0,581702741,31,Male,Dừa tươi,35000,4,9,Nước trái cây,2023-06-14 22:54:26,45
1,250860901,3,Female,Đậu hũ chiên giòn,30000,3,1,Khai vị,2024-04-13 04:26:03,24
2,164718485,40,Male,Trà vải cam sả,40000,1,12,Trà,2024-04-03 07:08:50,25
3,155591228,3,Female,Đậu hũ chiên giòn,30000,2,1,Khai vị,2023-01-29 00:29:19,20
4,438370458,12,Male,Nấm xào chay,70000,5,3,Món xào,2024-05-06 07:28:24,46


In [None]:
# data.to_csv('dataset_after_cleaning.csv', index=False, encoding='utf-8-sig')

### Feature selection

In [None]:
# Columns kept for the training frame; `item_name` and `category_name`
# from the cleaned data are not included.
featuresTrain = [
    'user',
    'item',
    'gender',
    'age',
    'item_price',
    'label',
    'category_id',
    'time',
]

### Split dataset

In [None]:
# Keep only the selected feature columns and preview the result.
dataTrain = data.loc[:, featuresTrain]
dataTrain.head(n=5)

Unnamed: 0,user,item,gender,age,item_price,label,category_id,time
0,581702741,31,Male,45,35000,4,9,2023-06-14 22:54:26
1,250860901,3,Female,24,30000,3,1,2024-04-13 04:26:03
2,164718485,40,Male,25,40000,1,12,2024-04-03 07:08:50
3,155591228,3,Female,20,30000,2,1,2023-01-29 00:29:19
4,438370458,12,Male,46,70000,5,3,2024-05-06 07:28:24


In [None]:
from libreco.data import random_split, DatasetPure, split_by_ratio_chrono, DatasetFeat
from libreco.algorithms import LightGCN  # pure data, algorithm LightGCN
from libreco.evaluation import evaluate

In [None]:
# split whole data into three folds for training, evaluating and testing
train_data, eval_data, test_data = random_split(data, multi_ratios=[0.8, 0.1, 0.1])

train_data, data_info = DatasetPure.build_trainset(train_data)
eval_data = DatasetPure.build_evalset(eval_data)
test_data = DatasetPure.build_testset(test_data)
print(data_info)  # n_users: 4658, n_items: 505, data density: 0.2000 %

n_users: 4658, n_items: 505, data density: 0.2000 %


In [None]:
print(train_data)
# Print the first 5 samples, then stop iterating.
for idx, (_user, _item, _label) in enumerate(train_data):
    if idx >= 5:
        break
    print(train_data[idx])
# print(test_data)

<libreco.data.transformed.TransformedSet object at 0x7a362cd35510>
(4267, 281, 4.0)
(2913, 281, 4.0)
(205, 261, 3.0)
(3135, 20, 5.0)
(2748, 24, 4.0)


### Training Model

In [None]:
# Hyper-parameters collected in one place, then unpacked into the constructor.
lightgcn_config = dict(
    task="ranking",
    loss_type="bpr",
    embed_size=16,
    n_epochs=3,
    lr=1e-3,
    batch_size=2048,
    num_neg=1,
    device="cuda",
)
lightgcn = LightGCN(data_info=data_info, **lightgcn_config)

In [None]:
# Train on the train set and report the listed metrics on eval_data while training.
eval_metrics = ["loss", "roc_auc", "precision", "recall", "ndcg"]
lightgcn.fit(
    train_data,
    neg_sampling=True,
    verbose=2,
    eval_data=eval_data,
    metrics=eval_metrics,
)

Training start time: [35m2024-07-20 18:09:43[0m


train: 100%|██████████| 3/3 [00:00<00:00, 72.08it/s]


Epoch 1 elapsed: 0.048s
	 [32mtrain_loss: 0.6844[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1384.26it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 305.08it/s]


	 eval log_loss: 0.6930
	 eval roc_auc: 0.5496
	 eval precision@10: 0.0000
	 eval recall@10: 0.0000
	 eval ndcg@10: 0.0000


train: 100%|██████████| 3/3 [00:00<00:00, 78.89it/s]


Epoch 2 elapsed: 0.050s
	 [32mtrain_loss: 0.683[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 971.58it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 453.83it/s]


	 eval log_loss: 0.6930
	 eval roc_auc: 0.5468
	 eval precision@10: 0.0000
	 eval recall@10: 0.0000
	 eval ndcg@10: 0.0000


train: 100%|██████████| 3/3 [00:00<00:00, 60.59it/s]


Epoch 3 elapsed: 0.058s
	 [32mtrain_loss: 0.681[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1432.97it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 187.54it/s]

	 eval log_loss: 0.6930
	 eval roc_auc: 0.5455
	 eval precision@10: 0.0091
	 eval recall@10: 0.0909
	 eval ndcg@10: 0.0324





In [None]:
# Final evaluation on the test split; the value returned by evaluate() is the cell output.
test_metrics = ["loss", "roc_auc", "precision", "recall", "ndcg"]
evaluate(
    model=lightgcn,
    data=test_data,
    neg_sampling=True,
    metrics=test_metrics,
)

eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1413.65it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 338.66it/s]


{'loss': 0.6932151478249067,
 'roc_auc': 0.4225895316804407,
 'precision': 0.018181818181818184,
 'recall': 0.18181818181818182,
 'ndcg': 0.05658165087738374}

### Predict items for a user

In [None]:
# Recommend 7 items for a single user id (passed as a string).
target_user = "0581702741"
lightgcn.recommend_user(user=target_user, n_rec=7)

# Other calls, left disabled for reference:
# predict preference of user 0581702741 to item 30
# lightgcn.predict(user="0581702741", item=30)
# cold-start prediction
# lightgcn.predict(user="ccc", item="not item", cold_start="average")
# cold-start recommendation
# lightgcn.recommend_user(user="are we good?", n_rec=7, cold_start="popular")

{'0581702741': array([ 5,  1, 15, 37, 13, 28, 23])}

In [None]:
from libreco.data import random_split, DatasetPure, split_by_ratio_chrono, DatasetFeat
from libreco.algorithms import LightGCN  # pure data, algorithm LightGCN
from libreco.evaluation import evaluate

# NOTE: this cell repeats the split / build / train steps of the earlier cells
# and rebinds train_data, eval_data, test_data, data_info and lightgcn.

# Randomly partition the rows into train / eval / test frames (80% / 10% / 10%).
train_frame, eval_frame, test_frame = random_split(
    data,
    multi_ratios=[0.8, 0.1, 0.1],
)

# The train set is built first because it also yields `data_info`.
train_data, data_info = DatasetPure.build_trainset(train_frame)
eval_data = DatasetPure.build_evalset(eval_frame)
test_data = DatasetPure.build_testset(test_frame)
print(data_info)  # summary line: n_users, n_items, data density

# Model hyper-parameters, unpacked into the constructor below.
model_kwargs = {
    "task": "ranking",
    "loss_type": "bpr",
    "embed_size": 16,
    "n_epochs": 3,
    "lr": 1e-3,
    "batch_size": 2048,
    "num_neg": 1,
    "device": "cuda",
}
lightgcn = LightGCN(data_info=data_info, **model_kwargs)

# Train while reporting the listed metrics on eval_data.
monitored_metrics = ["loss", "roc_auc", "precision", "recall", "ndcg"]
lightgcn.fit(
    train_data,
    neg_sampling=True,
    verbose=2,
    eval_data=eval_data,
    metrics=monitored_metrics,
)


n_users: 100, n_items: 40, data density: 20.0000 %
Training start time: [35m2024-07-20 19:44:35[0m


train: 100%|██████████| 1/1 [00:00<00:00, 20.53it/s]


Epoch 1 elapsed: 0.061s
	 [32mtrain_loss: 0.6923[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 626.39it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 64.18it/s]

	 eval log_loss: 0.6930





	 eval roc_auc: 0.5393
	 eval precision@10: 0.0324
	 eval recall@10: 0.2284
	 eval ndcg@10: 0.1188


train: 100%|██████████| 1/1 [00:00<00:00, 80.93it/s]


Epoch 2 elapsed: 0.023s
	 [32mtrain_loss: 0.6923[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1095.69it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 80.94it/s]


	 eval log_loss: 0.6930
	 eval roc_auc: 0.5406
	 eval precision@10: 0.0338
	 eval recall@10: 0.2333
	 eval ndcg@10: 0.1191


train: 100%|██████████| 1/1 [00:00<00:00, 133.18it/s]


Epoch 3 elapsed: 0.016s
	 [32mtrain_loss: 0.6923[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 2098.20it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 93.48it/s]


	 eval log_loss: 0.6930
	 eval roc_auc: 0.5429
	 eval precision@10: 0.0338
	 eval recall@10: 0.2333
	 eval ndcg@10: 0.1195
