# Recommend dishes to users with the LightGCN model

Predictions based on user features as well as food features and also based on the interaction between users and dishes

In [1]:
pip install -U LibRecommender

Collecting LibRecommender
  Downloading LibRecommender-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: LibRecommender
Successfully installed LibRecommender-1.5.1


In [2]:
# import essential libraries
import pandas as pd
import numpy as np

### Read dataset

In [21]:
# read dataset
data = pd.read_csv('./dataset.csv', encoding='utf-8', dtype={'user': str}, sep=",")

In [22]:
data.head()

Unnamed: 0,user,item,gender,birthday,item_name,item_price,label,category_id,category_name,time
0,717578507,33,Female,1992-07-23,Coca,15000,2,10,Nước ngọt,2023-11-05 06:44:06
1,807764389,24,Male,1981-09-21,Gỏi xoài khô mặn,57000,1,7,Món trộn,2023-10-04 11:42:14
2,220861620,36,Male,1988-08-27,Sữa chua trái cây,35000,1,11,Sữa,2024-01-24 04:16:00
3,317868593,23,Male,1974-11-06,Lẩu nhiệt đới,180000,3,6,Lẩu,2024-05-08 02:41:02
4,810933260,39,Male,1991-02-08,Trà chanh thái đỏ,39000,5,12,Trà,2023-08-10 12:53:39


In [23]:
data.columns

Index(['user', 'item', 'gender', 'birthday', 'item_name', 'item_price',
       'label', 'category_id', 'category_name', 'time'],
      dtype='object')

### Data Preprocessing
Convert birthday column to age

In [24]:
from datetime import datetime

def calculate_age(birthday):
    today = datetime.today()
    birth_date = pd.to_datetime(birthday)
    age = today.year - birth_date.year - ((today.month, today.day) < (birth_date.month, birth_date.day))
    return age

# Add age column into DataFrame
data['age'] = data['birthday'].apply(calculate_age)

# Remove birthday column
data = data.drop(columns=['birthday'])

data.head()

Unnamed: 0,user,item,gender,item_name,item_price,label,category_id,category_name,time,age
0,717578507,33,Female,Coca,15000,2,10,Nước ngọt,2023-11-05 06:44:06,31
1,807764389,24,Male,Gỏi xoài khô mặn,57000,1,7,Món trộn,2023-10-04 11:42:14,42
2,220861620,36,Male,Sữa chua trái cây,35000,1,11,Sữa,2024-01-24 04:16:00,35
3,317868593,23,Male,Lẩu nhiệt đới,180000,3,6,Lẩu,2024-05-08 02:41:02,49
4,810933260,39,Male,Trà chanh thái đỏ,39000,5,12,Trà,2023-08-10 12:53:39,33


In [None]:
# data.to_csv('dataset_after_cleaning.csv', index=False, encoding='utf-8-sig')

### Feature selection

In [25]:
featuresTrain = ['user', 'item', 'gender', 'age', 'item_price', 'label', 'category_id', 'time']

### Split dataset

In [26]:
data = data[featuresTrain]
data.head()

Unnamed: 0,user,item,gender,age,item_price,label,category_id,time
0,717578507,33,Female,31,15000,2,10,2023-11-05 06:44:06
1,807764389,24,Male,42,57000,1,7,2023-10-04 11:42:14
2,220861620,36,Male,35,35000,1,11,2024-01-24 04:16:00
3,317868593,23,Male,49,180000,3,6,2024-05-08 02:41:02
4,810933260,39,Male,33,39000,5,12,2023-08-10 12:53:39


In [27]:
from libreco.data import random_split, DatasetPure, split_by_ratio_chrono, DatasetFeat
from libreco.algorithms import LightGCN  # pure data, algorithm LightGCN
from libreco.evaluation import evaluate

In [28]:
# split whole data into three folds for training, evaluating and testing
train_data, eval_data, test_data = random_split(data, multi_ratios=[0.8, 0.1, 0.1])

train_data, data_info = DatasetPure.build_trainset(train_data)
eval_data = DatasetPure.build_evalset(eval_data)
test_data = DatasetPure.build_testset(test_data)
print(data_info)  # n_users: 4658, n_items: 505, data density: 0.2000 %

n_users: 200, n_items: 40, data density: 40.0000 %


In [29]:
print(train_data)
for i, (user, item, label) in enumerate(train_data):
    if i < 5:  # Print 5 first data columns
        print(train_data[i])
        #print(f"user: {user}, item: {item}, label: {label}")
    else:
        break
# print(test_data)

<libreco.data.transformed.TransformedSet object at 0x79a0eaede9e0>
(56, 9, 4.0)
(198, 11, 2.0)
(25, 24, 2.0)
(27, 37, 3.0)
(81, 5, 5.0)


### Training Model

In [30]:
# Config LightGCN Model
lightgcn = LightGCN(
    task="ranking",
    data_info=data_info,
    loss_type="bpr",
    embed_size=16,
    n_epochs=3,
    lr=1e-3,
    batch_size=2048,
    num_neg=1,
    device="cuda",
)

In [31]:
# monitor metrics on eval data during training
lightgcn.fit(
    train_data,
    neg_sampling=True,
    verbose=2,
    eval_data=eval_data,
    metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
)

Training start time: [35m2024-07-22 05:58:30[0m


train: 100%|██████████| 2/2 [00:00<00:00, 72.44it/s]


Epoch 1 elapsed: 0.037s
	 [32mtrain_loss: 0.6928[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 418.93it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 25.75it/s]


	 eval log_loss: 0.6932
	 eval roc_auc: 0.5035
	 eval precision@10: 0.0540
	 eval recall@10: 0.2537
	 eval ndcg@10: 0.2215


train: 100%|██████████| 2/2 [00:00<00:00, 64.12it/s]


Epoch 2 elapsed: 0.051s
	 [32mtrain_loss: 0.6928[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1240.92it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 32.98it/s]


	 eval log_loss: 0.6932
	 eval roc_auc: 0.5040
	 eval precision@10: 0.0540
	 eval recall@10: 0.2537
	 eval ndcg@10: 0.2217


train: 100%|██████████| 2/2 [00:00<00:00, 66.48it/s]


Epoch 3 elapsed: 0.043s
	 [32mtrain_loss: 0.6928[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1275.25it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 38.15it/s]


	 eval log_loss: 0.6932
	 eval roc_auc: 0.5042
	 eval precision@10: 0.0540
	 eval recall@10: 0.2523
	 eval ndcg@10: 0.2190


In [32]:
# do final evaluation on test data
evaluate(
    model=lightgcn,
    data=test_data,
    neg_sampling=True,
    metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
)

eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1239.82it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 47.01it/s]


{'loss': 0.6931811877036071,
 'roc_auc': 0.49049062499999996,
 'precision': 0.06331360946745562,
 'recall': 0.27712031558185407,
 'ndcg': 0.25316569516753273}

### Predict item for user

In [34]:
# predict preference of user 0717578507 to item 30
lightgcn.predict(user="0717578507	", item=30)

# # recommend 7 items for user 0717578507
lightgcn.recommend_user(user="0717578507", n_rec=7)

# # cold-start prediction
lightgcn.predict(user="0717578507", item="not item", cold_start="average")
# # cold-start recommendation
lightgcn.recommend_user(user="0717578507", n_rec=7, cold_start="popular")

[31mDetect 1 unknown interaction(s), position: [0][0m
[31mDetect 1 unknown interaction(s), position: [0][0m


{'0717578507': array([30, 13, 29, 39, 31,  6, 26])}