#### Install Packages

In [None]:
import torch
!pip install -q torch-scatter~=2.1.0 torch-sparse~=0.6.16 torch-cluster~=1.6.0 torch-spline-conv~=1.2.1 torch-geometric==2.2.0 -f https://data.pyg.org/whl/torch-{torch.__version__}.html

torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

#### Mount Drive

In [1]:
# Mount Google Drive so the project files under /content/drive are reachable from Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Location of the project's source directory on the mounted Drive.
project_path = "/content/drive/MyDrive/PORTFOLIO/DS/Rec-Sepatu/src"
# Work from the source directory so the relative paths used later ("../data", "../results") resolve.
%cd $project_path

/content/drive/MyDrive/PORTFOLIO/DS/Rec-Sepatu/src


In [3]:
# List every file in the project path to confirm the expected modules are present
%ls "$project_path"

data_preprocessing.py  get_pyg_data.py    load.py              train_test.py
EDA.ipynb              LightGCN.ipynb     [0m[01;34m__pycache__[0m/         transform.py
evaluator.py           lightgcn_model.py  recommendations.csv


In [4]:
import sys

# Make the project's modules (data_preprocessing, evaluator, train_test, ...) importable.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if project_path not in sys.path:
    sys.path.append(project_path)

In [None]:
import os
# Ensure the output directory used by the save steps exists; exist_ok=True makes
# re-running this cell harmless when the directory is already there.
os.makedirs("../results", exist_ok=True)

#### Check Data

In [5]:
from data_preprocessing import TrainTestGenerator

# Review export, addressed relative to the current working directory.
data_path = "../data/load/full_review.csv"
generator = TrainTestGenerator(data_path)
# prepare_data() yields the review table explored in the following cells; the preview
# below shows the columns user_id, product_id, rating and timestamp.
df = generator.prepare_data()
df.head()

Unnamed: 0,user_id,product_id,rating,timestamp
0,227174004,26505545956,5,2024
1,168705704,26505545956,5,2024
2,270972543,26505545956,5,2024
3,362783875,26505545956,5,2024
4,728242817,26505545956,5,2025


#### Descriptive Statistics

In [6]:
# Dataset size and cardinalities: number of reviews, distinct users, distinct products.
total_reviews = df.shape[0]
unique_users = df['user_id'].nunique()
unique_products = df['product_id'].nunique()

# value_counts() sorts by frequency (descending), so the first three rows are
# the most active users / most reviewed products.
top_users = df['user_id'].value_counts().iloc[:3]
top_products = df['product_id'].value_counts().iloc[:3]

# Show the results, one labelled line (or labelled block) per statistic.
summary_lines = (
    ("Total Reviews:", total_reviews),
    ("Unique Users:", unique_users),
    ("Unique Products:", unique_products),
    ("\nTop 3 Users with Most Reviews:\n", top_users),
    ("\nTop 3 Products with Most Reviews:\n", top_products),
)
for label, value in summary_lines:
    print(label, value)

Total Reviews: 11105
Unique Users: 10285
Unique Products: 59

Top 3 Users with Most Reviews:
 user_id
1272355370    6
319695615     5
89589272      4
Name: count, dtype: int64

Top 3 Products with Most Reviews:
 product_id
22339951760    797
23722869496    753
20670027211    668
Name: count, dtype: int64


In [9]:
# Keep only the reviews written by users who have at least two reviews.
reviews_per_user = df.groupby('user_id')['user_id'].transform('size')
user_with_multiple_reviews = df[reviews_per_user >= 2]

# Draw one of those users; the fixed random_state keeps the pick reproducible.
sampled_ids = user_with_multiple_reviews['user_id'].sample(1, random_state=42)
random_user_id = sampled_ids.values[0]

# Every product this user reviewed, together with the rating given.
is_sampled_user = df['user_id'] == random_user_id
user_reviews = df.loc[is_sampled_user, ['product_id', 'rating']]

# Show result
print("Random User ID:", random_user_id)
print("Products Reviewed:")
print(user_reviews)

Random User ID: 284317847
Products Reviewed:
       product_id  rating
6323  16298235394       5
7136  22382982788       5


#### Import LightGCN Components

In [None]:
from functools import partial
from lightgcn_model import LightGCNStack
from evaluator import Evaluator
from get_pyg_data import load_bipartitedata
from train_test import train, test
import torch
import pandas as pd

#### LightGCN Recommender Wrapper

In [None]:
class objectview:
    """Expose the entries of a dict as attributes (``view.key`` instead of ``d['key']``).

    The mapping is shallow-copied on construction, so attributes set on the view
    do not leak back into the caller's dict. This matters when one ``args`` dict is
    reused to build several models.
    """

    def __init__(self, d):
        # Copy rather than alias: assigning `d` itself would make every attribute
        # write on this object mutate the dict the caller still holds.
        self.__dict__ = dict(d)

# Wrapper LightGCN
class LightGCN_recommender:
    """Train a ``LightGCNStack`` on review data and serve top-N product recommendations.

    The constructor takes a plain dict of hyper-parameters. ``fit`` trains the model
    and ``recommend`` returns product ids for a user, falling back to the most
    reviewed products when the lookup for that user raises ``KeyError``.
    """

    def __init__(self, args):
        self.args = objectview(args)
        # Use the GPU when one is visible; otherwise run on the CPU instead of
        # failing on a hard-coded 'cuda' device.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = LightGCNStack(latent_dim=64, args=self.args).to(self.device)
        # Id/index lookup tables; populated by fit() from load_bipartitedata().
        self.a_rev_dict = None
        self.u_rev_dict = None
        self.a_dict = None
        self.u_dict = None
        # Popularity-ordered product ids used as the fallback; filled in by fit().
        self.default_recommendation = []
        # Best validation score seen during fit() and the test score of that epoch.
        self.best_val_perf = 0
        self.best_test_perf = 0

    def fit(self, data: pd.DataFrame, test_year: int = None):
        """Train the model on ``data`` and return the per-epoch history.

        Args:
            data: Review frame. It must contain a ``product_id`` column (used to
                build the popularity fallback) and is passed to
                ``load_bipartitedata`` for conversion.
            test_year: Only shown in the progress banner.

        Returns:
            ``(train_losses, val_scores)``: two lists with one number per epoch.
        """
        from time import time

        # Products ordered by review count, most reviewed first.
        self.default_recommendation = data["product_id"].value_counts().index.tolist()
        data, self.u_rev_dict, self.a_rev_dict, self.u_dict, self.a_dict = load_bipartitedata(data)
        data = data.to(self.device)
        self.model.init_data(data)
        # NOTE(review): the learning rate is fixed at 0.001 here, so an 'lr' entry in
        # args does not reach the optimizer — confirm whether that is intended.
        self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=0.001)

        train_losses = []
        val_scores = []

        print(f"⚙️  Evaluation on: {test_year}")
        start_train = time()

        self.best_val_perf = self.best_test_perf = 0
        for epoch in range(1, self.args.epochs + 1):
            train_loss = train(self.model, data, self.optimizer)
            # NOTE(review): the same training graph is passed for both entries of the
            # tuple, so the "Val"/"Test" figures printed below appear to be computed on
            # training data — confirm against train_test.test.
            val_perf, tmp_test_perf = test(self.model, (data, data))

            # Store plain numbers rather than tensors in the history.
            train_losses.append(train_loss.item())
            val_scores.append(val_perf if isinstance(val_perf, float) else val_perf.item())

            if val_perf > self.best_val_perf:
                self.best_val_perf = val_perf
                self.best_test_perf = tmp_test_perf

            print(f"Epoch {epoch:2d} | Loss: {train_loss:.4f} | Val: {val_perf:.4f} | Test: {tmp_test_perf:.4f}")

        end_train = time()
        print(f"🕒 Time taken (train+val): {end_train - start_train:.2f} seconds\n")

        return train_losses, val_scores

    def recommend(self, user_id, n):
        """Return at most ``n`` product ids recommended for ``user_id``.

        ``user_id`` is converted with ``str()`` before the lookup. If the lookup
        (or the model call) raises ``KeyError``, the first ``n`` entries of the
        popularity list built in ``fit`` are returned instead.
        """
        try:
            top_items = self.model.topN(self.u_dict[str(user_id)], n=n)
        except KeyError:
            # Fallback: honour `n` instead of handing back the entire popularity list.
            return self.default_recommendation[:n]
        item_indices = top_items.indices.cpu().tolist()
        # Translate the model's item indices back to product ids.
        return [self.a_rev_dict[item_idx] for item_idx in item_indices]

#### Model Parameter Setup

In [None]:
# Hyper-parameters handed to every LightGCN_recommender the evaluator builds.
# NOTE(review): the wrapper class in this notebook reads only 'epochs' directly and
# builds its Adam optimizer with a fixed lr=0.001, so 'lr' below does not reach the
# optimizer. The remaining keys are presumably consumed by LightGCNStack — confirm
# in lightgcn_model.py.
args = {
    'model_type': 'LightGCN',
    'num_layers': 3,
    'batch_size': 32,
    'hidden_dim': 32,
    'dropout': 0.2,
    'epochs': 20,
    'opt': 'adam',
    'opt_scheduler': 'none',
    'opt_restart': 0,
    'weight_decay': 5e-3,
    'lr': 0.1,
    'lambda_reg': 1e-4,
}

# Fresh generator over the review export, plus a zero-argument factory so the
# evaluator can create a new recommender whenever it needs one.
data_path = "../data/load/full_review.csv"
generator = TrainTestGenerator(data_path)
recommender_factory = partial(LightGCN_recommender, args)
evaluator = Evaluator(recommender_factory, generator)

#### Model Evaluation

In [None]:
# Run the evaluation; the log below shows one training run per evaluated year.
evaluator.evaluate()
# Write the outcome to two CSV files under ../results
# (presumably metrics and timings, going by the file names — confirm in evaluator.py).
evaluator.save_results('../results/lightgcn.csv', '../results/lightgcn_time.csv')

⚙️  Evaluation on: 2024
Epoch  1 | Loss: 0.0002 | Val: 0.9992 | Test: 0.9992
Epoch  2 | Loss: 0.0002 | Val: 0.9992 | Test: 0.9992
Epoch  3 | Loss: 0.0002 | Val: 0.9996 | Test: 0.9996
Epoch  4 | Loss: 0.0001 | Val: 1.0000 | Test: 1.0000
Epoch  5 | Loss: 0.0001 | Val: 0.9992 | Test: 0.9992
Epoch  6 | Loss: 0.0001 | Val: 0.9992 | Test: 0.9992
Epoch  7 | Loss: 0.0001 | Val: 1.0000 | Test: 1.0000
Epoch  8 | Loss: 0.0001 | Val: 0.9992 | Test: 0.9992
Epoch  9 | Loss: 0.0001 | Val: 1.0000 | Test: 1.0000
Epoch 10 | Loss: 0.0001 | Val: 1.0000 | Test: 1.0000
🕒 Time taken (train+val): 8.12 seconds

⚙️  Evaluation on: 2025
Epoch  1 | Loss: 0.3017 | Val: 0.9981 | Test: 0.9981
Epoch  2 | Loss: 0.2866 | Val: 0.9981 | Test: 0.9981
Epoch  3 | Loss: 0.2722 | Val: 0.9982 | Test: 0.9982
Epoch  4 | Loss: 0.2584 | Val: 0.9985 | Test: 0.9985
Epoch  5 | Loss: 0.2453 | Val: 0.9981 | Test: 0.9981
Epoch  6 | Loss: 0.2327 | Val: 0.9987 | Test: 0.9987
Epoch  7 | Loss: 0.2207 | Val: 0.9984 | Test: 0.9984
Epoch  8 | 

  self.results = pd.concat(results).reset_index(drop=True)


#### Detail Evaluation Metric

In [None]:
# Each entry pairs a heading with the evaluator accessor that produces its table;
# the tables are fetched and printed in this order.
metric_reports = (
    ("HitRate@K:", evaluator.get_hit_rates),
    ("Recall@K:", evaluator.get_recalls),
    ("Precision@K:", evaluator.get_precisions),
    ("NDCG@K:", evaluator.get_ndcgs),
    ("MRR:", evaluator.get_mrr),
)

for heading, fetch_table in metric_reports:
    print(heading)
    print(fetch_table())

HitRate@K:
      cases         5        10        20        50
2024   5580  0.305914  0.437993  0.505914  0.505914
2025   4227  0.151171  0.269695  0.464396  0.762952
Recall@K:
      cases         5        10        20        50
2024   5580  0.307527  0.460573  0.528674  0.528674
2025   4227  0.156139  0.302579  0.502484  0.840312
Precision@K:
      cases         5        10        20        50
2024   5580  0.061505  0.046057  0.026434  0.010573
2025   4227  0.031228  0.030258  0.025124  0.016806
NDCG@K:
      cases         5        10        20        50
2024   5580  0.178545  0.225776  0.244763  0.244763
2025   4227  0.110465  0.154527  0.206836  0.274183
MRR:
      cases       mrr
2024   2823  0.291873
2025   3225  0.157471


In [None]:
# Take the trained recommender from the evaluator for the ad-hoc queries below.
# NOTE(review): which evaluation run's model get_model() returns is decided in evaluator.py — confirm.
model = evaluator.get_model()

#### Sample Recommendation Result

In [None]:
# Example query: top-5 recommendations for one user id that appears in the review data.
user_id = "270972543"
model.recommend(user_id, n=5)

[np.int64(26505545956),
 np.int64(21571572350),
 np.int64(24560402793),
 np.int64(29510009219),
 np.int64(29400098684)]

In [None]:
# Load the product catalogue and discard rows that have no product_id.
product_data = (
    pd.read_csv("../data/transform/910_product.csv")
    .dropna(subset=["product_id"])
    .copy()
)

# Normalise the id to a digit string: float -> int64 -> str strips the trailing ".0".
product_id_as_text = product_data["product_id"].astype(float).astype("int64").astype(str)
product_data["product_id"] = product_id_as_text

product_data.head()

Unnamed: 0,product_name,product_price,product_rating,product_sales,product_id
0,910 Nineten Haze Fuzz Lite Sepatu Lari Putih H...,645905,5,343,29653201231
1,910 Nineten Haze Fuzz Lite Sepatu Lari Hitam U...,645905,5,386,29653199814
2,910 Nineten Kishi Run Flow Sepatu Lari Putih B...,749900,5,353,27659612421
4,910Nineten TAKASHI RUN ELITE sepatu lari HITAM...,645905,5,137,25520925247
5,910Nineten TAKASHI RUN ELITE sepatu lari Hijau...,645905,5,231,27665748404


In [None]:
# User lookup table (user_id -> user_name) used to label the exported recommendations.
# NOTE: user_name is personal data; consider clearing this cell's output before sharing the notebook.
user_data = pd.read_csv("../data/transform/user_data.csv")
user_data.head()

Unnamed: 0,user_id,user_name
0,227174004,alno21
1,168705704,ziporamusidy80
2,270972543,rinnie_aning
3,362783875,rahman_arif2812
4,728242817,jecauisgian


In [None]:
import pandas as pd

# Number of recommendations kept per user.
TOP_N = 5

# One record per (user, recommended product), in rank order.
all_recommendations = []
# Users for which the recommendation call raised KeyError; kept for inspection
# instead of being dropped without a trace.
skipped_user_ids = []

# Iterate over the two needed columns directly; ids are compared as strings.
for user_id, user_name in zip(user_data['user_id'].astype(str), user_data['user_name']):
    try:
        # recommend() may hand back more than n items when it falls back to the
        # popularity list, so cap the list explicitly to keep TOP_N rows per user.
        recommended_products = model.recommend(user_id, n=TOP_N)[:TOP_N]
    except KeyError:
        # An id could not be translated inside the model wrapper; skip this user.
        skipped_user_ids.append(user_id)
        continue

    all_recommendations.extend(
        {
            'user_id': user_id,
            'user_name': user_name,
            'product_id': str(prod_id),
            'rank': rank,
        }
        for rank, prod_id in enumerate(recommended_products, start=1)
    )

# Create dataframe; explicit columns keep the merge below valid even with no records.
recommendation_df = pd.DataFrame(
    all_recommendations,
    columns=['user_id', 'user_name', 'product_id', 'rank'],
)

# Join with product data on a string id, using a local copy so the frame loaded
# in the earlier cell is left untouched.
product_names = product_data[['product_id', 'product_name']].copy()
product_names['product_id'] = product_names['product_id'].astype(str)
recommendation_df = recommendation_df.merge(product_names, on='product_id', how='left')

# Sample of the result
print(recommendation_df.head())

     user_id user_name   product_id  rank  \
0  227174004    alno21  26505545956     1   
1  227174004    alno21  26670973620     2   
2  227174004    alno21  16298235394     3   
3  227174004    alno21  25316145291     4   
4  227174004    alno21  23722869496     5   

                                        product_name  
0  910 Nineten Ekiden Hyperion Sepatu Lari Magent...  
1  910 Nineten Haze Infinity Sprint Sepatu Lari P...  
2  910 Nineten Irezumi Hover Sepatu Lari Hijau te...  
3  910nineten HAZE METAMOZAIC sepatu lari Biru Mu...  
4  910 Nineten Ultra Ekiden 1.0 Sepatu Lari Hitam...  


In [None]:
# Save data
# Persist the recommendation table; index=False leaves the DataFrame index out of the file.
recommendation_df.to_csv("../results/lightgcn_recommendation.csv", index=False)