In [13]:
import os
import sys

current_dir = os.getcwd()

# Walk up the directory tree until we find 'src'
path = current_dir
src_path = None

while True:
    if os.path.basename(path) == "src":
        src_path = path
        break
    parent = os.path.dirname(path)
    if parent == path:  # reached filesystem root
        break
    path = parent

# Add src to sys.path if found
if src_path and src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import AutoRec model and data utilities
from utils.model import AutoRec
from utils.autorecdata import AutoRecData
from utils.preprocessor import PreProcessor
from helpers.data_downloader import download_ml1m_dataset
# Import external libraries
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import os
from tqdm.auto import tqdm
print("=" * 70)
print("Imports Successful")
print("=" * 70)

Imports Successful


In [14]:
data_dir = os.path.join(os.path.dirname(os.getcwd()), '..', 'data')
data_path = os.path.join(data_dir,'ml-1m', 'ratings.dat')
print(data_path)
print(data_dir)
# Check if file exists
if not os.path.exists(data_path):
    download_ml1m_dataset(data_dir=data_dir)

def load_ml_1m_data(data_path = data_path) -> pd.DataFrame:  
    print("=" * 70)
    print("Loading MovieLens 1M Dataset")
    print("=" * 70)
    print(f"Data path: {data_path}")
    return pd.read_csv(
        data_path,
        sep='::',
        header=None,
        names=['user_id', 'item_id', 'rating', 'timestamp'],
        engine='python',  # Explicitly use python engine to avoid warning
        dtype={
            'user_id': np.int32,
            'item_id': np.int32,
            'rating': np.float32,
            'timestamp': np.int32
        }
    )


# Load ratings data with proper engine to avoid warnings
print("\nLoading ratings data...")

ratings_df = load_ml_1m_data()

print(f"✓ Successfully loaded {len(ratings_df):,} ratings")
print("=" * 70)

/Users/abbas/Documents/Codes/thesis/recommender/src/../data/ml-1m/ratings.dat
/Users/abbas/Documents/Codes/thesis/recommender/src/../data

Loading ratings data...
Loading MovieLens 1M Dataset
Data path: /Users/abbas/Documents/Codes/thesis/recommender/src/../data/ml-1m/ratings.dat
✓ Successfully loaded 1,000,209 ratings


In [15]:
# Create PreProcessor instance
preprocessor = PreProcessor()

# Preprocess the data: split into train/test and create rating matrices
print("\nSplitting data into train/test sets and creating rating matrices...")
print("=" * 70)
train_mat, test_mat, num_users, num_items = preprocessor.preprocess_ml1m_data(
    ratings_df, 
    test_size=0.2,  # 20% for testing
    random_state=42  # For reproducibility
)

print(f"✓ Data preprocessing complete!")
print("=" * 70)


Splitting data into train/test sets and creating rating matrices...
✓ Data preprocessing complete!


In [16]:
train_set = AutoRecData(data=train_mat)
test_set = AutoRecData(data=test_mat)

In [17]:
train_loader = data.DataLoader(
    dataset=train_set,
    batch_size=256,
    shuffle=True,
    num_workers=0,
)
print('Train loader created')
print("=" * 70)
test_loader = data.DataLoader(
    dataset=test_set, batch_size=len(test_set), shuffle=False, num_workers=0
)
print('Test loader created')
print("=" * 70)

Train loader created
Test loader created


In [18]:
# Set device (CPU or CUDA if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

model = AutoRec(
    num_users=num_users,
    num_items=num_items,
    num_hidden_units=500,
).to(device)

Using device: cpu


In [19]:
loss_f = nn.MSELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=0.01,
)

best_epoch, best_rmse = 0, np.inf
best_hr_at_10, best_ndcg_at_10 = 0.0, 0.0


print(model)

AutoRec(
  (encoder): Sequential(
    (0): Linear(in_features=3706, out_features=500, bias=True)
    (1): Sigmoid()
  )
  (decoder): Sequential(
    (0): Linear(in_features=500, out_features=3706, bias=True)
  )
)


In [20]:
from utils.helper import get_metrics, get_ranking_metrics
for epoch in tqdm(range(40)):
    model.train()
    for input_vec in train_loader:
        input_mask = (input_vec > 0).to(device)
        input_vec = input_vec.float().to(device)

        model.zero_grad()
        reconstruction = model(input_vec)
        loss = loss_f(reconstruction * input_mask, input_vec * input_mask)
        loss.backward()
        optimizer.step()

    model.eval()
    rmse = get_metrics(model=model, train_set=train_set, test_set=test_set, device=device)
    hr_at_10, ndcg_at_10 = get_ranking_metrics(
        model=model, 
        train_set=train_set, 
        test_set=test_set, 
        top_k=10, 
        device=device
    )

    print(f"[Epoch {epoch}]:: RMSE: {rmse:.6f}, HR@10: {hr_at_10:.6f}, NDCG@10: {ndcg_at_10:.6f}")

    print("=" * 70)

    if rmse < best_rmse:
        best_rmse, best_epoch = rmse, epoch
        best_hr_at_10, best_ndcg_at_10 = hr_at_10, ndcg_at_10
        # Save best model
        print(f"Saving best model at epoch {epoch}")
        print(f"Best epoch: {best_epoch}, best_rmse: {best_rmse:.6f}")
        print("=" * 70)
        print(f"HR@10: {hr_at_10:.6f}, NDCG@10: {ndcg_at_10:.6f}")

    print(f"Done. Best epoch {best_epoch}, best_rmse: {best_rmse:.6f}.")
    print(epoch)
    print("=" * 70)


  2%|▎         | 1/40 [00:01<00:55,  1.41s/it]

[Epoch 0]:: RMSE: 2.466145, HR@10: 0.653031, NDCG@10: 0.190714
Saving best model at epoch 0
Best epoch: 0, best_rmse: 2.466145
HR@10: 0.653031, NDCG@10: 0.190714
Done. Best epoch 0, best_rmse: 2.466145.
0


  5%|▌         | 2/40 [00:02<00:50,  1.34s/it]

[Epoch 1]:: RMSE: 2.284595, HR@10: 0.655515, NDCG@10: 0.187925
Saving best model at epoch 1
Best epoch: 1, best_rmse: 2.284595
HR@10: 0.655515, NDCG@10: 0.187925
Done. Best epoch 1, best_rmse: 2.284595.
1


  8%|▊         | 3/40 [00:03<00:48,  1.31s/it]

[Epoch 2]:: RMSE: 2.264551, HR@10: 0.648228, NDCG@10: 0.184216
Saving best model at epoch 2
Best epoch: 2, best_rmse: 2.264551
HR@10: 0.648228, NDCG@10: 0.184216
Done. Best epoch 2, best_rmse: 2.264551.
2


 10%|█         | 4/40 [00:05<00:46,  1.30s/it]

[Epoch 3]:: RMSE: 2.246852, HR@10: 0.652037, NDCG@10: 0.185599
Saving best model at epoch 3
Best epoch: 3, best_rmse: 2.246852
HR@10: 0.652037, NDCG@10: 0.185599
Done. Best epoch 3, best_rmse: 2.246852.
3


 12%|█▎        | 5/40 [00:06<00:45,  1.30s/it]

[Epoch 4]:: RMSE: 2.265607, HR@10: 0.651209, NDCG@10: 0.182499
Done. Best epoch 3, best_rmse: 2.246852.
4


 15%|█▌        | 6/40 [00:07<00:44,  1.31s/it]

[Epoch 5]:: RMSE: 2.266955, HR@10: 0.643756, NDCG@10: 0.182147
Done. Best epoch 3, best_rmse: 2.246852.
5


 18%|█▊        | 7/40 [00:09<00:43,  1.31s/it]

[Epoch 6]:: RMSE: 2.276916, HR@10: 0.656178, NDCG@10: 0.186589
Done. Best epoch 3, best_rmse: 2.246852.
6


 20%|██        | 8/40 [00:10<00:41,  1.31s/it]

[Epoch 7]:: RMSE: 2.275064, HR@10: 0.648394, NDCG@10: 0.181284
Done. Best epoch 3, best_rmse: 2.246852.
7


 22%|██▎       | 9/40 [00:11<00:40,  1.30s/it]

[Epoch 8]:: RMSE: 2.284742, HR@10: 0.649387, NDCG@10: 0.184526
Done. Best epoch 3, best_rmse: 2.246852.
8


 25%|██▌       | 10/40 [00:13<00:38,  1.30s/it]

[Epoch 9]:: RMSE: 2.286881, HR@10: 0.646406, NDCG@10: 0.184604
Done. Best epoch 3, best_rmse: 2.246852.
9


 28%|██▊       | 11/40 [00:14<00:38,  1.34s/it]

[Epoch 10]:: RMSE: 2.284777, HR@10: 0.644253, NDCG@10: 0.181318
Done. Best epoch 3, best_rmse: 2.246852.
10


 30%|███       | 12/40 [00:15<00:37,  1.35s/it]

[Epoch 11]:: RMSE: 2.234593, HR@10: 0.646075, NDCG@10: 0.182296
Saving best model at epoch 11
Best epoch: 11, best_rmse: 2.234593
HR@10: 0.646075, NDCG@10: 0.182296
Done. Best epoch 11, best_rmse: 2.234593.
11


 32%|███▎      | 13/40 [00:17<00:37,  1.38s/it]

[Epoch 12]:: RMSE: 2.242713, HR@10: 0.650215, NDCG@10: 0.184013
Done. Best epoch 11, best_rmse: 2.234593.
12


 35%|███▌      | 14/40 [00:18<00:36,  1.39s/it]

[Epoch 13]:: RMSE: 2.250595, HR@10: 0.653031, NDCG@10: 0.184572
Done. Best epoch 11, best_rmse: 2.234593.
13


 38%|███▊      | 15/40 [00:20<00:35,  1.41s/it]

[Epoch 14]:: RMSE: 2.260312, HR@10: 0.657668, NDCG@10: 0.191369
Done. Best epoch 11, best_rmse: 2.234593.
14


 40%|████      | 16/40 [00:21<00:33,  1.40s/it]

[Epoch 15]:: RMSE: 2.276869, HR@10: 0.646572, NDCG@10: 0.181666
Done. Best epoch 11, best_rmse: 2.234593.
15


 42%|████▎     | 17/40 [00:23<00:32,  1.42s/it]

[Epoch 16]:: RMSE: 2.260778, HR@10: 0.649884, NDCG@10: 0.185940
Done. Best epoch 11, best_rmse: 2.234593.
16


 45%|████▌     | 18/40 [00:24<00:32,  1.49s/it]

[Epoch 17]:: RMSE: 2.280761, HR@10: 0.649718, NDCG@10: 0.183836
Done. Best epoch 11, best_rmse: 2.234593.
17


 48%|████▊     | 19/40 [00:26<00:30,  1.47s/it]

[Epoch 18]:: RMSE: 2.279555, HR@10: 0.643425, NDCG@10: 0.181779
Done. Best epoch 11, best_rmse: 2.234593.
18


 50%|█████     | 20/40 [00:27<00:28,  1.43s/it]

[Epoch 19]:: RMSE: 2.266348, HR@10: 0.649718, NDCG@10: 0.182219
Done. Best epoch 11, best_rmse: 2.234593.
19


 52%|█████▎    | 21/40 [00:28<00:27,  1.44s/it]

[Epoch 20]:: RMSE: 2.296949, HR@10: 0.644419, NDCG@10: 0.182953
Done. Best epoch 11, best_rmse: 2.234593.
20


 55%|█████▌    | 22/40 [00:30<00:25,  1.41s/it]

[Epoch 21]:: RMSE: 2.256595, HR@10: 0.649718, NDCG@10: 0.182651
Done. Best epoch 11, best_rmse: 2.234593.
21


 57%|█████▊    | 23/40 [00:31<00:23,  1.40s/it]

[Epoch 22]:: RMSE: 2.284018, HR@10: 0.640278, NDCG@10: 0.181019
Done. Best epoch 11, best_rmse: 2.234593.
22


 60%|██████    | 24/40 [00:32<00:21,  1.35s/it]

[Epoch 23]:: RMSE: 2.266742, HR@10: 0.624876, NDCG@10: 0.176533
Done. Best epoch 11, best_rmse: 2.234593.
23


 62%|██████▎   | 25/40 [00:34<00:19,  1.33s/it]

[Epoch 24]:: RMSE: 2.247797, HR@10: 0.653031, NDCG@10: 0.180839
Done. Best epoch 11, best_rmse: 2.234593.
24


 65%|██████▌   | 26/40 [00:35<00:18,  1.31s/it]

[Epoch 25]:: RMSE: 2.259249, HR@10: 0.643922, NDCG@10: 0.183942
Done. Best epoch 11, best_rmse: 2.234593.
25


 68%|██████▊   | 27/40 [00:36<00:16,  1.29s/it]

[Epoch 26]:: RMSE: 2.266426, HR@10: 0.654853, NDCG@10: 0.184973
Done. Best epoch 11, best_rmse: 2.234593.
26


 70%|███████   | 28/40 [00:37<00:15,  1.29s/it]

[Epoch 27]:: RMSE: 2.260797, HR@10: 0.642431, NDCG@10: 0.182773
Done. Best epoch 11, best_rmse: 2.234593.
27


 72%|███████▎  | 29/40 [00:39<00:14,  1.30s/it]

[Epoch 28]:: RMSE: 2.282547, HR@10: 0.642100, NDCG@10: 0.175574
Done. Best epoch 11, best_rmse: 2.234593.
28


 75%|███████▌  | 30/40 [00:40<00:12,  1.29s/it]

[Epoch 29]:: RMSE: 2.281355, HR@10: 0.654687, NDCG@10: 0.187782
Done. Best epoch 11, best_rmse: 2.234593.
29


 78%|███████▊  | 31/40 [00:41<00:11,  1.29s/it]

[Epoch 30]:: RMSE: 2.223268, HR@10: 0.643259, NDCG@10: 0.178749
Saving best model at epoch 30
Best epoch: 30, best_rmse: 2.223268
HR@10: 0.643259, NDCG@10: 0.178749
Done. Best epoch 30, best_rmse: 2.223268.
30


 80%|████████  | 32/40 [00:43<00:10,  1.28s/it]

[Epoch 31]:: RMSE: 2.288813, HR@10: 0.647234, NDCG@10: 0.179209
Done. Best epoch 30, best_rmse: 2.223268.
31


 82%|████████▎ | 33/40 [00:44<00:08,  1.27s/it]

[Epoch 32]:: RMSE: 2.263995, HR@10: 0.663133, NDCG@10: 0.190075
Done. Best epoch 30, best_rmse: 2.223268.
32


 85%|████████▌ | 34/40 [00:45<00:07,  1.27s/it]

[Epoch 33]:: RMSE: 2.288017, HR@10: 0.631004, NDCG@10: 0.173959
Done. Best epoch 30, best_rmse: 2.223268.
33


 88%|████████▊ | 35/40 [00:46<00:06,  1.27s/it]

[Epoch 34]:: RMSE: 2.267496, HR@10: 0.660980, NDCG@10: 0.190571
Done. Best epoch 30, best_rmse: 2.223268.
34


 90%|█████████ | 36/40 [00:48<00:05,  1.30s/it]

[Epoch 35]:: RMSE: 2.284183, HR@10: 0.653362, NDCG@10: 0.184700
Done. Best epoch 30, best_rmse: 2.223268.
35


 92%|█████████▎| 37/40 [00:49<00:04,  1.37s/it]

[Epoch 36]:: RMSE: 2.267484, HR@10: 0.657999, NDCG@10: 0.189776
Done. Best epoch 30, best_rmse: 2.223268.
36


 95%|█████████▌| 38/40 [00:51<00:03,  1.56s/it]

[Epoch 37]:: RMSE: 2.281683, HR@10: 0.653196, NDCG@10: 0.180621
Done. Best epoch 30, best_rmse: 2.223268.
37


 98%|█████████▊| 39/40 [00:53<00:01,  1.53s/it]

[Epoch 38]:: RMSE: 2.246792, HR@10: 0.645412, NDCG@10: 0.178022
Done. Best epoch 30, best_rmse: 2.223268.
38


100%|██████████| 40/40 [00:54<00:00,  1.36s/it]

[Epoch 39]:: RMSE: 2.275295, HR@10: 0.653859, NDCG@10: 0.184137
Done. Best epoch 30, best_rmse: 2.223268.
39





In [None]:
# Example: Load saved model for inference
# Uncomment and run this cell to load a saved model

# from utils.model import AutoRec
# import torch
# 
# # Load checkpoint
# model_dir = os.path.join(os.path.dirname(os.getcwd()), '..', 'models')
# checkpoint_path = os.path.join(model_dir, 'AutoRec.pth')  # or 'AutoRec-best.pth' for best model
# 
# checkpoint = torch.load(checkpoint_path, map_location='cpu')
# 
# # Recreate model with saved parameters
# loaded_model = AutoRec(
#     num_users=checkpoint['num_users'],
#     num_items=checkpoint['num_items'],
#     num_hidden_units=checkpoint['num_hidden_units']
# )
# 
# # Load model weights
# loaded_model.load_state_dict(checkpoint['model_state_dict'])
# loaded_model.eval()
# 
# print(f"Model loaded from: {checkpoint_path}")
# print(f"Epoch: {checkpoint['epoch']}")
# print(f"RMSE: {checkpoint['rmse']:.6f}")
# print(f"HR@10: {checkpoint['hr_at_10']:.6f}")
# print(f"NDCG@10: {checkpoint['ndcg_at_10']:.6f}")
# 
# # Now you can use loaded_model for inference
