In [1]:
# Import AutoRec model and data utilities
from utils.model import AutoRec
from utils.autorecdata import AutoRecData
from utils.preprocessor import PreProcessor
# Import external libraries
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import os
from tqdm.auto import tqdm

print("=" * 70)
print("Imports Successful")
print("=" * 70)

Imports Successful


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_dir = os.path.join(os.path.dirname(os.getcwd()), '..', 'data', 'ml-1m')
data_path = os.path.join(data_dir, 'ratings.dat')

# Check if file exists
if not os.path.exists(data_path):
    raise FileNotFoundError(
        f"Data file not found at: {data_path}\n"
        f"Please ensure the MovieLens 1M dataset is downloaded and extracted."
    )

def load_ml_1m_data(data_path = data_path) -> pd.DataFrame:  
    print("=" * 70)
    print("Loading MovieLens 1M Dataset")
    print("=" * 70)
    print(f"Data path: {data_path}")
    return pd.read_csv(
        data_path,
        sep='::',
        header=None,
        names=['user_id', 'item_id', 'rating', 'timestamp'],
        engine='python',  # Explicitly use python engine to avoid warning
        dtype={
            'user_id': np.int32,
            'item_id': np.int32,
            'rating': np.float32,
            'timestamp': np.int32
        }
    )


# Load ratings data with proper engine to avoid warnings
print("\nLoading ratings data...")

ratings_df = load_ml_1m_data()

print(f"✓ Successfully loaded {len(ratings_df):,} ratings")


Loading ratings data...
Loading MovieLens 1M Dataset
Data path: /Users/abbas/Documents/Codes/thesis/NCF/src/../data/ml-1m/ratings.dat
✓ Successfully loaded 1,000,209 ratings


In [3]:
# Create PreProcessor instance
preprocessor = PreProcessor()

# Preprocess the data: split into train/test and create rating matrices
print("\nSplitting data into train/test sets and creating rating matrices...")
print("=" * 70)
train_mat, test_mat, num_users, num_items = preprocessor.preprocess_ml1m_data(
    ratings_df, 
    test_size=0.2,  # 20% for testing
    random_state=42  # For reproducibility
)

print(f"✓ Data preprocessing complete!")
print("=" * 70)


Splitting data into train/test sets and creating rating matrices...
✓ Data preprocessing complete!


In [4]:
train_set = AutoRecData(data=train_mat)
test_set = AutoRecData(data=test_mat)

In [5]:
train_loader = data.DataLoader(
    dataset=train_set,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)
print('Train loader created')
print("=" * 70)
test_loader = data.DataLoader(
    dataset=test_set, batch_size=len(test_set), shuffle=False, num_workers=0
)
print('Test loader created')
print("=" * 70)

Train loader created
Test loader created


In [6]:
# Set device (CPU or CUDA if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

model = AutoRec(
    num_users=num_users,
    num_items=num_items,
    num_hidden_units=500,
).to(device)

Using device: cpu


In [None]:
loss_f = nn.MSELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=0,
)

best_epoch, best_rmse = 0, np.inf
best_hr_at_10, best_ndcg_at_10 = 0.0, 0.0


print(model)

AutoRec(
  (encoder): Sequential(
    (0): Linear(in_features=3706, out_features=500, bias=True)
    (1): Sigmoid()
  )
  (decoder): Sequential(
    (0): Linear(in_features=500, out_features=3706, bias=True)
  )
)


In [None]:
from utils.helper import get_metrics, get_ranking_metrics
for epoch in tqdm(range(40)):
    model.train()
    for input_vec in train_loader:
        input_mask = (input_vec > 0).to(device)
        input_vec = input_vec.float().to(device)

        model.zero_grad()
        reconstruction = model(input_vec)
        loss = loss_f(reconstruction * input_mask, input_vec * input_mask)
        loss.backward()
        optimizer.step()

    model.eval()
    rmse = get_metrics(model=model, train_set=train_set, test_set=test_set, device=device)
    hr_at_10, ndcg_at_10 = get_ranking_metrics(
        model=model, 
        train_set=train_set, 
        test_set=test_set, 
        top_k=10, 
        device=device
    )

    print(f"[Epoch {epoch}]:: RMSE: {rmse:.6f}, HR@10: {hr_at_10:.6f}, NDCG@10: {ndcg_at_10:.6f}")

    print("=" * 70)

    if rmse < best_rmse:
        best_rmse, best_epoch = rmse, epoch
        best_hr_at_10, best_ndcg_at_10 = hr_at_10, ndcg_at_10
        # Save best model
        model_dir = os.path.join(os.path.dirname(os.getcwd()), '..', 'models')
        os.makedirs(model_dir, exist_ok=True)
        best_model_path = os.path.join(model_dir, 'AutoRec-best.pth')
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'rmse': rmse,
            'hr_at_10': hr_at_10,
            'ndcg_at_10': ndcg_at_10,
            'num_users': num_users,
            'num_items': num_items,
            'num_hidden_units': 500,
        }, best_model_path)

    print(f"Done. Best epoch {best_epoch}, best_rmse: {best_rmse:.6f}.")
    print(epoch)
    print("=" * 70)

# Save final model after training
print("\n" + "=" * 70)
print("Saving final model...")
print("=" * 70)
model_dir = os.path.join(os.path.dirname(os.getcwd()), '..', 'models')
os.makedirs(model_dir, exist_ok=True)
final_model_path = os.path.join(model_dir, 'AutoRec.pth')
torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'rmse': rmse,
    'hr_at_10': hr_at_10,
    'ndcg_at_10': ndcg_at_10,
    'num_users': num_users,
    'num_items': num_items,
    'num_hidden_units': 500,
    'best_epoch': best_epoch,
    'best_rmse': best_rmse,
    'best_hr_at_10': best_hr_at_10,
    'best_ndcg_at_10': best_ndcg_at_10,
}, final_model_path)
print(f"✓ Final model saved to: {final_model_path}")
print(f"✓ Best model (epoch {best_epoch}) saved to: {os.path.join(model_dir, 'AutoRec-best.pth')}")
print(f"\nBest metrics:")
print(f"  RMSE: {best_rmse:.6f}")
print(f"  HR@10: {best_hr_at_10:.6f}")
print(f"  NDCG@10: {best_ndcg_at_10:.6f}")
print("=" * 70)

  2%|▎         | 1/40 [00:31<20:30, 31.54s/it]

[Epoch 0]:: RMSE: 1.096847, HR@10: 0.001987, NDCG@10: 0.000153
Done. Best epoch 0 best_rmse: 1.096847.
0


  5%|▌         | 2/40 [01:01<19:26, 30.71s/it]

[Epoch 1]:: RMSE: 1.058839, HR@10: 0.006128, NDCG@10: 0.000503
Done. Best epoch 1 best_rmse: 1.058839.
1


  8%|▊         | 3/40 [01:32<19:02, 30.89s/it]

[Epoch 2]:: RMSE: 1.041107, HR@10: 0.003644, NDCG@10: 0.000334
Done. Best epoch 2 best_rmse: 1.041107.
2


 10%|█         | 4/40 [02:03<18:25, 30.71s/it]

[Epoch 3]:: RMSE: 1.020440, HR@10: 0.011262, NDCG@10: 0.000927
Done. Best epoch 3 best_rmse: 1.020440.
3


 12%|█▎        | 5/40 [02:33<17:55, 30.73s/it]

[Epoch 4]:: RMSE: 1.003844, HR@10: 0.060947, NDCG@10: 0.005589
Done. Best epoch 4 best_rmse: 1.003844.
4


 15%|█▌        | 6/40 [03:04<17:23, 30.69s/it]

[Epoch 5]:: RMSE: 1.003115, HR@10: 0.062935, NDCG@10: 0.005380
Done. Best epoch 5 best_rmse: 1.003115.
5


 18%|█▊        | 7/40 [03:35<16:50, 30.62s/it]

[Epoch 6]:: RMSE: 0.995417, HR@10: 0.130672, NDCG@10: 0.012251
Done. Best epoch 6 best_rmse: 0.995417.
6


 20%|██        | 8/40 [04:06<16:23, 30.73s/it]

[Epoch 7]:: RMSE: 0.992294, HR@10: 0.088937, NDCG@10: 0.007985
Done. Best epoch 7 best_rmse: 0.992294.
7


 22%|██▎       | 9/40 [04:36<15:46, 30.54s/it]

[Epoch 8]:: RMSE: 0.977391, HR@10: 0.113779, NDCG@10: 0.011780
Done. Best epoch 8 best_rmse: 0.977391.
8


 25%|██▌       | 10/40 [05:07<15:22, 30.76s/it]

[Epoch 9]:: RMSE: 0.977834, HR@10: 0.025505, NDCG@10: 0.002207
Done. Best epoch 9 best_rmse: 0.977391.
9


 28%|██▊       | 11/40 [05:37<14:50, 30.70s/it]

[Epoch 10]:: RMSE: 0.967007, HR@10: 0.094899, NDCG@10: 0.008892
Done. Best epoch 10 best_rmse: 0.967007.
10


 30%|███       | 12/40 [06:11<14:44, 31.58s/it]

[Epoch 11]:: RMSE: 0.962501, HR@10: 0.024180, NDCG@10: 0.001926
Done. Best epoch 11 best_rmse: 0.962501.
11


 32%|███▎      | 13/40 [06:42<14:10, 31.51s/it]

[Epoch 12]:: RMSE: 0.961690, HR@10: 0.027493, NDCG@10: 0.002104
Done. Best epoch 12 best_rmse: 0.961690.
12


 35%|███▌      | 14/40 [07:13<13:31, 31.20s/it]

[Epoch 13]:: RMSE: 0.957936, HR@10: 0.018880, NDCG@10: 0.001527
Done. Best epoch 13 best_rmse: 0.957936.
13


 38%|███▊      | 15/40 [07:44<13:00, 31.21s/it]

[Epoch 14]:: RMSE: 0.957433, HR@10: 0.071216, NDCG@10: 0.005965
Done. Best epoch 14 best_rmse: 0.957433.
14


 40%|████      | 16/40 [08:16<12:36, 31.50s/it]

[Epoch 15]:: RMSE: 0.954139, HR@10: 0.084962, NDCG@10: 0.009020
Done. Best epoch 15 best_rmse: 0.954139.
15


 42%|████▎     | 17/40 [08:49<12:11, 31.81s/it]

[Epoch 16]:: RMSE: 0.953895, HR@10: 0.075356, NDCG@10: 0.006693
Done. Best epoch 16 best_rmse: 0.953895.
16


 45%|████▌     | 18/40 [09:22<11:48, 32.21s/it]

[Epoch 17]:: RMSE: 0.955881, HR@10: 0.071712, NDCG@10: 0.007906
Done. Best epoch 17 best_rmse: 0.953895.
17


 48%|████▊     | 19/40 [09:55<11:22, 32.52s/it]

[Epoch 18]:: RMSE: 0.954256, HR@10: 0.076184, NDCG@10: 0.008127
Done. Best epoch 18 best_rmse: 0.953895.
18


 50%|█████     | 20/40 [10:28<10:49, 32.50s/it]

[Epoch 19]:: RMSE: 0.955802, HR@10: 0.077675, NDCG@10: 0.007827
Done. Best epoch 19 best_rmse: 0.953895.
19


 52%|█████▎    | 21/40 [10:59<10:10, 32.11s/it]

[Epoch 20]:: RMSE: 0.956356, HR@10: 0.074528, NDCG@10: 0.007641
Done. Best epoch 20 best_rmse: 0.953895.
20


 55%|█████▌    | 22/40 [11:30<09:32, 31.80s/it]

[Epoch 21]:: RMSE: 0.957956, HR@10: 0.074694, NDCG@10: 0.007218
Done. Best epoch 21 best_rmse: 0.953895.
21


 57%|█████▊    | 23/40 [12:01<08:57, 31.63s/it]

[Epoch 22]:: RMSE: 0.960470, HR@10: 0.088274, NDCG@10: 0.008840
Done. Best epoch 22 best_rmse: 0.953895.
22


 60%|██████    | 24/40 [12:33<08:27, 31.72s/it]

[Epoch 23]:: RMSE: 0.961164, HR@10: 0.073534, NDCG@10: 0.007693
Done. Best epoch 23 best_rmse: 0.953895.
23


 62%|██████▎   | 25/40 [13:06<08:03, 32.22s/it]

[Epoch 24]:: RMSE: 0.961629, HR@10: 0.082146, NDCG@10: 0.007577
Done. Best epoch 24 best_rmse: 0.953895.
24


 65%|██████▌   | 26/40 [13:40<07:34, 32.49s/it]

[Epoch 25]:: RMSE: 0.965246, HR@10: 0.072044, NDCG@10: 0.007405
Done. Best epoch 25 best_rmse: 0.953895.
25


 68%|██████▊   | 27/40 [14:14<07:09, 33.00s/it]

[Epoch 26]:: RMSE: 0.967782, HR@10: 0.087281, NDCG@10: 0.009043
Done. Best epoch 26 best_rmse: 0.953895.
26


 70%|███████   | 28/40 [14:46<06:31, 32.66s/it]

[Epoch 27]:: RMSE: 0.968991, HR@10: 0.108148, NDCG@10: 0.012260
Done. Best epoch 27 best_rmse: 0.953895.
27


 72%|███████▎  | 29/40 [15:17<05:54, 32.23s/it]

[Epoch 28]:: RMSE: 0.970085, HR@10: 0.102352, NDCG@10: 0.012475
Done. Best epoch 28 best_rmse: 0.953895.
28


 75%|███████▌  | 30/40 [15:48<05:18, 31.83s/it]

[Epoch 29]:: RMSE: 0.970482, HR@10: 0.129016, NDCG@10: 0.016404
Done. Best epoch 29 best_rmse: 0.953895.
29


 78%|███████▊  | 31/40 [16:18<04:43, 31.47s/it]

[Epoch 30]:: RMSE: 0.971803, HR@10: 0.089765, NDCG@10: 0.009110
Done. Best epoch 30 best_rmse: 0.953895.
30


 80%|████████  | 32/40 [16:50<04:12, 31.57s/it]

[Epoch 31]:: RMSE: 0.974468, HR@10: 0.094071, NDCG@10: 0.010266
Done. Best epoch 31 best_rmse: 0.953895.
31


 82%|████████▎ | 33/40 [17:27<03:51, 33.10s/it]

[Epoch 32]:: RMSE: 0.977396, HR@10: 0.090759, NDCG@10: 0.010251
Done. Best epoch 32 best_rmse: 0.953895.
32


 85%|████████▌ | 34/40 [18:00<03:19, 33.24s/it]

[Epoch 33]:: RMSE: 0.978467, HR@10: 0.093077, NDCG@10: 0.009915
Done. Best epoch 33 best_rmse: 0.953895.
33


 88%|████████▊ | 35/40 [18:33<02:44, 32.89s/it]

[Epoch 34]:: RMSE: 0.978754, HR@10: 0.108480, NDCG@10: 0.011278
Done. Best epoch 34 best_rmse: 0.953895.
34


 90%|█████████ | 36/40 [19:05<02:11, 32.88s/it]

[Epoch 35]:: RMSE: 0.981376, HR@10: 0.133488, NDCG@10: 0.017146
Done. Best epoch 35 best_rmse: 0.953895.
35


 92%|█████████▎| 37/40 [19:37<01:37, 32.61s/it]

[Epoch 36]:: RMSE: 0.980207, HR@10: 0.129182, NDCG@10: 0.014294
Done. Best epoch 36 best_rmse: 0.953895.
36


 95%|█████████▌| 38/40 [20:09<01:04, 32.45s/it]

[Epoch 37]:: RMSE: 0.984201, HR@10: 0.145412, NDCG@10: 0.018827
Done. Best epoch 37 best_rmse: 0.953895.
37


 98%|█████████▊| 39/40 [20:41<00:32, 32.28s/it]

[Epoch 38]:: RMSE: 0.983071, HR@10: 0.138125, NDCG@10: 0.017024
Done. Best epoch 38 best_rmse: 0.953895.
38


100%|██████████| 40/40 [21:13<00:00, 31.84s/it]

[Epoch 39]:: RMSE: 0.984097, HR@10: 0.116429, NDCG@10: 0.013393
Done. Best epoch 39 best_rmse: 0.953895.
39





In [None]:
# Example: Load saved model for inference
# Uncomment and run this cell to load a saved model

# from utils.model import AutoRec
# import torch
# 
# # Load checkpoint
# model_dir = os.path.join(os.path.dirname(os.getcwd()), '..', 'models')
# checkpoint_path = os.path.join(model_dir, 'AutoRec.pth')  # or 'AutoRec-best.pth' for best model
# 
# checkpoint = torch.load(checkpoint_path, map_location='cpu')
# 
# # Recreate model with saved parameters
# loaded_model = AutoRec(
#     num_users=checkpoint['num_users'],
#     num_items=checkpoint['num_items'],
#     num_hidden_units=checkpoint['num_hidden_units']
# )
# 
# # Load model weights
# loaded_model.load_state_dict(checkpoint['model_state_dict'])
# loaded_model.eval()
# 
# print(f"Model loaded from: {checkpoint_path}")
# print(f"Epoch: {checkpoint['epoch']}")
# print(f"RMSE: {checkpoint['rmse']:.6f}")
# print(f"HR@10: {checkpoint['hr_at_10']:.6f}")
# print(f"NDCG@10: {checkpoint['ndcg_at_10']:.6f}")
# 
# # Now you can use loaded_model for inference
