In [2]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split


In [3]:
items_df = pd.read_csv("../data/cleaned_data/fashion_dataset.csv")
print("Total items:", len(items_df))
items_df.head()


Total items: 289222


Unnamed: 0,image_name,x_1,y_1,x_2,y_2,width,height,area,category_id,category_name,category_type,category_type_name,eval_status,positive_attributes,absolute_path,num_attributes
0,img/Sheer_Pleated-Front_Blouse/img_00000001.jpg,72,79,232,273,160,194,31040,3,Blouse,1,upper-body,train,"[717, 818]",../data\img/Sheer_Pleated-Front_Blouse/img_000...,2
1,img/Sheer_Pleated-Front_Blouse/img_00000002.jpg,67,59,155,161,88,102,8976,3,Blouse,1,upper-body,train,"[717, 818]",../data\img/Sheer_Pleated-Front_Blouse/img_000...,2
2,img/Sheer_Pleated-Front_Blouse/img_00000003.jpg,65,65,156,200,91,135,12285,3,Blouse,1,upper-body,val,"[141, 717, 837, 956]",../data\img/Sheer_Pleated-Front_Blouse/img_000...,4
3,img/Sheer_Pleated-Front_Blouse/img_00000004.jpg,51,62,167,182,116,120,13920,3,Blouse,1,upper-body,train,[716],../data\img/Sheer_Pleated-Front_Blouse/img_000...,1
4,img/Sheer_Pleated-Front_Blouse/img_00000005.jpg,46,88,166,262,120,174,20880,3,Blouse,1,upper-body,test,"[349, 405, 717, 810]",../data\img/Sheer_Pleated-Front_Blouse/img_000...,4


In [4]:
num_users = 500
num_items = len(items_df)
rng = np.random.default_rng(42)

rows = []
for u in range(num_users):
    # pick 20 distinct items per user
    picks = rng.choice(num_items, size=20, replace=False)
    for i in picks:
        rating = rng.integers(1, 6)       # ratings in [1,5]
        rows.append((u, i, rating))

inter_df = pd.DataFrame(rows, columns=["user_idx","item_idx","rating"])
print("Synthetic interactions:", inter_df.shape)
inter_df.head()


Synthetic interactions: (10000, 3)


Unnamed: 0,user_idx,item_idx,rating
0,0,223831,4
1,0,207506,1
2,0,58267,4
3,0,282163,4
4,0,242872,2


In [5]:
train_val, test_df = train_test_split(inter_df, test_size=0.2, random_state=42)
train_df, val_df   = train_test_split(train_val, test_size=0.25, random_state=42)

print("Train:", len(train_df), "Val:", len(val_df), "Test:", len(test_df))


Train: 6000 Val: 2000 Test: 2000


In [6]:
class CFDataset(Dataset):
    def __init__(self, df):
        self.users   = torch.tensor(df["user_idx"].values, dtype=torch.long)
        self.items   = torch.tensor(df["item_idx"].values, dtype=torch.long)
        self.ratings = torch.tensor(df["rating"].values,   dtype=torch.float32)

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]


In [7]:
batch_size = 512
train_loader = DataLoader(CFDataset(train_df), batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(CFDataset(val_df),   batch_size=batch_size, shuffle=False)
test_loader  = DataLoader(CFDataset(test_df),  batch_size=batch_size, shuffle=False)


In [8]:
class MFNN(nn.Module):
    def __init__(self, n_users, n_items, emb_size=32, hidden_layers=[64,32]):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, emb_size)
        self.item_emb = nn.Embedding(n_items, emb_size)

        layers = []
        in_dim = emb_size * 2
        for h in hidden_layers:
            layers += [nn.Linear(in_dim, h), nn.ReLU()]
            in_dim = h
        layers.append(nn.Linear(in_dim, 1))
        self.mlp = nn.Sequential(*layers)

    def forward(self, u, i):
        u_e = self.user_emb(u)
        i_e = self.item_emb(i)
        x   = torch.cat([u_e, i_e], dim=-1)
        return self.mlp(x).squeeze()


In [21]:
device    = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model     = MFNN(num_users, num_items).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 20


In [22]:
for epoch in range(1, num_epochs+1):
    # train
    model.train()
    train_loss = 0
    for u,i,r in train_loader:
        u,i,r = u.to(device), i.to(device), r.to(device)
        pred  = model(u,i)
        loss  = criterion(pred, r)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * r.size(0)
    train_loss /= len(train_loader.dataset)

    # validate
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for u,i,r in val_loader:
            u,i,r = u.to(device), i.to(device), r.to(device)
            val_loss += criterion(model(u,i), r).item() * r.size(0)
    val_loss /= len(val_loader.dataset)

    print(f"Epoch {epoch}/{num_epochs}  "
          f"Train MSE: {train_loss:.4f}  "
          f"Val MSE: {val_loss:.4f}")


Epoch 1/20  Train MSE: 9.3513  Val MSE: 8.2427
Epoch 2/20  Train MSE: 7.2608  Val MSE: 5.9810
Epoch 3/20  Train MSE: 4.8711  Val MSE: 3.6491
Epoch 4/20  Train MSE: 2.8943  Val MSE: 2.3314
Epoch 5/20  Train MSE: 2.1919  Val MSE: 2.1758
Epoch 6/20  Train MSE: 2.0830  Val MSE: 2.0856
Epoch 7/20  Train MSE: 1.9670  Val MSE: 2.0588
Epoch 8/20  Train MSE: 1.9232  Val MSE: 2.0557
Epoch 9/20  Train MSE: 1.8810  Val MSE: 2.0533
Epoch 10/20  Train MSE: 1.8440  Val MSE: 2.0577
Epoch 11/20  Train MSE: 1.8100  Val MSE: 2.0586
Epoch 12/20  Train MSE: 1.7768  Val MSE: 2.0626
Epoch 13/20  Train MSE: 1.7455  Val MSE: 2.0622
Epoch 14/20  Train MSE: 1.7145  Val MSE: 2.0650
Epoch 15/20  Train MSE: 1.6860  Val MSE: 2.0668
Epoch 16/20  Train MSE: 1.6565  Val MSE: 2.0731
Epoch 17/20  Train MSE: 1.6278  Val MSE: 2.0749
Epoch 18/20  Train MSE: 1.5991  Val MSE: 2.0790
Epoch 19/20  Train MSE: 1.5707  Val MSE: 2.0852
Epoch 20/20  Train MSE: 1.5436  Val MSE: 2.0888


In [23]:
import math

model.eval()
mse = 0
with torch.no_grad():
    for u,i,r in test_loader:
        u,i,r = u.to(device), i.to(device), r.to(device)
        mse   += criterion(model(u,i), r).item() * r.size(0)
mse  /= len(test_loader.dataset)
rmse = math.sqrt(mse)
print(f"Test RMSE: {rmse:.4f}")


Test RMSE: 1.4766


In [24]:
os.makedirs("../models", exist_ok=True)
torch.save(model.state_dict(), "../models/mfnn_cf.pth")
print("Saved model → ../models/mfnn_cf.pth")


Saved model → ../models/mfnn_cf.pth
