In [1]:
from google.colab import files

uploaded = files.upload()

Saving laptops.csv to laptops (2).csv


In [2]:
# Cell 1: Install Dependencies
!pip install scikit-learn torch pandas numpy
# Cell 2: Import Libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler

# Cell 3: Load Data (Replace with actual dataset path)
data = pd.read_csv('laptops.csv', encoding='ISO-8859-1')
data.dropna(inplace=True)



In [12]:
# Cell 4: Generate Synthetic User IDs
data['user_id'] = range(len(data))
data['laptop_id'] = range(len(data))


In [4]:
import re
from sklearn.preprocessing import LabelEncoder

# Extract numeric from RAM and Storage
def extract_numeric(value):
    numbers = re.findall(r'\d+', str(value))
    return int(numbers[0]) if numbers else 0

data['ram'] = data['ram'].apply(extract_numeric)
data['storage'] = data['storage'].apply(extract_numeric)
data['display(in inch)'] = pd.to_numeric(data['display(in inch)'], errors='coerce')

# Normalize numeric features
numeric_cols = ['price(in Rs.)','ram','storage','display(in inch)','no_of_ratings','no_of_reviews']
scaler = MinMaxScaler()
data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

# Encode categorical features
for col in ['processor','os']:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])

print(data.head())


   Unnamed: 0                                           img_link  \
0           0  https://rukminim1.flixcart.com/image/312/312/x...   
1           1  https://rukminim1.flixcart.com/image/312/312/x...   
2           2  https://rukminim1.flixcart.com/image/312/312/l...   
3           3  https://rukminim1.flixcart.com/image/312/312/x...   
4           4  https://rukminim1.flixcart.com/image/312/312/x...   

                                       name  price(in Rs.)  processor  \
0             Lenovo Intel Core i5 11th Gen       0.142684         26   
1            Lenovo V15 G2 Core i3 11th Gen       0.065301         22   
2      ASUS TUF Gaming F15 Core i5 10th Gen       0.103218         25   
3  ASUS VivoBook 15 (2022) Core i3 10th Gen       0.054645         21   
4                   Lenovo Athlon Dual Core       0.009107          1   

        ram  os   storage  display(in inch)  rating  no_of_ratings  \
0  0.428571  10  1.000000          0.170940     4.5       0.000839   
1  0.142857 

In [5]:
class MatrixFactorization(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=30):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, embedding_dim)
        self.item_emb = nn.Embedding(num_items, embedding_dim)
        self.fc = nn.Linear(embedding_dim, 1)

    def forward(self, user, item):
        user_embedded = self.user_emb(user)
        item_embedded = self.item_emb(item)
        interaction = user_embedded * item_embedded
        return self.fc(interaction).squeeze()


In [6]:
class HybridLaptopRecommender(nn.Module):
    def __init__(self, num_users, num_items, num_features, embedding_dim=30):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        self.fc_features = nn.Linear(num_features, embedding_dim)
        self.fc = nn.Linear(embedding_dim, 1)

    def forward(self, user_ids, item_ids, features):
        user_embeds = self.user_embedding(user_ids)
        item_embeds = self.item_embedding(item_ids)
        feature_embeds = self.fc_features(features)
        interaction = user_embeds * (item_embeds + feature_embeds)
        return self.fc(interaction).squeeze()


In [20]:
# Reset index
data = data.reset_index(drop=True)

# Synthetic users
num_synthetic_users = 100
num_laptops = len(data)
interactions = []

for user_id in range(num_synthetic_users):
    rated_laptops = np.random.choice(num_laptops, size=np.random.randint(5, 10), replace=False)
    for laptop_id in rated_laptops:
        rating = data.iloc[laptop_id]['rating']
        interactions.append([user_id, laptop_id, rating])

sim_data = pd.DataFrame(interactions, columns=['user_id','laptop_id','rating'])

# Merge laptop features, drop original user_id and rating to avoid conflicts
laptop_features = data.drop(columns=['user_id','rating'])
sim_data = sim_data.merge(laptop_features, on='laptop_id', how='left')

# Now sim_data['rating'] exists safely


In [22]:
# --- 0. Reset index ---
data = data.reset_index(drop=True)

# --- 1. Define features ---
feature_cols = ['price(in Rs.)','ram','storage','display(in inch)',
                'no_of_ratings','no_of_reviews','processor','os']

# --- 2. Simulate multiple ratings per synthetic user ---
import numpy as np
num_synthetic_users = 100
num_laptops = len(data)
interactions = []

for user_id in range(num_synthetic_users):
    rated_laptops = np.random.choice(num_laptops, size=np.random.randint(5, 10), replace=False)
    for laptop_id in rated_laptops:
        rating = data.iloc[laptop_id]['rating']  # use iloc to avoid index issues
        interactions.append([user_id, laptop_id, rating])

sim_data = pd.DataFrame(interactions, columns=['user_id','laptop_id','rating'])

# --- 3. Merge laptop features safely ---
# Drop 'user_id' and 'rating' from original data to avoid conflicts
laptop_features = data.drop(columns=['user_id','rating'])
sim_data = sim_data.merge(laptop_features, on='laptop_id', how='left')

# --- 4. Define number of users/items/features ---
num_users = sim_data['user_id'].max() + 1
num_items = sim_data['laptop_id'].max() + 1
num_features = len(feature_cols)

# --- 5. Prepare PyTorch tensors ---
import torch
user_tensor = torch.tensor(sim_data['user_id'].values, dtype=torch.long)
item_tensor = torch.tensor(sim_data['laptop_id'].values, dtype=torch.long)
feature_tensor = torch.tensor(sim_data[feature_cols].values, dtype=torch.float)
rating_tensor = torch.tensor(sim_data['rating'].values, dtype=torch.float)

# --- 6. Define the Hybrid Recommender Model ---
import torch.nn as nn

class HybridLaptopRecommender(nn.Module):
    def __init__(self, num_users, num_items, num_features, embedding_dim=30):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        self.fc_features = nn.Linear(num_features, embedding_dim)
        self.fc = nn.Linear(embedding_dim, 1)

    def forward(self, user_ids, item_ids, features):
        user_embeds = self.user_embedding(user_ids)
        item_embeds = self.item_embedding(item_ids)
        feature_embeds = self.fc_features(features)
        interaction = user_embeds * (item_embeds + feature_embeds)
        return self.fc(interaction).squeeze()

# --- 7. Initialize model, loss, optimizer ---
model = HybridLaptopRecommender(num_users, num_items, num_features, embedding_dim=30)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

# --- 8. Mini-batch training loop ---
batch_size = 64
num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    permutation = torch.randperm(len(user_tensor))

    epoch_loss = 0
    for i in range(0, len(user_tensor), batch_size):
        idx = permutation[i:i+batch_size]
        batch_users = user_tensor[idx]
        batch_items = item_tensor[idx]
        batch_features = feature_tensor[idx]
        batch_ratings = rating_tensor[idx]

        optimizer.zero_grad()
        predictions = model(batch_users, batch_items, batch_features)
        loss = criterion(predictions, batch_ratings)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * len(idx)

    if (epoch + 1) % 10 == 0:
        avg_loss = epoch_loss / len(user_tensor)
        print(f'Epoch {epoch+1}, Loss: {avg_loss:.4f}')


Epoch 10, Loss: 2.8698
Epoch 20, Loss: 0.9687
Epoch 30, Loss: 0.4232
Epoch 40, Loss: 0.1995
Epoch 50, Loss: 0.0762
Epoch 60, Loss: 0.0341
Epoch 70, Loss: 0.0193
Epoch 80, Loss: 0.0080
Epoch 90, Loss: 0.0038
Epoch 100, Loss: 0.0022


In [26]:
import torch
import torch.nn.functional as F
import numpy as np

def recommend_laptops(target_name, top_n=5, price_thresh=0.2, ram_thresh=1, storage_thresh=1, rating_weight=2.0):
    """
    Recommend laptops similar to target_name.
    Combines:
    - Hybrid embeddings (user + item + features)
    - Feature-aware filtering
    - Re-ranking by specs
    - Boosting high-rated laptops
    """
    if target_name not in data['name'].values:
        return "Laptop not found."

    model.eval()
    device = next(model.parameters()).device  # CPU or GPU

    # --- Target row ---
    target_row = data[data['name'] == target_name].iloc[0]
    target_idx = target_row.name
    target_feature = torch.tensor(target_row[feature_cols].astype(float).values, dtype=torch.float, device=device)
    target_price = target_row['price(in Rs.)']
    target_ram = target_row['ram']
    target_storage = target_row['storage']

    # Make sure features tensor matches embedding size
    all_features = torch.tensor(data[feature_cols].values, dtype=torch.float, device=device)

    # Slice features so that each row corresponds to the item index
    all_features = all_features[:num_items]  # ensures same number of rows as item embeddings

    all_item_embeds = model.item_embedding.weight
    all_feature_embeds = model.fc_features(all_features)
    target_embed = all_item_embeds[target_idx] + 0.5 * all_feature_embeds[target_idx]

    similarities = F.cosine_similarity(target_embed.unsqueeze(0), all_item_embeds + 0.5 * all_feature_embeds)


    # --- Top candidate indices ---
    top_candidates = torch.topk(similarities, top_n*5).indices.tolist()
    top_candidates = [i for i in top_candidates if i != target_idx]

    # --- Feature-aware filtering ---
    filtered_candidates = []
    for idx in top_candidates:
        row = data.iloc[idx]
        price_diff = abs(row['price(in Rs.)'] - target_price)
        ram_diff = abs(row['ram'] - target_ram)
        storage_diff = abs(row['storage'] - target_storage)
        if price_diff <= price_thresh and ram_diff <= ram_thresh and storage_diff <= storage_thresh:
            filtered_candidates.append(idx)
        if len(filtered_candidates) >= top_n*2:  # allow extra for reranking
            break

    # --- Re-rank by specs + rating ---
    target_vector = np.array([target_price, target_ram, target_storage])
    candidate_vectors = data.iloc[filtered_candidates][['price(in Rs.)','ram','storage']].values.astype(float)
    candidate_vectors = candidate_vectors / np.linalg.norm(candidate_vectors, axis=1, keepdims=True)
    target_vector = target_vector / np.linalg.norm(target_vector)

    # Cosine similarity on specs
    spec_sims = candidate_vectors @ target_vector
    # Boost by rating
    ratings = data.iloc[filtered_candidates]['rating'].values
    combined_scores = spec_sims + rating_weight * ratings  # rating_weight adjusts influence
    top_sorted_indices = [filtered_candidates[i] for i in combined_scores.argsort()[::-1][:top_n]]

    return data.iloc[top_sorted_indices][['name','price(in Rs.)','ram','storage','display(in inch)','rating']]


In [27]:
recommend_laptops("Lenovo Intel Core i5 11th Gen", top_n=5)


Unnamed: 0,name,price(in Rs.),ram,storage,display(in inch),rating
604,Lenovo Intel Core i5 11th Gen,0.142684,0.428571,1.0,0.17094,4.5
107,Lenovo Intel Core i5 11th Gen,0.142684,0.428571,1.0,0.17094,4.5
355,Lenovo Intel Core i5 11th Gen,0.142684,0.428571,1.0,0.17094,4.5
236,Lenovo Intel Core i5 11th Gen,0.142684,0.428571,1.0,0.17094,4.5
286,Lenovo Intel Core i5 11th Gen,0.112022,0.428571,1.0,0.17094,4.5


In [35]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(sim_data, test_size=0.2, random_state=42)



In [37]:
# Features
train_features = torch.tensor(train_df[feature_cols].astype(float).values, dtype=torch.float)
test_features = torch.tensor(test_df[feature_cols].astype(float).values, dtype=torch.float)

# User & item IDs
train_users = torch.tensor(train_df['user_id'].values, dtype=torch.long)
train_items = torch.tensor(train_df['laptop_id'].values, dtype=torch.long)
train_ratings = torch.tensor(train_df['rating'].values, dtype=torch.float)

test_users = torch.tensor(test_df['user_id'].values, dtype=torch.long)
test_items = torch.tensor(test_df['laptop_id'].values, dtype=torch.long)
test_ratings = torch.tensor(test_df['rating'].values, dtype=torch.float)
test_features = torch.tensor(test_df[feature_cols].values, dtype=torch.float)


In [38]:
device = next(model.parameters()).device
train_users = train_users.to(device)
train_items = train_items.to(device)
train_features = train_features.to(device)
train_ratings = train_ratings.to(device)

test_users = test_users.to(device)
test_items = test_items.to(device)
test_features = test_features.to(device)
test_ratings = test_ratings.to(device)


In [39]:
import torch.nn.functional as F

# Make sure the model is in evaluation mode
model.eval()

with torch.no_grad():
    # Predictions for test set
    predictions = model(test_users, test_items, test_features)

    # Compute RMSE
    rmse = torch.sqrt(F.mse_loss(predictions, test_ratings))

    # Compute MAE
    mae = F.l1_loss(predictions, test_ratings)

print(f"Test RMSE: {rmse.item():.4f}")
print(f"Test MAE: {mae.item():.4f}")


Test RMSE: 0.0638
Test MAE: 0.0281


In [42]:
import torch
import torch.nn.functional as F
import numpy as np

def topk_metrics(model, test_users, test_items, test_features, test_ratings, k=5):
    """
    Compute Precision@k, Recall@k, and NDCG@k for all users in the test set.
    Handles users with fewer than k rated items.
    """
    model.eval()
    device = next(model.parameters()).device

    # Move tensors to device
    test_users = test_users.to(device)
    test_items = test_items.to(device)
    test_features = test_features.to(device)
    test_ratings = test_ratings.to(device)

    with torch.no_grad():
        # Predicted ratings
        preds = model(test_users, test_items, test_features)

    # Convert to numpy for convenience
    preds = preds.cpu().numpy()
    true = test_ratings.cpu().numpy()
    users = test_users.cpu().numpy()

    user_metrics = []
    unique_users = np.unique(users)

    for u in unique_users:
        idx = np.where(users == u)[0]
        if len(idx) == 0:
            continue

        user_true = true[idx]
        user_pred = preds[idx]

        # Top-k predicted indices (handle <k items)
        topk_len = min(k, len(user_pred))
        topk_idx = np.argsort(user_pred)[-topk_len:][::-1]

        # Items with rating >= 4 are considered relevant
        relevant = user_true >= 4.0
        topk_relevant = relevant[topk_idx]

        # Precision@k
        precision = topk_relevant.sum() / topk_len

        # Recall@k
        recall = topk_relevant.sum() / max(relevant.sum(), 1)

        # DCG / NDCG
        gains = 2 ** user_true[topk_idx] - 1
        discounts = np.log2(np.arange(2, topk_len + 2))
        dcg = np.sum(gains / discounts)

        # Ideal DCG
        ideal_len = min(k, len(user_true))
        ideal_idx = np.argsort(user_true)[-ideal_len:][::-1]
        ideal_gains = 2 ** user_true[ideal_idx] - 1
        ideal_discounts = np.log2(np.arange(2, ideal_len + 2))
        idcg = np.sum(ideal_gains / ideal_discounts)

        ndcg = dcg / idcg if idcg > 0 else 0.0

        user_metrics.append((precision, recall, ndcg))

    # Average across users
    precision_avg = np.mean([m[0] for m in user_metrics])
    recall_avg = np.mean([m[1] for m in user_metrics])
    ndcg_avg = np.mean([m[2] for m in user_metrics])

    print(f"Precision@{k}: {precision_avg:.4f}")
    print(f"Recall@{k}: {recall_avg:.4f}")
    print(f"NDCG@{k}: {ndcg_avg:.4f}")
topk_metrics(model, test_users, test_items, test_features, test_ratings, k=5)

Precision@5: 0.8950
Recall@5: 0.9610
NDCG@5: 1.0000


In [43]:
print("Train set metrics:")
topk_metrics(model, train_users, train_items, train_features, train_ratings, k=5)

print("Test set metrics:")
topk_metrics(model, test_users, test_items, test_features, test_ratings, k=5)


Train set metrics:
Precision@5: 0.9118
Recall@5: 0.8980
NDCG@5: 0.9990
Test set metrics:
Precision@5: 0.8950
Recall@5: 0.9610
NDCG@5: 1.0000


In [44]:
torch.save(model.state_dict(), "hybrid_laptop_model.pth")
import pickle

extra_info = {
    "feature_cols": feature_cols,
    "num_users": num_users,
    "num_items": num_items,
    "embedding_dim": 30
}

with open("model_info.pkl", "wb") as f:
    pickle.dump(extra_info, f)

print("Model and metadata saved!")

Model and metadata saved!
