# 1) Importing Necessary Modules

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3
import warnings
warnings.filterwarnings("ignore")
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

# 2) Loading The Dataset

In [2]:
cursor = sqlite3.connect(r'../../database/laptrack.db')

laptop_df = pd.read_sql_query("SELECT * FROM Laptop_Phase_2_2", cursor)

cursor.close()

laptop_df.head()

Unnamed: 0,Brand,Laptop_Model_Name,Laptop_Model_Number,Processor_Brand,Processor_Model,Storage_Type,Operating_System,Display_Resolution,Extracted_Rating,Battery_Life(Hours_Upto),...,Stock,Time_Of_Extraction,URL,Source,Storage_Capacity(GB),Display_Size(Inches),RAM(GB),No_Of_Reviews,Laptop_Dimensions,Laptop_Weight(Pounds)
0,Dell,Latitude 3540 Laptop,,Intel,1355U,SSD,Windows 11 Professional,1920x1080 MP,,,...,1,2024-11-04 18:23:39,https://www.amazon.com/sspa/click?ie=UTF8&spc=...,Amazon,4000,15.6,64,0,0.71 x 14.13 x 9.44 inches,4.0
1,HP,17t-cn3004,17t-cn3004208,Intel,Intel Core i5,SSD,Windows 11 Pro,1600x900 Pixels,5.0,,...,1,2024-11-04 18:23:42,https://www.amazon.com/sspa/click?ie=UTF8&spc=...,Amazon,4096,17.3,64,0,0.81 x 10.15 x 15.78 inches,5.0
2,Dell,"Dell Inspiron 15.6"" Touchscreen Laptop",,Intel,1355U,SSD,Windows 11 Pro,1920x1080 Pixels,,,...,1,2024-11-04 18:23:45,https://www.amazon.com/sspa/click?ie=UTF8&spc=...,Amazon,4096,15.6,32,0,9.25 x 0.75 x 14.11 inches,3.6
3,Apple,MacBook Pro,Mvvm2ll/a,Intel,Core i9,SSD,Mac OS X 10.0 Cheetah,2560 x 1600 Pixels,4.0,11.0,...,1,2024-11-04 18:23:54,https://www.amazon.com/2019-Apple-MacBook-16-i...,Amazon,2048,16.0,16,0,15.63 x 2.40 x 11.14 inches,5.68
4,HP,TPN-Q279,TPN-Q279,AMD,Ryzen 5,SSD,Windows 11 Home,1920 x 1080 Pixels,3.8,,...,1,2024-11-04 18:23:57,https://www.amazon.com/HP-i7-1355U-i5-14400F-G...,Amazon,2048,15.6,16,0,0.93 x 10.04 x 14.09 inches,7.39


In [8]:
laptop_df.columns

Index(['Brand', 'Laptop_Model_Name', 'Laptop_Model_Number', 'Processor_Brand',
       'Processor_Model', 'Storage_Type', 'Operating_System',
       'Display_Resolution', 'Extracted_Rating', 'Battery_Life(Hours_Upto)',
       'Price', 'Stock', 'Time_Of_Extraction', 'URL', 'Source',
       'Storage_Capacity(GB)', 'Display_Size(Inches)', 'RAM(GB)',
       'No_Of_Reviews', 'Laptop_Dimensions', 'Laptop_Weight(Pounds)'],
      dtype='object')

In [3]:
laptop_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4196 entries, 0 to 4195
Data columns (total 21 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Brand                     4196 non-null   object 
 1   Laptop_Model_Name         4167 non-null   object 
 2   Laptop_Model_Number       3796 non-null   object 
 3   Processor_Brand           4166 non-null   object 
 4   Processor_Model           4196 non-null   object 
 5   Storage_Type              4196 non-null   object 
 6   Operating_System          4190 non-null   object 
 7   Display_Resolution        4169 non-null   object 
 8   Extracted_Rating          3490 non-null   float64
 9   Battery_Life(Hours_Upto)  1145 non-null   float64
 10  Price                     4196 non-null   float64
 11  Stock                     4196 non-null   int64  
 12  Time_Of_Extraction        4196 non-null   object 
 13  URL                       4196 non-null   object 
 14  Source  

In [6]:
laptop_df.describe(include="all").T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
Brand,4196.0,97.0,HP,994.0,,,,,,,
Laptop_Model_Name,4167.0,2464.0,HP,61.0,,,,,,,
Laptop_Model_Number,3796.0,2561.0,A325-42,43.0,,,,,,,
Processor_Brand,4166.0,9.0,Intel,3309.0,,,,,,,
Processor_Model,4196.0,299.0,Core i5,508.0,,,,,,,
Storage_Type,4196.0,9.0,SSD,4047.0,,,,,,,
Operating_System,4190.0,73.0,Windows 11 Home,1649.0,,,,,,,
Display_Resolution,4169.0,215.0,1920 x 1080 Pixels,608.0,,,,,,,
Extracted_Rating,3490.0,,,,4.236304,0.532802,1.0,4.0,4.3,4.5,5.0
Battery_Life(Hours_Upto),1145.0,,,,12.033013,25.875707,0.01,8.0,10.0,13.0,850.0


# 3) Building Content Based Recommendation System

In [10]:
# Defining the Recommender Class
class LaptopRecommender:
    def __init__(self, df):
        self.df = df.copy()
        self.similarity_matrix = None
        self.processed_features = None
        
    def preprocess_data(self):
        """Preprocess the data for recommendation system"""
        # Extract numerical value from display resolution
        self.df['resolution_pixels'] = self.df['Display_Resolution'].str.extract('(\d+)').astype(float)
        
        # Convert processor brands and models to categorical
        self.df['Processor_Brand'] = pd.Categorical(self.df['Processor_Brand']).codes
        self.df['Processor_Model'] = pd.Categorical(self.df['Processor_Model']).codes
        self.df['Brand'] = pd.Categorical(self.df['Brand']).codes
        self.df['Storage_Type'] = pd.Categorical(self.df['Storage_Type']).codes
        
        # Select features for similarity calculation
        features = [
            'Brand', 'Processor_Brand', 'Processor_Model', 'Storage_Type',
            'Storage_Capacity(GB)', 'RAM(GB)', 'Price', 'Display_Size(Inches)',
            'resolution_pixels', 'Laptop_Weight(Pounds)', 'Extracted_Rating'
        ]
        
        # Handle missing values
        feature_df = self.df[features].copy()
        feature_df = feature_df.fillna(feature_df.mean())
        
        # Scale features
        scaler = MinMaxScaler()
        self.processed_features = pd.DataFrame(
            scaler.fit_transform(feature_df),
            columns=features
        )
        
        # Calculate similarity matrix
        self.similarity_matrix = cosine_similarity(self.processed_features)
        
    def get_recommendations(self, laptop_index, n_recommendations=5):
        """Get n similar laptop recommendations for a given laptop index"""
        if self.similarity_matrix is None:
            self.preprocess_data()
            
        # Get similarity scores
        sim_scores = list(enumerate(self.similarity_matrix[laptop_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        
        # Get top N most similar laptops (excluding itself)
        sim_scores = sim_scores[1:n_recommendations+1]
        laptop_indices = [i[0] for i in sim_scores]
        
        # Get relevant information for recommended laptops
        recommendations = self.df.iloc[laptop_indices][
            ['Brand', 'Laptop_Model_Name', 'Processor_Model', 
             'RAM(GB)', 'Storage_Capacity(GB)', 'Price', 'Extracted_Rating']
        ].copy()
        
        # Add similarity scores
        recommendations['Similarity_Score'] = [i[1] for i in sim_scores]
        
        return recommendations
    
    def get_recommendations_by_features(self, features_dict, n_recommendations=5):
        """Get recommendations based on user-specified features"""
        if self.similarity_matrix is None:
            self.preprocess_data()
            
        # Create a feature vector for the specified features
        query_features = self.processed_features.mean().to_dict()  # Default values
        
        # Map categorical features
        if 'Brand' in features_dict:
            brand_mapping = dict(enumerate(self.df['Brand'].unique()))
            brand_mapping = {v: k for k, v in brand_mapping.items()}
            features_dict['Brand'] = brand_mapping.get(features_dict['Brand'], 0)
            
        # Update with user-specified features
        for feature, value in features_dict.items():
            if feature in query_features:
                query_features[feature] = value
                
        # Convert to DataFrame
        query_df = pd.DataFrame([query_features])
        
        # Calculate similarity with all laptops
        similarities = cosine_similarity(query_df, self.processed_features)[0]
        
        # Get top N recommendations
        top_indices = similarities.argsort()[-n_recommendations:][::-1]
        
        # Get relevant information for recommended laptops
        recommendations = self.df.iloc[top_indices][
            ['Brand', 'Laptop_Model_Name', 'Processor_Model', 
             'RAM(GB)', 'Storage_Capacity(GB)', 'Price', 'Extracted_Rating']
        ].copy()
        
        # Add similarity scores
        recommendations['Similarity_Score'] = similarities[top_indices]
        
        return recommendations

In [11]:
def format_recommendations(recommendations):
    """Format recommendations for display"""
    recommendations['Price'] = recommendations['Price'].map('${:,.2f}'.format)
    recommendations['Similarity_Score'] = recommendations['Similarity_Score'].map('{:.2%}'.format)
    return recommendations.reset_index(drop=True)

In [13]:
# Initialize the recommender
recommender = LaptopRecommender(laptop_df)

# Get recommendations similar to a specific laptop (e.g., index 0)
similar_laptops = recommender.get_recommendations(0)
format_recommendations(similar_laptops)

Unnamed: 0,Brand,Laptop_Model_Name,Processor_Model,RAM(GB),Storage_Capacity(GB),Price,Extracted_Rating,Similarity_Score
0,23,Latitude 3540 Laptop,14,64,4000,"$1,772.95",,100.00%
1,23,Latitude 3540 Laptop,14,64,4000,"$1,772.95",,100.00%
2,23,Latitude 3540 Laptop,14,64,4000,"$1,772.95",,100.00%
3,23,Latitude 3540 Laptop,14,64,4000,"$1,772.95",,100.00%
4,23,Latitude 3540 Laptop,14,64,4000,"$1,772.95",,100.00%


In [14]:
# Get recommendations based on specific features
features = {
    'Brand': 'Dell',
    'Price': 1000,
    'RAM(GB)': 16,
    'Storage_Capacity(GB)': 512
}
feature_based_recommendations = recommender.get_recommendations_by_features(features)
format_recommendations(feature_based_recommendations)

Unnamed: 0,Brand,Laptop_Model_Name,Processor_Model,RAM(GB),Storage_Capacity(GB),Price,Extracted_Rating,Similarity_Score
0,50,ThinkPad P15,259,128,16384,"$6,099.99",3.1,61.70%
1,38,Envy 16t-h1000,126,4,16384,"$3,429.99",,50.64%
2,52,Titan 18 HX A14VIG-036US,214,128,8192,"$5,496.00",4.5,49.10%
3,11,MacBook Pro,250,128,4096,"$5,259.00",,46.99%
4,50,Lenovo ThinkPad P16 Gen 2,214,128,8192,"$5,000.00",5.0,46.67%


This recommendation system has several key features:

- Content-based filtering using multiple relevant laptop features
- Handles both categorical and numerical data
- Provides recommendations in two ways:
- Similar laptops to an existing laptop
- Recommendations based on user-specified features
- Includes proper preprocessing and scaling of features
- Returns similarity scores with recommendations