In [None]:
# Import libraries
!pip install ipywidgets
import pandas as pd
import numpy as np
import torch
import ipywidgets as widgets

from IPython.display import display
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity




In [None]:
import pandas as pd

# Read the laptop catalogue from the CSV file (path is relative to the notebook)
laptop_data = pd.read_csv('laptop_data.csv')

# Preview the first rows to understand the column layout
print("Laptop Data Overview:", laptop_data.head(), sep="\n")

# Summary statistics for the columns
print("\nLaptop Data Statistics:", laptop_data.describe(), sep="\n")

# Number of missing values in each column
print("\nMissing Values in Laptop Data:", laptop_data.isnull().sum(), sep="\n")


Laptop Data Overview:
    Brand   Model Name Core CPU Manufacturer Clock Speed RAM Size  \
0    Dell     Inspiron   i5            Intel     2.4 GHz      8GB   
1     MSI         GL65   i7            Intel     2.6 GHz     16GB   
2      HP    EliteBook   i7            Intel     2.8 GHz     16GB   
3  Lenovo      IdeaPad   i3            Intel     2.1 GHz      8GB   
4    ASUS  ZenBook Pro   i9            Intel     3.1 GHz     64GB   

  Storage Type Display Type Display Size Graphics Processor Screen Resolution  \
0          SSD          LCD        15.6"          Intel UHD         1920x1080   
1      HDD+SSD          IPS        15.6"         NVIDIA GTX         1920x1080   
2          SSD          LED          14"          Intel UHD         1920x1080   
3          HDD           TN        15.6"          Intel UHD          1366x768   
4          SSD         OLED        15.6"         NVIDIA RTX         3840x2160   

           OS Laptop Weight    Special Features Warranty Average Battery Lif

In [None]:
# Rich preview of the first rows; as the cell's last expression it is rendered by the notebook
laptop_data.head()

Unnamed: 0,Brand,Model Name,Core,CPU Manufacturer,Clock Speed,RAM Size,Storage Type,Display Type,Display Size,Graphics Processor,Screen Resolution,OS,Laptop Weight,Special Features,Warranty,Average Battery Life,Price,Description
0,Dell,Inspiron,i5,Intel,2.4 GHz,8GB,SSD,LCD,"15.6""",Intel UHD,1920x1080,Windows 10,2.5 kg,Backlit Keyboard,1 year,6 hours,35000,The Dell Inspiron is a versatile laptop that c...
1,MSI,GL65,i7,Intel,2.6 GHz,16GB,HDD+SSD,IPS,"15.6""",NVIDIA GTX,1920x1080,Windows 10,2.3 kg,RGB Keyboard,2 years,4 hours,55000,The MSI GL65 is a high-performance laptop desi...
2,HP,EliteBook,i7,Intel,2.8 GHz,16GB,SSD,LED,"14""",Intel UHD,1920x1080,Windows 11,1.5 kg,Fingerprint Sensor,3 years,8 hours,90000,The HP EliteBook is a premium laptop designed ...
3,Lenovo,IdeaPad,i3,Intel,2.1 GHz,8GB,HDD,TN,"15.6""",Intel UHD,1366x768,Windows 10,2.2 kg,Dolby Audio,1 year,5 hours,25000,The Lenovo IdeaPad is a versatile laptop that ...
4,ASUS,ZenBook Pro,i9,Intel,3.1 GHz,64GB,SSD,OLED,"15.6""",NVIDIA RTX,3840x2160,Windows 10,1.8 kg,NanoEdge Display,2 years,7 hours,200000,The ASUS ZenBook Pro is a high-end laptop that...


In [None]:
# Count the missing values in every column
print(laptop_data.isnull().sum())

Brand                   0
Model Name              0
Core                    0
CPU Manufacturer        0
Clock Speed             0
RAM Size                0
Storage Type            0
Display Type            0
Display Size            0
Graphics Processor      0
Screen Resolution       0
OS                      0
Laptop Weight           0
Special Features        0
Warranty                0
Average Battery Life    0
Price                   0
Description             0
dtype: int64


In [None]:
# Show the dtype pandas inferred for each column.
# NOTE(review): the output reports Price as `object` rather than a numeric dtype —
# presumably the values are stored as text; confirm and convert before any numeric use.
print(laptop_data.dtypes)

Brand                   object
Model Name              object
Core                    object
CPU Manufacturer        object
Clock Speed             object
RAM Size                object
Storage Type            object
Display Type            object
Display Size            object
Graphics Processor      object
Screen Resolution       object
OS                      object
Laptop Weight           object
Special Features        object
Warranty                object
Average Battery Life    object
Price                   object
Description             object
dtype: object


## Data Processing

In [None]:
# Replace missing values with empty strings so the tokenizer never receives NaN.
# The frame is reassigned instead of mutated with inplace=True, which brings no
# benefit and makes cross-cell state changes harder to follow.
laptop_data = laptop_data.fillna('')

# Combine brand, model name and description into a single text column.
# Each part is cast to str first so a column that pandas parsed as non-string
# cannot break the concatenation.
laptop_data['text'] = laptop_data['Brand'].astype(str).str.cat(
    [laptop_data['Model Name'].astype(str), laptop_data['Description'].astype(str)],
    sep=' ',
)

# Check the new 'text' column
print("\nLaptop Data Text Feature:")
print(laptop_data['text'].head())



Laptop Data Text Feature:
0    Dell Inspiron The Dell Inspiron is a versatile...
1    MSI GL65 The MSI GL65 is a high-performance la...
2    HP EliteBook The HP EliteBook is a premium lap...
3    Lenovo IdeaPad The Lenovo IdeaPad is a versati...
4    ASUS ZenBook Pro The ASUS ZenBook Pro is a hig...
Name: text, dtype: object


## Feature Engineering with Hugging Face LLM

In [None]:

# The tokenizer and the encoder are loaded from the same Hugging Face checkpoint
MODEL_CHECKPOINT = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = AutoModel.from_pretrained(MODEL_CHECKPOINT)

# Function to create embeddings for the text data
def get_embeddings(text, max_length=128):
    """Embed ``text`` with the module-level ``tokenizer`` and ``model``.

    Token vectors from ``last_hidden_state`` are averaged using the attention
    mask, so padding positions do not contribute. For a single string (no
    padding) this equals a plain mean over tokens; for a list of strings it
    keeps shorter texts from being diluted by padding.

    Args:
        text: A string, or a list of strings, to embed.
        max_length: Maximum number of tokens kept per text; longer inputs are
            truncated. Defaults to 128.

    Returns:
        numpy.ndarray of shape ``(batch, hidden_size)``; ``batch`` is 1 for a
        single string.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    with torch.no_grad():
        outputs = model(**inputs)
        token_embeddings = outputs.last_hidden_state
        # (batch, seq) -> (batch, seq, 1) so the mask broadcasts over the hidden dimension
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
        summed = (token_embeddings * mask).sum(dim=1)
        # Clamp guards against division by zero if a row has no real tokens
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        pooled = summed / token_counts
    return pooled.numpy()

# Embed every product text, one row at a time
laptop_data['embeddings'] = laptop_data['text'].apply(get_embeddings)


# Inspect the embedding of the first product
first_embedding = laptop_data['embeddings'].iloc[0]
print("\nLaptop Data Embeddings (first product):", first_embedding, sep="\n")





Laptop Data Embeddings (first product):
[[-1.15857989e-01  1.64844379e-01 -2.00943891e-02  6.97854161e-02
   9.24687833e-02 -1.13538936e-01  2.84679537e-03  1.49879307e-01
   1.12662531e-01  2.06302553e-02  1.04948461e-01 -6.58793561e-03
   3.27110477e-03  7.23243803e-02  8.80477130e-02  6.34213239e-02
   1.76339149e-01 -2.23702937e-01  2.61930656e-02 -1.10580720e-01
  -1.28620118e-02 -5.88487722e-02 -1.52060211e-01  6.85454756e-02
   1.07151516e-01  1.07430629e-02  4.56405655e-02  9.47704073e-04
  -2.11198464e-01 -2.31197011e-02 -7.13143125e-02  8.36236775e-03
  -7.97853153e-03 -7.89451078e-02 -1.19757786e-01  2.79038921e-02
   1.15992635e-01 -8.35279524e-02  4.74919304e-02 -2.21115559e-01
  -2.75090914e-02 -2.55886950e-02  1.24985032e-01  5.36295176e-02
   1.76145852e-01 -1.94689080e-01 -8.08203444e-02  8.98369402e-02
  -7.68470466e-02 -3.82472351e-02 -8.62914622e-02  1.30736277e-01
  -4.54680584e-02  1.14184596e-01 -1.11450270e-01  9.27258283e-02
   9.50029641e-02 -9.73976869e-03 -

## Model Selection (Cosine Similarity)

In [None]:

# Stack the per-product embeddings into one matrix (one row per product),
# then compute the pairwise cosine similarity between all products
embedding_matrix = np.vstack(laptop_data['embeddings'].tolist())
laptop_similarity = cosine_similarity(embedding_matrix)

# Function to get product recommendations based on cosine similarity
def recommend_products(similarity_matrix, data, product_idx, top_n=5):
    """Return the ``top_n`` rows of ``data`` most similar to the row at ``product_idx``.

    The query row is excluded explicitly by position. It is not assumed to be
    the highest-scoring entry, because duplicate products or floating-point
    rounding can rank another row at or above the query's self-similarity.

    Args:
        similarity_matrix: Pairwise similarity scores, indexed positionally so
            that row ``i`` holds the scores of product ``i`` against all rows
            of ``data``.
        data: DataFrame whose row order matches ``similarity_matrix``.
        product_idx: Positional index of the query product.
        top_n: Maximum number of recommendations. Values <= 0 yield an empty
            frame; values larger than the number of other products return
            all of them.

    Returns:
        Rows of ``data`` ordered from most to least similar, never including
        the query row itself.

    Raises:
        IndexError: If ``product_idx`` is out of range for ``similarity_matrix``.
    """
    # Get similarity scores for the selected product
    similarity_scores = np.asarray(similarity_matrix[product_idx])

    # Normalise a negative index so the self-exclusion below still matches
    n_products = similarity_scores.shape[0]
    query_pos = product_idx + n_products if product_idx < 0 else product_idx

    # Rank all products from most to least similar, then drop the query itself
    ranked = np.argsort(similarity_scores, kind="stable")[::-1]
    ranked = ranked[ranked != query_pos]

    return data.iloc[ranked[:max(top_n, 0)]]

# Recommendation for a laptop (e.g., index 0)
laptop_idx = 0
recommended_laptops = recommend_products(laptop_similarity, laptop_data, laptop_idx)
# f-string so the heading shows the actual index instead of the literal "{laptop_idx}"
print(f"\nRecommended Laptops based on product at index {laptop_idx}:")
print(recommended_laptops['Brand'])




Recommended Laptops based on product at index {laptop_idx}:
10    Dell
8       HP
16    Dell
2       HP
17      HP
Name: Brand, dtype: object


## Building the Recommendation System, Evaluation and Testing

In [None]:
# Testing the recommendation for a laptop at a different index
laptop_idx_test = 3
recommended_laptops_test = recommend_products(laptop_similarity, laptop_data, laptop_idx_test)
# f-string so the heading shows the actual index instead of the literal "{laptop_idx_test}"
print(f"\nTest Recommended Laptops based on product at index {laptop_idx_test}:")
print(recommended_laptops_test['Brand'])



Test Recommended Laptops based on product at index {laptop_idx_test}:
7        Lenovo
11       Lenovo
2            HP
0          Dell
6     Microsoft
Name: Brand, dtype: object


## Interactive Widgets with ipywidgets

In [None]:
# Build one "Brand - Model Name" label per laptop; the option value is the row position
dropdown_options = [
    (brand + ' - ' + model_name, position)
    for position, (brand, model_name) in enumerate(
        zip(laptop_data['Brand'], laptop_data['Model Name'])
    )
]

# Dropdown used to pick the query product
# NOTE(review): `continuous_update` is usually a slider/text-input option — confirm Dropdown honours it
product_dropdown = widgets.Dropdown(
    options=dropdown_options,
    description='Select Product:',
    continuous_update=False,
)

# Output widget that receives the recommendation table
output_area = widgets.Output()

In [None]:
# Callback that refreshes the recommendations for the selected product
def update_recommendations(product_idx):
    """Show the laptops most similar to ``product_idx`` inside ``output_area``.

    Args:
        product_idx: Positional index of the selected laptop, passed to
            ``recommend_products`` together with ``laptop_similarity`` and
            ``laptop_data``.
    """
    with output_area:
        # Remove whatever the previous selection rendered
        output_area.clear_output()
        similar_rows = recommend_products(laptop_similarity, laptop_data, product_idx)
        # Only the brand and description columns are shown
        display(similar_rows[['Brand', 'Description']])

# Bind the dropdown to update_recommendations through ipywidgets' interactive helper
recommendation_link = widgets.interactive(update_recommendations, product_idx=product_dropdown)

# Render the selector first, then the area that receives the results
for ui_element in (product_dropdown, output_area):
    display(ui_element)

Dropdown(description='Select Product:', options=(('Dell - Inspiron', 0), ('MSI - GL65', 1), ('HP - EliteBook',…

Output()