In [None]:
import ast
import zipfile

import pandas as pd

In [None]:
# Colab-only: mount Google Drive at /content/drive so files stored under
# /content/drive/MyDrive (such as the dataset archive) are readable from the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Where the dataset archive lives on Drive, and where its contents are unpacked.
zip_path = '/content/drive/MyDrive/dataset.zip'
extract_path = '/content/dataset'

# Open the archive for reading (ZipFile's default mode) and unpack every member
# into extract_path; the context manager closes the archive afterwards.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)

In [None]:
# Load the product table from the extracted CSV and preview its first two rows.
data = pd.read_csv('/content/dataset/data.csv')
data.head(2)

Unnamed: 0,id,slug,title,imgs,brand,category,vendor,used,address,availability,...,discounted_price,specifications,description,delivery_fee,delivery_details,warranty,warranty_type,average_rating,num_ratings,reviews
0,0,https://www.mega.pk/mobiles_products/23522/Not...,Nothing Phone 1 8GB RAM 256GB Storage Non PTA ...,['https://www.mega.pk/items_images/Nothing+Pho...,,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",,...,,"{'RAM': '8GB', 'Memory quantity': '', 'Interna...",,,,,,,,[]
1,1,https://www.mega.pk/mobiles_products/23458/Opp...,Oppo F21 Pro 8GB Ram 128GB Storage 5G PTA Appr...,['https://www.mega.pk/items_images/Oppo+F21+Pr...,OPPO,Mobile,MEGA.PK,0,"Office 11, 12, 14 Basement Ahmed Center, I-8 M...",,...,,"{'RAM': '8gb', 'Memory quantity': '', 'Interna...",,,,,,,,[]


In [None]:
def clean_data_text(row):
  """Build the lowercase text that represents one product row.

  The title, brand, category and specifications are joined with single
  spaces and lowercased. Specifications are rendered as space-separated
  ``key:value`` pairs. Missing (NaN/None) fields contribute an empty string,
  so the result is always a ``str``.

  Args:
    row: Mapping-like object (for example a DataFrame row) providing
      ``title``, ``brand``, ``category`` and ``specifications``.
      ``specifications`` may be a dict or the string repr of a dict.

  Returns:
    The combined, lowercased text for the row.
  """
  title = row['title'] if pd.notna(row['title']) else ''
  brand = row['brand'] if pd.notna(row['brand']) else ''
  category = row['category'] if pd.notna(row['category']) else ''

  raw_specs = row['specifications']
  if isinstance(raw_specs, dict):
    # Already parsed (e.g. the frame did not come straight from CSV).
    specs = raw_specs
  elif pd.notna(raw_specs):
    # SECURITY: this text comes from scraped product pages. It used to be
    # passed to eval(), which would execute any Python expression embedded in
    # the CSV. ast.literal_eval only accepts plain literals.
    try:
      specs = ast.literal_eval(raw_specs)
    except (ValueError, SyntaxError, TypeError):
      # Unparseable specifications: keep the row, just without spec text.
      specs = {}
    if not isinstance(specs, dict):
      specs = {}
  else:
    specs = {}

  spec_text = ' '.join(f"{key}:{value}" for key, value in specs.items())
  return f"{title} {brand} {category} {spec_text}".lower()

# Build the combined text column: axis=1 hands each row to clean_data_text.
data['clean_text'] = data.apply(clean_data_text, axis=1)
# Drop rows whose clean_text is missing.
# NOTE(review): clean_data_text returns the result of an f-string + .lower(),
# so it looks like it always yields a str and this dropna never removes a row
# -- confirm whether it is still needed.
data = data.dropna(subset=['clean_text'])


In [None]:
# Preview the combined text of the first ten rows.
data['clean_text'].head(10)

Unnamed: 0,clean_text
0,nothing phone 1 8gb ram 256gb storage non pta ...
1,oppo f21 pro 8gb ram 128gb storage 5g pta appr...
2,"tecno spark 10 tecno mobile ram:4gb,8gb memor..."
3,"vivo v27 5g vivo mobile ram:8gb,12gb memory q..."
4,apple iphone 15 pro max apple mobile ram:8gb ...
5,"realme gt3 realme mobile ram:8gb,12gb,16gb me..."
6,sparx s9 2gb ram 32gb storage pta approved sp...
7,sparx s6 2gb ram 32gb storage sparx mobile ra...
8,tecno pova neo 2 4gb ram 64gb storage pta appr...
9,vivo y73 8gb ram 128gb storage pta approved v...


In [None]:
# Show the shape of the whole frame alongside the shape of the clean_text column.
data.shape, data['clean_text'].shape

((1666, 23), (1666,))

# Create Embeddings

In [None]:
from sentence_transformers import SentenceTransformer

# Load the pre-trained all-MiniLM-L6-v2 sentence-embedding model.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every product text in ONE call. Passing the whole list lets the model
# batch the inputs internally, instead of running a separate forward pass per
# row as Series.apply(model.encode) did.
embedding_matrix = model.encode(data['clean_text'].tolist(), show_progress_bar=True)

# Store one 1-D vector per row; list() splits the 2-D result into its rows.
data['embeddings'] = list(embedding_matrix)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


# Load Embeddings

In [None]:
# Inspect the embedding of the first row. .iloc[0] selects by position, so it
# keeps working even if the row labelled 0 was dropped earlier; [0] on the
# Series is a label lookup and would raise KeyError in that case.
data['embeddings'].iloc[0]

array([-4.47997153e-02,  2.20823009e-02,  6.36246502e-02, -7.32250586e-02,
       -3.74107133e-03, -1.27904080e-02,  3.20832096e-02,  5.76821305e-02,
       -4.01968695e-02,  5.48938215e-02,  4.45760861e-02, -5.98917641e-02,
        1.95591878e-02,  3.45871858e-02, -3.43603268e-02, -1.28925266e-02,
        7.80460984e-02, -1.18097447e-01,  1.69884805e-02,  3.80957089e-02,
        2.40320191e-02, -4.15192507e-02,  3.56650762e-02, -9.26706418e-02,
       -1.75771527e-02,  1.14063667e-02, -2.20400337e-02, -1.94787327e-02,
       -6.40584826e-02, -9.01186094e-02, -1.11770798e-02,  9.04287472e-02,
        3.24672498e-02,  5.90824224e-02,  3.44087034e-02, -4.80567776e-02,
        7.38988295e-02, -4.37487289e-02, -3.41263935e-02, -5.80016971e-02,
       -3.87122557e-02, -7.66053237e-03,  2.72599962e-02,  6.11301400e-02,
        1.20617121e-01, -2.00280435e-02,  4.26558293e-02, -1.73091777e-02,
        9.80669260e-03, -5.22637442e-02,  3.29058175e-03,  4.05739173e-02,
       -2.65373234e-02,  

In [None]:
# Persist the frame, embeddings column included, as a pickle file.
data.to_pickle('product_embedding.pkl')

# Read the frame back from that pickle file.
data = pd.read_pickle('product_embedding.pkl')

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def recommend_products(product_name, num_recommendations=5):
  """Return the products most similar to a free-text query.

  The query is embedded with the notebook-level ``model`` and compared, by
  cosine similarity, against every vector in ``data['embeddings']``.

  Side effect: the scores are written to ``data['similarity']`` on the
  notebook-level ``data`` frame, overwriting the column from any earlier call.

  Args:
    product_name: Query text to embed.
    num_recommendations: How many top-scoring rows to return.

  Returns:
    A DataFrame with the ``title``, ``brand``, ``category``, ``similarity``
    and ``imgs`` columns of the best matches, highest similarity first.
  """
  query_embedding = model.encode(product_name)

  if len(data) == 0:
    # np.vstack rejects an empty sequence; an empty score vector keeps the
    # empty-frame case returning an empty result instead of raising.
    scores = np.empty(0)
  else:
    # Stack the per-row vectors into one 2-D matrix so all similarities come
    # from a single cosine_similarity call rather than one call per row.
    embedding_matrix = np.vstack(data['embeddings'].tolist())
    scores = cosine_similarity([query_embedding], embedding_matrix)[0]

  data['similarity'] = scores
  recommendations = data.sort_values(by='similarity', ascending=False).head(num_recommendations)
  return recommendations[['title', 'brand', 'category', 'similarity', 'imgs']]



In [None]:
# Example query: fetch the top matches (default of 5) for a generic brand + type phrase.
product_name = "samsung phone"
recommendations = recommend_products(product_name)
recommendations

Unnamed: 0,title,brand,category,similarity,imgs
161,SAMSUNG GALAXY A32 6GB RAM 128GB STORAGE LTE P...,Samsung,Mobile,0.615731,['https://www.mega.pk/items_images/SAMSUNG+GAL...
66,Samsung Galaxy Z Fold 4 12GB RAM 512GB Storage...,Samsung,Mobile,0.611576,['https://www.mega.pk/items_images/Samsung+Gal...
130,Samsung Galaxy A53 8GB Ram 128GB Storage 5G PT...,Samsung,Mobile,0.603369,['https://www.mega.pk/items_images/Samsung+Gal...
1571,Samsung Galaxy A32,,Mobile,0.594321,['https://images.priceoye.pk/samsung-galaxy-a3...
131,Samsung Galaxy A23 6GB Ram 128GB Storage LTE P...,Samsung,Mobile,0.588422,['https://www.mega.pk/items_images/Samsung+Gal...


In [None]:
# Example query: a single-word product-line name, again using the default of 5 results.
product_name = "redmi"
recommendations = recommend_products(product_name)
recommendations

Unnamed: 0,title,brand,category,similarity,imgs
1349,Xiaomi Redmi 10A,,Mobile,0.448195,['https://images.priceoye.pk/xiaomi-redmi-10a-...
1352,Xiaomi Redmi A1+,,Mobile,0.441663,['https://images.priceoye.pk/xiaomi-redmi-a1-p...
1376,Xiaomi Redmi 10C,,Mobile,0.427061,['https://images.priceoye.pk/redmi-10c-pakista...
1362,Xiaomi Redmi Note 11 Pro,,Mobile,0.409171,['https://images.priceoye.pk/xiaomi-redmi-note...
167,Redmi Note 12 6GB RAM 128GB Storage PTA Approved,Xiaomi,Mobile,0.404531,['https://www.mega.pk/items_images/Redmi+Note+...
