In [None]:
pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence_transformers
Successfully installed sentence_transformers-3.1.1


In [None]:
import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertTokenizer, BertModel

In [None]:
# Load the precomputed product embeddings for both Sentence Transformer and BERT.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source.
EMBEDDINGS_DIR = '/content'  # presumably the Colab working directory; adjust when running elsewhere
embeddings_sentence_transformer = pd.read_pickle(f'{EMBEDDINGS_DIR}/sentence_transformer_embeddings.pkl')
embeddings_bert = pd.read_pickle(f'{EMBEDDINGS_DIR}/bert_embeddings.pkl')

# The evaluation functions read these two columns; fail early with a clear
# message if either frame is missing one of them.
REQUIRED_COLUMNS = {'combined_features', 'feature_embedding'}
assert REQUIRED_COLUMNS <= set(embeddings_sentence_transformer.columns), \
    "sentence transformer embeddings are missing required columns"
assert REQUIRED_COLUMNS <= set(embeddings_bert.columns), \
    "BERT embeddings are missing required columns"

# Preview a few rows rather than rendering the entire frame.
embeddings_bert.head()

Unnamed: 0,link,price,actual_price,ratings,color,Brand,Model,Processor,RAM,Storage,combined_features,feature_embedding
0,https://www.daraz.com.np/products/dell-vostro-...,55999.0,55999.0,13.0,Black,Dell,Vostro 3520,i3 12th Gen,16GB RAM,512GB SSD,Dell Vostro 3520 i3 12th Gen 16GB RAM 512GB SSD,"[[-0.8371808, -0.53040063, 0.09187348, -0.0092..."
1,https://www.daraz.com.np/products/apple-macboo...,109900.0,139900.0,76.0,Space Grey,Apple,MacBook Air 13,M1,Unknown,Unknown,Apple MacBook Air 13 M1 Unknown Unknown,"[[-1.1809832, -0.2635993, -0.30852607, 0.28820..."
2,https://www.daraz.com.np/products/dell-vostro-...,68999.0,68999.0,22.0,Black,Dell,Vostro 3520,i5 12th Gen,16GB RAM,512GB SSD,Dell Vostro 3520 i5 12th Gen 16GB RAM 512GB SSD,"[[-0.8225887, -0.5389361, 0.089930564, -0.0001..."
3,https://www.daraz.com.np/products/dell-vostro-...,64000.0,64000.0,8.0,Black,Dell,Vostro 3520,i5 12th Gen,8GB RAM,256GB SSD,Dell Vostro 3520 i5 12th Gen 8GB RAM 256GB SSD,"[[-0.825746, -0.5430305, 0.0957647, 0.02057780..."
4,https://www.daraz.com.np/products/acer-nitro-v...,137999.0,137999.0,2.0,Black,Acer,Nitro V 15,i7 13th Gen,Unknown,512GB SSD,Acer Nitro V 15 i7 13th Gen Unknown 512GB SSD,"[[-0.7531018, -0.50445443, 0.14772333, 0.25335..."
...,...,...,...,...,...,...,...,...,...,...,...,...
528,https://www.daraz.com.np/products/asus-x515-i5...,75990.0,88000.0,0.0,Silver,Asus,X515,i5 11th Gen,8GB RAM,Unknown,Asus X515 i5 11th Gen 8GB RAM Unknown,"[[-0.803862, -0.42832223, -0.2873604, 0.123134..."
529,https://www.daraz.com.np/products/asus-vivoboo...,89990.0,110000.0,0.0,Black,ASUS,VivoBook 16 F1605VA Intel Core,i5 13th Gen,8GB RAM,512GB SSD,ASUS VivoBook 16 F1605VA Intel Core i5 13th G...,"[[-0.55894786, -0.5826864, 0.251406, 0.0097504..."
530,https://www.daraz.com.np/products/lenovo-ideap...,52000.0,52000.0,0.0,Brown,Lenovo,Ideapad 3,AMD Ryzen,4GB RAM,Unknown,Lenovo Ideapad 3 AMD Ryzen 4GB RAM Unknown,"[[-0.9219172, -0.3704869, -0.27306414, 0.20107..."
531,https://www.daraz.com.np/products/lenovo-ideap...,55000.0,55000.0,0.0,Grey,Lenovo,IdeaPad slim,Unknown,Unknown,Unknown,Lenovo IdeaPad slim Unknown Unknown Unknown,"[[-0.7126213, -0.1645561, -0.12597898, 0.35013..."


In [None]:
# Function to evaluate using Mean Reciprocal Rank (MRR) using Sentence Transformer
def evaluate_mrr_sentence_transformer(query, data, relevant_items, top_n=10):
    """Reciprocal rank of the first relevant product for a single query.

    The query is embedded with the 'bert-base-nli-mean-tokens' Sentence
    Transformer model, every row of ``data`` is scored by cosine similarity
    against it, and the ``top_n`` best rows are scanned in rank order.

    Parameters
    ----------
    query : str
        Free-text search query.
    data : pandas.DataFrame
        Must contain 'feature_embedding' (one stored vector per row) and
        'combined_features' (the text compared against ``relevant_items``).
        The frame is not modified.
    relevant_items : container of str
        'combined_features' values that count as relevant (exact match).
    top_n : int, default 10
        Number of top-ranked rows to inspect.

    Returns
    -------
    float or int or None
        ``1 / rank`` of the first relevant row within the top ``top_n``,
        ``0`` if none is found, or ``None`` if any error occurred (the error
        is printed rather than raised).
    """
    try:
        stored_embeddings = data['feature_embedding']
        # Nothing to rank is the same outcome as "no relevant item found".
        if len(stored_embeddings) == 0:
            return 0

        # Generate query embedding, forced to shape (1, dim) for sklearn.
        model = SentenceTransformer('bert-base-nli-mean-tokens')
        query_embedding = np.asarray(model.encode([query])).reshape(1, -1)

        # Flatten each stored vector so both (dim,) and (1, dim) layouts are
        # accepted, then score all rows with a single similarity call.
        product_embeddings = np.vstack([np.ravel(vector) for vector in stored_embeddings])
        similarities = cosine_similarity(query_embedding, product_embeddings)[0]

        # `assign` returns a new frame, so the caller's DataFrame does not
        # pick up a 'similarity' column as a side effect.
        ranked_data = (
            data.assign(similarity=similarities)
            .sort_values(by='similarity', ascending=False)
            .head(top_n)
        )

        # Calculate MRR: reciprocal of the first rank holding a relevant item.
        for rank, features in enumerate(ranked_data['combined_features'], start=1):
            if features in relevant_items:
                return 1 / rank

        return 0  # Return 0 if no relevant items are found in top_n
    except Exception as e:
        print(f"Error during MRR evaluation: {e}")
        return None

In [None]:
# Function to evaluate Mean Reciprocal Rank (MRR) using BERT
def evaluate_mrr_bert(query, data, relevant_items, top_n=10):
    """Reciprocal rank of the first relevant product for a single query (BERT).

    The query is embedded with 'bert-base-uncased' using the hidden state of
    the first ([CLS]) token, every row of ``data`` is scored by cosine
    similarity against it, and the ``top_n`` best rows are scanned in rank
    order.

    Parameters
    ----------
    query : str
        Free-text search query.
    data : pandas.DataFrame
        Must contain 'feature_embedding' (one stored vector per row) and
        'combined_features' (the text compared against ``relevant_items``).
        The frame is not modified, and any existing 'similarity' column in it
        is ignored.
    relevant_items : container of str
        'combined_features' values that count as relevant (exact match).
    top_n : int, default 10
        Number of top-ranked rows to inspect.

    Returns
    -------
    float or int or None
        ``1 / rank`` of the first relevant row within the top ``top_n``,
        ``0`` if none is found, or ``None`` if any error occurred (the error
        is printed rather than raised).
    """
    try:
        stored_embeddings = data['feature_embedding']
        # Nothing to rank is the same outcome as "no relevant item found".
        if len(stored_embeddings) == 0:
            return 0

        # Load pre-trained BERT model and tokenizer
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = BertModel.from_pretrained('bert-base-uncased')

        # Generate BERT embedding for the query
        encoded_input = tokenizer(query, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            model_output = model(**encoded_input)

        # First-token ([CLS]) hidden state, kept 2-D (one row per query) so it
        # can be passed to cosine_similarity directly.
        query_embedding = model_output.last_hidden_state[:, 0, :].numpy()

        # Flatten each stored vector so both (dim,) and (1, dim) layouts are
        # accepted, then stack them into one (n_rows, dim) matrix.
        product_embeddings = np.vstack([np.ravel(vector) for vector in stored_embeddings])

        # Calculate cosine similarity between the query and every product
        similarities = cosine_similarity(query_embedding, product_embeddings)[0]

        # Rank by the scores just computed. They are attached to a copy via
        # `assign`, so the ranking never depends on a 'similarity' column that
        # happens to already be present in `data`.
        ranked_data = (
            data.assign(similarity=similarities)
            .sort_values(by='similarity', ascending=False)
            .head(top_n)
        )

        # Calculate MRR: reciprocal of the first rank holding a relevant item.
        for rank, features in enumerate(ranked_data['combined_features'], start=1):
            if features in relevant_items:
                return 1 / rank

        return 0  # Return 0 if no relevant items are found in the top_n

    except Exception as e:
        print(f"Error during MRR evaluation: {e}")
        return None

In [None]:
# Function to evaluate Precision at K (P@K) using Sentence Transformer
def evaluate_precision_at_k_sentence_transformer(query, data, relevant_items, k=10):
    """Fraction of the top ``k`` ranked products that are relevant to the query.

    The query is embedded with the 'bert-base-nli-mean-tokens' Sentence
    Transformer model and every row of ``data`` is scored by cosine similarity
    against it.

    Parameters
    ----------
    query : str
        Free-text search query.
    data : pandas.DataFrame
        Must contain 'feature_embedding' (one stored vector per row) and
        'combined_features' (the text compared against ``relevant_items``).
        The frame is not modified.
    relevant_items : container of str
        'combined_features' values that count as relevant (exact match).
    k : int, default 10
        Cut-off rank; must be positive. The count of relevant rows is divided
        by ``k`` even when ``data`` has fewer than ``k`` rows.

    Returns
    -------
    float or None
        Precision at ``k``, or ``None`` if any error occurred (including a
        non-positive ``k``); the error is printed rather than raised.
    """
    try:
        # A non-positive cut-off has no meaningful precision; report it with a
        # clear message instead of a division error or a negative score.
        if k <= 0:
            raise ValueError("k must be a positive integer")

        stored_embeddings = data['feature_embedding']
        # No rows means no relevant rows in the top k.
        if len(stored_embeddings) == 0:
            return 0.0

        # Generate query embedding, forced to shape (1, dim) for sklearn.
        model = SentenceTransformer('bert-base-nli-mean-tokens')
        query_embedding = np.asarray(model.encode([query])).reshape(1, -1)

        # Flatten each stored vector so both (dim,) and (1, dim) layouts are
        # accepted, then score all rows with a single similarity call.
        product_embeddings = np.vstack([np.ravel(vector) for vector in stored_embeddings])
        similarities = cosine_similarity(query_embedding, product_embeddings)[0]

        # `assign` returns a new frame, so the caller's DataFrame does not
        # pick up a 'similarity' column as a side effect.
        ranked_data = (
            data.assign(similarity=similarities)
            .sort_values(by='similarity', ascending=False)
            .head(k)
        )

        # Calculate Precision at K
        relevant_found = sum(
            1 for features in ranked_data['combined_features'] if features in relevant_items
        )
        return relevant_found / k
    except Exception as e:
        print(f"Error during Precision at K evaluation: {e}")
        return None

In [None]:
# Function to evaluate Precision at K (P@K) using BERT
def evaluate_precision_at_k_bert(query, data, relevant_items, k=10):
    """Fraction of the top ``k`` ranked products that are relevant (BERT).

    The query is embedded with 'bert-base-uncased' using the hidden state of
    the first ([CLS]) token and every row of ``data`` is scored by cosine
    similarity against it.

    Parameters
    ----------
    query : str
        Free-text search query.
    data : pandas.DataFrame
        Must contain 'feature_embedding' (one stored vector per row) and
        'combined_features' (the text compared against ``relevant_items``).
        The frame is not modified, and any existing 'similarity' column in it
        is ignored.
    relevant_items : container of str
        'combined_features' values that count as relevant (exact match).
    k : int, default 10
        Cut-off rank; must be positive. The count of relevant rows is divided
        by ``k`` even when ``data`` has fewer than ``k`` rows.

    Returns
    -------
    float or None
        Precision at ``k``, or ``None`` if any error occurred (including a
        non-positive ``k``); the error is printed rather than raised.
    """
    try:
        # A non-positive cut-off has no meaningful precision; report it with a
        # clear message instead of a division error or a negative score.
        if k <= 0:
            raise ValueError("k must be a positive integer")

        stored_embeddings = data['feature_embedding']
        # No rows means no relevant rows in the top k.
        if len(stored_embeddings) == 0:
            return 0.0

        # Load pre-trained BERT model and tokenizer
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = BertModel.from_pretrained('bert-base-uncased')

        # Generate BERT embedding for the query
        encoded_input = tokenizer(query, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            model_output = model(**encoded_input)

        # First-token ([CLS]) hidden state, kept 2-D (one row per query) so it
        # can be passed to cosine_similarity directly.
        query_embedding = model_output.last_hidden_state[:, 0, :].numpy()

        # Flatten each stored vector so both (dim,) and (1, dim) layouts are
        # accepted, then stack them into one (n_rows, dim) matrix.
        product_embeddings = np.vstack([np.ravel(vector) for vector in stored_embeddings])

        # Calculate cosine similarity between the query and every product
        similarities = cosine_similarity(query_embedding, product_embeddings)[0]

        # Rank by the scores just computed. They are attached to a copy via
        # `assign`, so the ranking never depends on a 'similarity' column that
        # happens to already be present in `data`.
        ranked_data = (
            data.assign(similarity=similarities)
            .sort_values(by='similarity', ascending=False)
            .head(k)
        )

        # Calculate Precision at K
        relevant_found = sum(
            1 for features in ranked_data['combined_features'] if features in relevant_items
        )

        # Return Precision at K
        return relevant_found / k

    except Exception as e:
        print(f"Error during Precision at K evaluation: {e}")
        return None

In [None]:
# Evaluate both models on one hand-labelled query.
query = "Dell with 16GB RAM"
relevant_items = [
    "Dell Vostro 3520  i7 12th Gen 16GB RAM 512GB SSD",
    "Dell Vostro 3520  i5 12th Gen 16GB RAM 512GB SSD",
    "Dell Vostro 3520  i3 12th Gen 16GB RAM 512GB SSD",
]

# Each model is scored against the product embeddings produced by that same
# model; comparing a query vector with vectors from a different model would
# not give a meaningful similarity.

# Sentence Transformer
mrr_score_sentence_transformer = evaluate_mrr_sentence_transformer(
    query, embeddings_sentence_transformer, relevant_items
)
precision_at_10_sentence_transformer = evaluate_precision_at_k_sentence_transformer(
    query, embeddings_sentence_transformer, relevant_items, k=10
)

print("Sentence Transformer Results:")
print(f"Mean Reciprocal Rank (MRR): {mrr_score_sentence_transformer}")
print(f"Precision at 10 (P@10): {precision_at_10_sentence_transformer}")

# BERT
mrr_score_bert = evaluate_mrr_bert(query, embeddings_bert, relevant_items)
precision_at_10_bert = evaluate_precision_at_k_bert(query, embeddings_bert, relevant_items, k=10)

print("\nBERT Results:")
print(f"Mean Reciprocal Rank (MRR): {mrr_score_bert}")
print(f"Precision at 10 (P@10): {precision_at_10_bert}")



Sentence Transformer Results:
Mean Reciprocal Rank (MRR): 1.0
Precision at 10 (P@10): 0.8





BERT Results:
Mean Reciprocal Rank (MRR): 1.0
Precision at 10 (P@10): 0.8
