# Part 1 - Text Processing
The following cells contain the functions needed to clean and prepare the data for the subsequent parts.

In [1]:
# Mount Google Drive at /content/drive; the dataset paths configured further
# down point inside this mount. This import only exists on Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import re
import csv
import json
import math
import time
import string
import collections
import numpy as np
from numpy import linalg as la
from collections import defaultdict
from array import array
import nltk
from nltk.corpus import stopwords, wordnet
from nltk.stem import PorterStemmer, WordNetLemmatizer
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from wordcloud import WordCloud
import seaborn as sns

# Fetch the NLTK resources at notebook start-up.
# 'stopwords' and 'wordnet' (plus 'omw-1.4') back the stop-word filter and the
# lemmatizer used by build_terms.
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('stopwords')
# NOTE(review): the remaining packages are not referenced by the code visible in
# this notebook section -- confirm they are still needed. They are downloaded
# with quiet=True, unlike the three above.
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
nltk.download('maxent_ne_chunker', quiet=True)
nltk.download('words', quiet=True)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [3]:
# Paths of the documents used in this notebook.
# Change as necessary
docs_path = '/content/drive/MyDrive/fashion_products_dataset.json'
validation_path = '/content/drive/MyDrive/validation_labels.csv'
our_validation_path = '/content/drive/MyDrive/our_queries_validation.csv'


# Load the product records (one JSON value holding all documents).
# The encoding is explicit so decoding does not depend on the platform locale,
# matching how the validation CSV is opened later in the notebook.
with open(docs_path, 'r', encoding='utf-8') as fp:
    lines = json.load(fp)

In [4]:
# Lazily created (stemmer, lemmatizer, stop-word set) shared by every call to
# build_terms, so the stop-word corpus is not re-read for each document.
_BUILD_TERMS_RESOURCES = None


def _build_terms_resources():
    """Return the shared (stemmer, lemmatizer, stop_words) triple, creating it on first use."""
    global _BUILD_TERMS_RESOURCES
    if _BUILD_TERMS_RESOURCES is None:
        _BUILD_TERMS_RESOURCES = (
            PorterStemmer(),
            WordNetLemmatizer(),
            frozenset(stopwords.words("english")),
        )
    return _BUILD_TERMS_RESOURCES


def build_terms(line):
    """
    Preprocess a piece of text (e.g. title + description) into index terms.

    Steps, in order:
    - Lowercase
    - Replace every character that is neither a word character nor whitespace
      with a space (removes punctuation and symbols such as ₹, %, ™)
    - Normalize spaces
    - Tokenization (whitespace split)
    - Remove English stop words
    - Lemmatization
    - Stemming

    Parameters
    ----------
    line : str
        Raw text. ``None`` or an empty string yields an empty list.

    Returns
    -------
    list of str
        The processed tokens, in their original order.
    """
    if not line:
        return []

    stemmer, lemmatizer, stop_words = _build_terms_resources()

    # Lowercase
    line = line.lower()

    # Remove punctuation and uncommon symbols (note: "\w" keeps underscores)
    line = re.sub(r"[^\w\s]", " ", line)

    # Normalize spaces
    line = re.sub(r"\s+", " ", line).strip()

    # Tokenize
    tokens = line.split()

    # Remove stop words
    tokens = [w for w in tokens if w not in stop_words]

    # Lemmatization (reduces words to their base form)
    tokens = [lemmatizer.lemmatize(w) for w in tokens]

    # Stemming is applied on top of lemmatization.
    # NOTE(review): the previous comment here said stemming is not needed
    # ("explained in the report"), yet the code has always stemmed -- confirm
    # which behaviour the report intends before changing it.
    tokens = [stemmer.stem(w) for w in tokens]

    return tokens


In [5]:
def _parse_float(raw, default, strip_chars=None):
    """Best-effort conversion of a scraped numeric field to float.

    Thousands separators (",") are removed and, when `strip_chars` is given,
    any of those characters are stripped from both ends (str.strip semantics).
    Returns `default` when the value is missing, empty or cannot be parsed.
    """
    if raw is None:
        return default
    if isinstance(raw, (int, float)):
        return float(raw)
    cleaned = str(raw).replace(',', '')
    if strip_chars:
        cleaned = cleaned.strip(strip_chars)
    cleaned = cleaned.strip()
    if not cleaned:
        return default
    try:
        return float(cleaned)
    except ValueError:
        return default


def create_index_with_tfidf(lines):
    """
    Build the positional inverted index and the TF / DF / IDF tables.

    Only ``title`` and ``description`` are indexed; the remaining fields are
    stored in ``products_info`` for display.

    Parameters
    ----------
    lines : list of dict
        Product records. Each record is read with ``.get`` so missing keys
        fall back to an empty string (or to the numeric default below).

    Returns
    -------
    index : defaultdict(list)
        term -> list of postings ``[pid, array('I', positions)]``.
    products_info : dict
        pid -> dict with the product fields. Prices and discount default to
        0.0 and ``average_rating`` to None when missing or unparsable.
    tf : defaultdict(dict)
        term -> {pid: 1 + ln(frequency)}, rounded to 4 decimals.
    df : defaultdict(int)
        term -> number of documents containing the term.
    idf : defaultdict(float)
        term -> ln(N / df), rounded to 4 decimals, with N = len(lines).
    """
    # Initialize data structures
    index = defaultdict(list)
    products_info = {}
    tf = defaultdict(dict)
    df = defaultdict(int)
    idf = defaultdict(float)

    num_documents = len(lines)

    for doc in lines:
        pid = doc.get("pid", "")
        title = doc.get("title", "")
        description = doc.get("description", "")

        products_info[pid] = {
            "pid": pid,
            "title": title,
            "description": description,
            "brand": doc.get("brand", ""),
            "category": doc.get("category", ""),
            "sub_category": doc.get("sub_category", ""),
            "product_details": doc.get("product_details", ""),
            "seller": doc.get("seller", ""),
            "out_of_stock": doc.get("out_of_stock", ""),
            "selling_price": _parse_float(doc.get("selling_price"), 0.0),
            # Values look like "69% off"; strip those characters from both ends.
            "discount": _parse_float(doc.get("discount"), 0.0, strip_chars='% off'),
            "actual_price": _parse_float(doc.get("actual_price"), 0.0),
            "average_rating": _parse_float(doc.get("average_rating"), None),
            "url": doc.get("url", "")
        }

        terms = build_terms(f"{title} {description}")

        # Positional postings for the current document: term -> [pid, positions]
        current_page_index = {}
        for position, term in enumerate(terms):
            posting = current_page_index.get(term)
            if posting is None:
                current_page_index[term] = [pid, array('I', [position])]
            else:
                posting[1].append(position)

        # Update the inverted index, document frequency and log-scaled TF.
        # The number of recorded positions is the raw term frequency (always >= 1).
        for term, posting in current_page_index.items():
            df[term] += 1
            index[term].append(posting)
            tf[term][pid] = round(1 + math.log(len(posting[1])), 4)  # IDF is applied at query time

    # Compute IDF for each term
    for term, doc_freq in df.items():
        idf[term] = round(math.log(num_documents / doc_freq), 4)

    return index, products_info, tf, df, idf


In [6]:
# Build the index once and report how long it took (wall-clock seconds).
start_time = time.time()
index, products_info, tf, df, idf = create_index_with_tfidf(lines)
elapsed_seconds = np.round(time.time() - start_time, 2)
print("Total time to create the index: {} seconds".format(elapsed_seconds))

Total time to create the index: 27.64 seconds


In [7]:
def rank_products(query_terms, docs, index, idf, tf, products_info):
    """
    Rank candidate documents against the query with TF-IDF weights.

    Vectors only have one dimension per query-term position, so the document
    norm is taken over the query terms, not over the whole document.

    Parameters
    ----------
    query_terms : list of str
        Preprocessed query terms (duplicates allowed).
    docs : iterable of pid
        Candidate documents; postings for other pids are ignored.
    index : mapping term -> list of postings ``[pid, positions]``.
    idf : mapping term -> inverse document frequency.
    tf : mapping term -> {pid: term-frequency weight}.
    products_info : unused here; kept for interface compatibility.

    Returns
    -------
    result_docs : list of pid, best first.
    doc_scores : list of ``[score, pid]`` in the same order (ties are ordered
        by pid, descending, because the pairs are sorted as a whole).
    """
    num_terms = len(query_terms)
    # Set membership keeps the posting scan linear; `docs` usually arrives as a list.
    candidate_pids = set(docs)

    # Initialize document vectors and query vector
    doc_vectors = defaultdict(lambda: [0] * num_terms)
    query_vector = [0] * num_terms

    # Count term frequencies in the query
    query_terms_count = collections.Counter(query_terms)
    query_norm = la.norm(list(query_terms_count.values()))

    # Build the weighted query vector using TF-IDF
    for term_index, term in enumerate(query_terms):
        if term not in index:
            continue
        term_idf = idf[term]
        term_tf = tf.get(term, {})
        query_vector[term_index] = query_terms_count[term] / query_norm * term_idf

        # Assign TF-IDF weights to candidate documents containing the term
        for pid, _ in index[term]:
            if pid in candidate_pids and pid in term_tf:
                doc_vectors[pid][term_index] = term_tf[pid] * term_idf

    # Score each document: dot product with the query, normalized by the document norm
    doc_scores = []
    for pid, doc_vec in doc_vectors.items():
        doc_norm = la.norm(doc_vec)
        if doc_norm == 0:
            score = 0.0
        else:
            score = np.dot(doc_vec, query_vector) / doc_norm
        doc_scores.append([score, pid])

    # Sort documents by descending similarity score
    doc_scores.sort(reverse=True)
    result_docs = [pid for _, pid in doc_scores]

    return result_docs, doc_scores


In [8]:

def search_products_tf_idf(query, index, idf, tf, products_info):
    """
    Conjunctive (AND) search ranked with TF-IDF.

    A product is a candidate only if it contains every processed query term.
    Returns ``(ranked_pids, doc_scores)`` as produced by ``rank_products``,
    or ``([], [])`` when the query is empty after preprocessing, a term is
    absent from the index, or no product contains all the terms.
    """
    terms = build_terms(query)
    if len(terms) == 0:
        return [], []

    head, *rest = terms

    # Seed the candidate set with the postings of the first term ...
    if head not in index:
        return [], []
    matching = {posting[0] for posting in index[head]}

    # ... then narrow it down with each remaining term.
    for term in rest:
        if term not in index:
            return [], []
        matching = matching.intersection(posting[0] for posting in index[term])
        if len(matching) == 0:
            return [], []

    # Score the surviving candidates.
    ranked_docs, doc_scores = rank_products(terms, list(matching), index, idf, tf, products_info)
    return ranked_docs, doc_scores


In [9]:
# Run the two benchmark queries through the TF-IDF search.
query_1 = "women full sleeve sweatshirt cotton"
query_2 = "men slim jeans blue"
ranked_docs_q1, scores_q1 = search_products_tf_idf(query_1, index, idf, tf, products_info)
ranked_docs_q2, scores_q2 = search_products_tf_idf(query_2, index, idf, tf, products_info)

# Show the k best hits of each query (scores hold [score, pid] pairs).
k = 5
for query_text, query_scores in ((query_1, scores_q1), (query_2, scores_q2)):
    print(f"\nTop {k} results for: '{query_text}'")
    for score, pid in query_scores[:k]:
        info = products_info[pid]
        print(f" - {pid}: {info['title']} → score = {round(score, 4)}")

# Read the raw validation rows (header included) into a list of lists.
with open(validation_path, newline='', encoding='utf-8') as csvfile:
    validation_data = list(csv.reader(csvfile))



Top 5 results for: 'women full sleeve sweatshirt cotton'
 - SWSFZH6TFEHRDHWK: Full Sleeve Printed Women Sweatshirt → score = 1.9087
 - SWSFWHEPSKFVHCUZ: Full Sleeve Graphic Print Women Sweatshirt → score = 1.9087
 - SWSFV5JN5TKJWPZ2: Full Sleeve Solid Women Sweatshirt → score = 1.9087
 - SWSFSUDDT7YJ6ZSG: Full Sleeve Printed Women Sweatshirt → score = 1.9087
 - SWSFSUDDKGZJGSQ2: Full Sleeve Printed Women Sweatshirt → score = 1.9087

Top 5 results for: 'men slim jeans blue'
 - TSHFWWUJZ7SFGVSX: Printed, Striped Men Round Neck Dark Blue, White, Orange T-Shirt → score = 1.8637
 - TSHFWWUJYRVU3B5J: Printed, Striped Men Round Neck Dark Blue, White, Maroon T-Shirt → score = 1.8637
 - TSHFWWUJJQRTJCBN: Printed, Striped Men Round Neck Dark Blue, White, Grey T-Shirt → score = 1.8637
 - JEAFZRDTDFKGQKJH: Slim Men Blue Jeans → score = 1.8637
 - JEAFZ5MTE7GYFKBG: Slim Men Blue Jeans → score = 1.8637


# Part 1 - Extra

In [23]:
def rank_products_bm25(query_terms, index, idf, tf, products_info, docs=None, k1=1.5, b=0.75):
    """
    Rank documents with a BM25-style score.

    Parameters
    ----------
    query_terms : list of str
        Preprocessed query terms; a repeated term contributes once per occurrence.
    index : mapping term -> list of postings ``[pid, positions]``.
    idf : mapping term -> inverse document frequency.
    tf : mapping term -> {pid: term-frequency weight}.
    products_info : mapping pid -> dict with at least 'title' and 'description'.
    docs : iterable of pid, optional
        Candidate documents; defaults to every product in ``products_info``.
        Document lengths and their average are computed over these candidates only.
    k1, b : float
        BM25 saturation and length-normalization parameters.

    Returns
    -------
    result_docs : list of pid, best first.
    ranked_docs : list of ``(pid, score)`` tuples in the same order.
        Note the pair order is the reverse of ``rank_products``' ``[score, pid]``.
    """
    if docs is None:
        docs = products_info.keys()
    docs = list(docs)
    # Set membership keeps the posting scan linear; `docs` usually arrives as a list.
    candidate_pids = set(docs)

    doc_scores = defaultdict(float)
    doc_lengths = {}
    total_length = 0

    # Compute document lengths (in processed terms).
    # NOTE(review): this re-tokenizes every candidate on each query; storing the
    # lengths at indexing time would avoid the repeated work.
    for pid in docs:
        info = products_info[pid]
        doc_length = len(build_terms(f"{info['title']} {info['description']}"))
        doc_lengths[pid] = doc_length
        total_length += doc_length

    num_docs = len(docs)
    avg_doc_length = total_length / num_docs if num_docs > 0 else 0

    # Accumulate the per-term BM25 contribution for each candidate document.
    # NOTE(review): `tf` as built by create_index_with_tfidf stores log-scaled
    # weights (1 + ln f) rather than raw counts, whereas the textbook BM25
    # formula uses raw term frequency -- confirm this is intended.
    for term in query_terms:
        if term not in index:
            continue
        term_idf = idf[term]
        term_tf = tf.get(term, {})
        for pid, _ in index[term]:
            if pid not in candidate_pids or pid not in term_tf:
                continue
            f = term_tf[pid]
            # Guard against a degenerate all-empty candidate set.
            length_ratio = doc_lengths[pid] / avg_doc_length if avg_doc_length else 1.0
            numerator = f * (k1 + 1)
            denominator = f + k1 * (1 - b + b * length_ratio)
            doc_scores[pid] += term_idf * (numerator / denominator)

    # Sort documents by score (stable: ties keep first-scored order)
    ranked_docs = sorted(doc_scores.items(), key=lambda x: x[1], reverse=True)
    result_docs = [pid for pid, score in ranked_docs]

    return result_docs, ranked_docs


In [24]:
# Same search function as above, but with the option to choose the ranking method.
def search_products(query, index, idf, tf, products_info, ranking_method="bm25"):
    """
    Conjunctive (AND) search with a selectable ranking function.

    ranking_method: "bm25" (case-insensitive) ranks with BM25; any other
    value ranks with TF-IDF.

    Returns ``(ranked_pids, doc_scores)`` from the chosen ranker, or
    ``([], [])`` when the query is empty after preprocessing, a term is
    absent from the index, or no product contains all the terms.
    """
    terms = build_terms(query)
    if len(terms) == 0:
        return [], []

    head, *rest = terms

    # Seed the candidate set with the postings of the first term ...
    if head not in index:
        return [], []
    matching = {posting[0] for posting in index[head]}

    # ... then narrow it down with each remaining term (all or nothing).
    for term in rest:
        if term not in index:
            return [], []
        matching = matching.intersection(posting[0] for posting in index[term])
        if len(matching) == 0:
            return [], []

    # Hand the surviving candidates to the requested ranker.
    candidates = list(matching)
    if ranking_method.lower() == "bm25":
        return rank_products_bm25(terms, index, idf, tf, products_info, docs=candidates)
    return rank_products(terms, candidates, index, idf, tf, products_info)



# Part 2 - Evaluation


Precision@k

In [12]:
def precision_at_k(doc_score, y_score, k=10):
    """
    Fraction of the top-k ranked documents that are relevant.

    Parameters
    ----------
    doc_score : array-like
        Ground truth relevance labels (1 = relevant). Lists are accepted.
    y_score : array-like
        Predicted scores, aligned with ``doc_score``.
    k : int
        Number of top documents to consider. The denominator is always ``k``,
        even when fewer than ``k`` documents are available.

    Returns
    -------
    precision @k : float
    """
    labels = np.asarray(doc_score)
    order = np.argsort(y_score)[::-1]  # indices by descending predicted score
    top_k_labels = labels[order[:k]]
    relevant = np.count_nonzero(top_k_labels == 1)
    return float(relevant) / k

Recall@k

In [13]:
def recall_at_k(doc_score, y_score, k=10):
    """
    Fraction of the relevant documents that appear in the top-k.

    Parameters
    ----------
    doc_score : array-like
        Ground truth relevance labels (binary: 1=relevant, 0=non-relevant).
        Lists are accepted.
    y_score : array-like
        Predicted scores, aligned with ``doc_score``.
    k : int
        Number of top documents to consider.

    Returns
    -------
    recall@k : float
        0.0 when ``doc_score`` contains no relevant document. The denominator
        only counts relevant documents present in ``doc_score``.
    """
    labels = np.asarray(doc_score)
    total_relevant = np.count_nonzero(labels == 1)
    if total_relevant == 0:
        return 0.0  # Avoid division by zero

    order = np.argsort(y_score)[::-1]  # indices by descending predicted score
    relevant_retrieved = np.count_nonzero(labels[order[:k]] == 1)
    return float(relevant_retrieved) / total_relevant


Average Precision@k

In [14]:
def avg_precision_at_k(doc_score, y_score, k=10):
    """
    Average of precision@i over the ranks i <= k that hold a relevant document.

    Parameters
    ----------
    doc_score: Ground truth (true relevance labels, 1 = relevant).
    y_score: Predicted scores.
    k : number of doc to consider.

    Returns
    -------
    average precision @ k : 0 when no relevant document is found in the
    top-k, otherwise the mean of the precision values at each hit.
    """
    ranking = np.argsort(y_score)[::-1]  # document indices, best predicted first
    limit = min(k, len(ranking))

    hits = 0
    precisions_at_hits = []
    # zip stops after `limit` ranks, i.e. after the top-k documents.
    for rank, doc_idx in zip(range(1, limit + 1), ranking):
        if doc_score[doc_idx] == 1:
            hits += 1
            precisions_at_hits.append(hits / rank)

    if hits == 0:
        return 0
    return np.sum(precisions_at_hits) / hits

F1@k

In [15]:
def f1_at_k(doc_score, y_score, k=10):
    """
    Harmonic mean of precision@k and recall@k.

    Parameters
    ----------
    doc_score : array-like
        Ground truth relevance labels (binary: 1=relevant, 0=non-relevant).
        Lists are accepted.
    y_score : array-like
        Predicted scores, aligned with ``doc_score``.
    k : int
        Number of top documents to consider.

    Returns
    -------
    F1@k : float
        0.0 when there is no relevant document at all or none in the top-k.
    """
    labels = np.asarray(doc_score)
    total_relevant = np.count_nonzero(labels == 1)
    if total_relevant == 0:
        return 0.0  # avoid division by zero

    order = np.argsort(y_score)[::-1]  # sort by predicted score (descending)
    relevant_retrieved = np.count_nonzero(labels[order[:k]] == 1)

    precision = relevant_retrieved / k
    recall = relevant_retrieved / total_relevant

    if (precision + recall) == 0:
        return 0.0

    return 2 * (precision * recall) / (precision + recall)


MAP

In [16]:
def map_at_k(search_res, k=10):
    """
    Mean average precision @ k over all queries.

    Parameters
    ----------
    search_res: DataFrame with one row per (query, document) containing:
        query_id: query id.
        predicted_relevance: predicted score of the document for the query.
        is_relevant: ground truth label of the document for the query (1 = relevant).
    k : number of top documents considered per query.

    Returns
    -------
    (mean average precision @ k, list of per-query average precision)
        ``(0.0, [])`` when ``search_res`` contains no query.
    """
    avp = []
    for q in search_res["query_id"].unique():  # loop over all query ids
        curr_data = search_res[search_res["query_id"] == q]  # rows of the current query
        avp.append(avg_precision_at_k(np.array(curr_data["is_relevant"]),
                   np.array(curr_data["predicted_relevance"]), k))

    if not avp:
        return 0.0, []  # no queries: avoid 0/0 -> NaN
    return np.sum(avp) / len(avp), avp

MRR

In [17]:
def rr_at_k(doc_score, y_score, k=10):
    """
    Reciprocal rank of the first relevant document within the top-k.

    Parameters
    ----------
    doc_score: Ground truth (true relevance labels, 1 = relevant).
    y_score: Predicted scores.
    k : number of doc to consider.

    Returns
    -------
    Reciprocal rank for the current query (1 / rank of the first relevant
    document), or 0.0 when none of the top-k documents is relevant.
    """
    # Ground-truth labels reordered by descending predicted score, cut at k.
    top_k_labels = np.take(doc_score, np.argsort(y_score)[::-1][:k])
    return next(
        (1.0 / rank for rank, label in enumerate(top_k_labels, start=1) if label == 1),
        0.0,
    )

NDCG

In [18]:
def dcg_at_k(doc_score, y_score, k=10):
    """
    Discounted cumulative gain of the top-k documents ranked by ``y_score``.

    Uses gain ``2**label - 1`` and discount ``log2(position + 1)`` with
    positions starting at 1.
    """
    top_k = np.argsort(y_score)[::-1][:k]  # indices of the k best predicted scores
    ranked_labels = np.take(doc_score, top_k)
    zero_based_positions = np.arange(len(ranked_labels))
    gains = 2 ** ranked_labels - 1
    return np.sum(gains / np.log2(zero_based_positions + 2))


def ndcg_at_k(doc_score, y_score, k=10):
    """
    DCG@k divided by the ideal DCG@k, rounded to 4 decimals.

    The ideal DCG is obtained by ranking the documents by their own
    ground-truth labels. Returns 0 when the ideal DCG is zero.
    """
    ideal_dcg = dcg_at_k(doc_score, doc_score, k)
    if ideal_dcg:
        actual_dcg = dcg_at_k(doc_score, y_score, k)
        return np.round(actual_dcg / ideal_dcg, 4)
    return 0

In [19]:
# Evaluate the two benchmark queries against the provided validation labels.
# The CSV is read for its "query_id", "pid" and "labels" columns.
validation_df = pd.read_csv(validation_path)

query1_id = 1
query2_id = 2
results = []

# NOTE: the loop variables `ranked_docs` and `scores` remain defined as notebook
# globals after this cell finishes.
for qid, ranked_docs, scores in [
    (query1_id, ranked_docs_q1, scores_q1),
    (query2_id, ranked_docs_q2, scores_q2)
]:
    # Filter validation data for the current query
    q_validation = validation_df[validation_df["query_id"] == qid]

    # Align ranked product IDs with ground truth labels.
    # Products without a validation row are treated as non-relevant (0).
    # NOTE(review): y_true only covers retrieved products, so recall/F1 below
    # count relevant documents among the retrieved ones only -- relevant
    # products the search missed do not lower the recall. Confirm intended.
    y_true = np.array([
        int(q_validation[q_validation["pid"] == pid]["labels"].values[0])
        if pid in q_validation["pid"].values else 0
        for pid in ranked_docs
    ])
    # scores holds [score, pid] pairs in the same order as ranked_docs
    y_score = np.array([s[0] for s in scores])

    # Compute evaluation metrics for the ranked results
    p = precision_at_k(y_true, y_score, k=10)
    r = recall_at_k(y_true, y_score, k=10)
    ap = avg_precision_at_k(y_true, y_score, k=10)
    f1 = f1_at_k(y_true, y_score, k=10)
    rr = rr_at_k(y_true, y_score, k=10)
    ndcg = ndcg_at_k(y_true, y_score, k=10)

    results.append({
        "Query": f"Query {qid}",
        "Precision@10": round(p, 3),
        "Recall@10": round(r, 3),
        "AvgPrecision@10": round(ap, 3),
        "F1@10": round(f1, 3),
        "RR@10": round(rr, 3),
        "NDCG@10": round(ndcg, 3)
    })


# One row per query, one column per metric
results_df = pd.DataFrame(results)
display(results_df)


Unnamed: 0,Query,Precision@10,Recall@10,AvgPrecision@10,F1@10,RR@10,NDCG@10
0,Query 1,0.1,0.083,0.333,0.091,0.333,0.11
1,Query 2,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Custom test queries, keyed by consecutive ids starting at 1
# (the loops below rely on the keys being exactly 1..len(queries)).
queries = {
    1: "men cotton blend grey t shirt",
    2: "light blue jeans for men slim fit",
    3: "women casual wear cotton shirt",
    4: "printed navy top for women",
    5: "regular fit denim jeans"
}

# Per-query results; position i - 1 holds the results of query id i.
ranked_docs = []
scores = []

# Run TF-IDF search for each query
for i in range(1, len(queries) + 1):
    ranked_doc, score = search_products_tf_idf(queries[i], index, idf, tf, products_info)
    ranked_docs.append(ranked_doc)
    scores.append(score)

k = 25

# Iterate through all queries and display top-k results
for i in range(1, len(queries) + 1):
    query = queries[i]
    scores_i = scores[i - 1]
    print(f"\nTop {k} results for: '{query}'")
    # TF-IDF score entries are [score, pid] pairs
    for score, pid in scores_i[:k]:
        info = products_info[pid]
        print(f" - {pid}: {info['title']} → score = {round(score, 4)}")



Top 25 results for: 'men cotton blend grey t shirt'
 - TKPFYGUNNFVNFQF7: Solid Men Grey Track Pants → score = 1.8456
 - TSHEXZ8KYFDUHVYY: Solid Men Round Neck Multicolor T-Shirt  (Pack of 2) → score = 1.8387
 - TSHEXZ8KHNVHEEK6: Solid Men V-neck Multicolor T-Shirt  (Pack of 2) → score = 1.8387
 - TSHEXZ8KBEKYQYTZ: Solid Men V-Neck Multicolor T-Shirt  (Pack of 2) → score = 1.8387
 - TSHER6FBWFYYR5GG: Solid Men Round Neck Multicolor T-Shirt  (Pack of 3) → score = 1.8387
 - TSHER6F4RWS54H9F: Solid Men Round Neck Multicolor T-Shirt  (Pack of 2) → score = 1.8387
 - TSHENYJ28ZCH4H4V: Solid Men Round Neck Multicolor T-Shirt  (Pack of 5) → score = 1.8387
 - TSHENYGRN9XHQUVF: Solid Men Round Neck Multicolor T-Shirt  (Pack of 3) → score = 1.8387
 - TSHENXFZHDWREHFT: Solid Men V-neck Multicolor T-Shirt  (Pack of 3) → score = 1.8387
 - TSHENXFW8CZCTVU5: Solid Men V-neck Multicolor T-Shirt  (Pack of 3) → score = 1.8387
 - TSHFZ3JDVAYZKCFC: Solid Men Round Neck Grey T-Shirt → score = 1.8318
 - TSHF

In [21]:
# Evaluate the labelling strategies stored as columns of our own validation file.
validation_df = pd.read_csv(our_validation_path)

strategies = ["Exact coincidence", "All - 1", ">= 70%", "product types"]
query_ids = validation_df["query_id"].unique()

results = []
k = 10

# Fixed seed so the tie-breaking noise -- and therefore the reported metrics --
# are identical on every Restart & Run All.
rng = np.random.default_rng(42)

# Evaluation loop over each query
for qid in query_ids:
    q_data = validation_df[validation_df["query_id"] == qid]
    # NOTE(review): every document is assumed relevant, so precision-type
    # metrics are 1.0 by construction regardless of the strategy column;
    # confirm this is the intended ground truth.
    doc_score = np.ones(len(q_data))  # assume all documents are relevant

    for strategy in strategies:
        # Add small random noise to binary predictions to break ties
        y_binary = q_data[strategy].values
        y_score = y_binary + rng.random(len(y_binary)) * 0.01

        # Compute evaluation metrics for the current strategy
        results.append({
            "Query": f"Query {qid}",
            "Strategy": strategy,
            "Precision@10": round(precision_at_k(doc_score, y_score, k), 3),
            "Recall@10": round(recall_at_k(doc_score, y_score, k), 3),
            "F1@10": round(f1_at_k(doc_score, y_score, k), 3),
            "AvgPrecision@10": round(avg_precision_at_k(doc_score, y_score, k), 3),
            "RR@10": round(rr_at_k(doc_score, y_score, k), 3),
            "DCG@10": round(dcg_at_k(doc_score, y_score, k), 3),
            "nDCG@10": round(ndcg_at_k(doc_score, y_score, k), 3)
        })

results_df = pd.DataFrame(results)
display(results_df)

Unnamed: 0,Query,Strategy,Precision@10,Recall@10,F1@10,AvgPrecision@10,RR@10,DCG@10,nDCG@10
0,Query 1,Exact coincidence,1.0,0.556,0.714,1.0,1.0,4.544,1.0
1,Query 1,All - 1,1.0,0.556,0.714,1.0,1.0,4.544,1.0
2,Query 1,>= 70%,1.0,0.556,0.714,1.0,1.0,4.544,1.0
3,Query 1,product types,1.0,0.556,0.714,1.0,1.0,4.544,1.0
4,Query 2,Exact coincidence,1.0,1.0,1.0,1.0,1.0,4.544,1.0
5,Query 2,All - 1,1.0,1.0,1.0,1.0,1.0,4.544,1.0
6,Query 2,>= 70%,1.0,1.0,1.0,1.0,1.0,4.544,1.0
7,Query 2,product types,1.0,1.0,1.0,1.0,1.0,4.544,1.0
8,Query 3,Exact coincidence,1.0,0.4,0.571,1.0,1.0,4.544,1.0
9,Query 3,All - 1,1.0,0.4,0.571,1.0,1.0,4.544,1.0


# Part 2 - Extra


In [28]:
# Per-query BM25 results; position i - 1 holds the results of query id i.
ranked_docs2 = []
scores2 = []

# Run BM25 search for each query
# (relies on `queries` having the consecutive keys 1..len(queries))
for i in range(1, len(queries) + 1):
    ranked_doc2, score2 = search_products(queries[i], index, idf, tf, products_info, ranking_method="bm25")
    ranked_docs2.append(ranked_doc2)
    scores2.append(score2)

k = 25

# Iterate through all queries and display top-k results
for i in range(1, len(queries) + 1):
    query = queries[i]
    scores_i = scores2[i - 1]
    print(f"\nTop {k} results for: '{query}'")
    # BM25 score entries are (pid, score) tuples -- the reverse of the
    # [score, pid] pairs returned by the TF-IDF ranker.
    for pid,score in scores_i[:k]:
        info = products_info[pid]
        print(f" - {pid}: {info['title']} → score = {round(score, 4)}")


Top 25 results for: 'men cotton blend grey t shirt'
 - TROFCCRY3FJAHHQB: Regular Fit Men Grey Satin Blend Trousers → score = 11.4803
 - TROFCCRYVP6EVGQT: Regular Fit Men Grey Satin Blend Trousers → score = 11.1442
 - TSHFZ3JDHZSGCFHT: Solid Men Round Neck Grey T-Shirt → score = 11.0235
 - TSHFZ3JDVAYZKCFC: Solid Men Round Neck Grey T-Shirt → score = 11.0235
 - TSHEXZ8KHNVHEEK6: Solid Men V-neck Multicolor T-Shirt  (Pack of 2) → score = 10.9489
 - TSHEXZ8KYFDUHVYY: Solid Men Round Neck Multicolor T-Shirt  (Pack of 2) → score = 10.9489
 - TSHEXZ8KBEKYQYTZ: Solid Men V-Neck Multicolor T-Shirt  (Pack of 2) → score = 10.9489
 - TSHENXFZHDWREHFT: Solid Men V-neck Multicolor T-Shirt  (Pack of 3) → score = 10.8538
 - TSHER6FBWFYYR5GG: Solid Men Round Neck Multicolor T-Shirt  (Pack of 3) → score = 10.8538
 - TSHER6F4RWS54H9F: Solid Men Round Neck Multicolor T-Shirt  (Pack of 2) → score = 10.8538
 - TSHENYGRN9XHQUVF: Solid Men Round Neck Multicolor T-Shirt  (Pack of 3) → score = 10.7603
 - TSHE

In [29]:
# Evaluate the labelling strategies stored as columns of our own validation file.
# NOTE(review): this cell reads only the validation CSV; it does not use the
# BM25 results (ranked_docs2 / scores2) computed in the previous cell, so the
# table it produces says nothing about BM25 -- confirm what should be evaluated.
validation_df = pd.read_csv(our_validation_path)

strategies = ["Exact coincidence", "All - 1", ">= 70%", "product types"]
query_ids = validation_df["query_id"].unique()

results = []
k = 10

# Fixed seed so the tie-breaking noise -- and therefore the reported metrics --
# are identical on every Restart & Run All.
rng = np.random.default_rng(42)

# Evaluation loop over each query
for qid in query_ids:
    q_data = validation_df[validation_df["query_id"] == qid]
    # NOTE(review): every document is assumed relevant, so precision-type
    # metrics are 1.0 by construction regardless of the strategy column.
    doc_score = np.ones(len(q_data))  # assume all documents are relevant

    for strategy in strategies:
        # Add small random noise to binary predictions to break ties
        y_binary = q_data[strategy].values
        y_score = y_binary + rng.random(len(y_binary)) * 0.01

        # Compute evaluation metrics for the current strategy
        results.append({
            "Query": f"Query {qid}",
            "Strategy": strategy,
            "Precision@10": round(precision_at_k(doc_score, y_score, k), 3),
            "Recall@10": round(recall_at_k(doc_score, y_score, k), 3),
            "F1@10": round(f1_at_k(doc_score, y_score, k), 3),
            "AvgPrecision@10": round(avg_precision_at_k(doc_score, y_score, k), 3),
            "RR@10": round(rr_at_k(doc_score, y_score, k), 3),
            "DCG@10": round(dcg_at_k(doc_score, y_score, k), 3),
            "nDCG@10": round(ndcg_at_k(doc_score, y_score, k), 3)
        })

results_df = pd.DataFrame(results)
display(results_df)

Unnamed: 0,Query,Strategy,Precision@10,Recall@10,F1@10,AvgPrecision@10,RR@10,DCG@10,nDCG@10
0,Query 1,Exact coincidence,1.0,0.556,0.714,1.0,1.0,4.544,1.0
1,Query 1,All - 1,1.0,0.556,0.714,1.0,1.0,4.544,1.0
2,Query 1,>= 70%,1.0,0.556,0.714,1.0,1.0,4.544,1.0
3,Query 1,product types,1.0,0.556,0.714,1.0,1.0,4.544,1.0
4,Query 2,Exact coincidence,1.0,1.0,1.0,1.0,1.0,4.544,1.0
5,Query 2,All - 1,1.0,1.0,1.0,1.0,1.0,4.544,1.0
6,Query 2,>= 70%,1.0,1.0,1.0,1.0,1.0,4.544,1.0
7,Query 2,product types,1.0,1.0,1.0,1.0,1.0,4.544,1.0
8,Query 3,Exact coincidence,1.0,0.4,0.571,1.0,1.0,4.544,1.0
9,Query 3,All - 1,1.0,0.4,0.571,1.0,1.0,4.544,1.0
