In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in file_names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

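* Inputs: the top-N retrieved results for each query, plus the chunk of true labels for that query from the DB, as `<query, product, ESCI label>` triples, e.g. `<Q1, P1, ESCI>`, `<Q1, P2, ESCI>`, etc.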

In [None]:
# Mean Reciprocal Rank
# For the exact product match and when it appears in the result

def mrr_metric(retrived, exact_prods):
    """
    Reciprocal rank of the first Exact product in one query's ranked results.

    Averaging the returned value over all queries gives the Mean Reciprocal
    Rank (MRR).

    Parameters:
    - retrived: predicted items for one query, in ranked order (best first)
    - exact_prods: collection of Exact-match items for the same query

    Returns:
    - 1 / rank of the first item of `retrived` found in `exact_prods`
      (ranks start from 1), or 0.0 if no retrieved item is an Exact match
    """
    if not exact_prods:
        return 0.0

    # enumerate from 1 so the loop variable is the rank itself; this avoids
    # the `1.0 / index + 1` precedence trap (which divides by zero at index 0).
    for rank, prod in enumerate(retrived, start=1):
        if prod in exact_prods:
            return 1.0 / rank

    return 0.0  # no Exact item appears in the retrieved list

In [None]:
# Mean Average Precision
# For the Exact match in top k

def exact_map_metric(retrived, exact_prods):
    """
    Average precision and recall of Exact items for one query.

    Parameters:
    - retrived: list of predicted items in ranked order (best first)
    - exact_prods: collection of Exact items for the query

    Returns:
    - tuple (average_precision, fraction_retrieved):
        average_precision: sum of precision@rank at every rank holding an
            Exact item, divided by the number of Exact items
        fraction_retrieved: hits in `retrived` divided by the number of
            Exact items
      Always a 2-tuple; (0.0, 0.0) when `exact_prods` is empty, so callers
      can unpack the result unconditionally.
    """
    if not exact_prods:
        return (0.0, 0.0)

    score = 0.0
    num_hits = 0.0

    for rank, prod in enumerate(retrived, start=1):
        if prod in exact_prods:
            num_hits += 1.0
            score += num_hits / rank  # precision at this rank

    total_relevant = len(exact_prods)
    return (score / total_relevant, num_hits / total_relevant)

In [None]:
# Mean Average Precision
# For the Substitute/Complement match in top k

def sc_map_metric(retrived, sc_true_prods):
    """
    Average precision and recall of Substitute/Complement items for one query.

    Parameters:
    - retrived: list of predicted items in ranked order (best first)
    - sc_true_prods: collection of Substitute and Complement items for the query

    Returns:
    - tuple (average_precision, fraction_retrieved):
        average_precision: sum of precision@rank at every rank holding an
            S/C item, divided by the number of S/C items
        fraction_retrieved: hits in `retrived` divided by the number of
            S/C items
      Always a 2-tuple; (0.0, 0.0) when `sc_true_prods` is empty, so callers
      can unpack the result unconditionally.
    """
    if not sc_true_prods:
        return (0.0, 0.0)

    score = 0.0
    num_hits = 0.0

    for rank, prod in enumerate(retrived, start=1):
        if prod in sc_true_prods:
            num_hits += 1.0
            score += num_hits / rank  # precision at this rank

    total_relevant = len(sc_true_prods)
    return (score / total_relevant, num_hits / total_relevant)

In [None]:
# 1 - %I@k
# For Irrelevant match in top k

def not_i_at_k(retrived, ir_true_prods):
    """
    Share of a query's known Irrelevant items that show up in its results.

    retrived: predicted items for one query
    ir_true_prods: collection of items labelled Irrelevant for that query

    Returns:
        (number of entries of `retrived` found in `ir_true_prods`)
        / len(ir_true_prods), or 0.0 when `ir_true_prods` is empty.

    NOTE(review): the denominator is len(ir_true_prods), not len(retrived),
    and the value is not subtracted from 1 -- confirm this matches the
    intended "1 - %I@k" definition.
    """
    if ir_true_prods:
        # Every matching entry counts, including repeated ones.
        irrelevant_hits = sum(
            (1.0 for item in retrived if item in ir_true_prods), 0.0
        )
        return irrelevant_hits / len(ir_true_prods)

    return 0.0

# NDCG@k

## 1. Assign Relevance Scores

| Label          | Score |
| -------------- | ----- |
| E (Exact)      | 5     |
| S (Substitute) | 2     |
| C (Complement) | 2     |
| I (Irrelevant) | 0     |

## 2. Compute DCG@k (Discounted Cumulative Gain)

Discounted Cumulative Gain (DCG@k) with Exponential Relevance

$$
\text{DCG@k} = \sum_{i=1}^{k} \frac{2^{rel_i} - 1}{\log_2(i + 1)}
$$

Where:
- $rel_i$ is the relevance score of the item at position $i$
- $\log_2(i + 1)$ is the discount factor

## 3. Compute IDCG@k (Ideal DCG)
Sort the same items in perfect order (most relevant first) and compute DCG again. This gives the best possible DCG — called IDCG.

## 4. Normalize
nDCG@k is defined as:

$$
\text{nDCG@k} = \frac{\text{DCG@k}}{\text{IDCG@k}}
$$

Where:
- $\text{DCG@k}$ is the Discounted Cumulative Gain at rank *k*
- $\text{IDCG@k}$ is the DCG@k of the ideal (perfect) ranking
 
nDCG ranges from 0 to 1; a value of 1.0 means the system ranked everything perfectly.

## Note

Missing labels are ignored (filtered out): items without relevance labels are skipped in both the DCG and IDCG calculations.


In [None]:
# Graded relevance score assigned to each ESCI label.
relevance_dict = dict(
    E=5,  # Exact
    S=2,  # Substitute
    C=2,  # Complement
    I=0,  # Irrelevant
)

In [None]:
def compute_relevence_scores(retrived, true_prods):
    """
    Translate one query's ranked results into graded relevance scores.

    Walks `retrived` in order, looks up each item's label in `true_prods`
    and maps that label through the module-level `relevance_dict`.
    Items that are not keys of `true_prods` are skipped, so the result may be
    shorter than `retrived`.

    Parameters:
    - retrived: items for one query, in ranked order
    - true_prods: mapping of item -> label (a key of `relevance_dict`)

    Returns:
    - list of relevance scores, in the order the labelled items were retrieved
    """
    scores = []
    for product_id in retrived:
        if product_id not in true_prods:
            continue  # unlabelled item: leave it out of the scoring
        label = true_prods[product_id]
        scores.append(relevance_dict[label])

    return scores

In [None]:
# Example usage, once `retrived` (ranked items) and `true_prods`
# (item -> ESCI label) are defined for a query:
#     relevance_scores = compute_relevence_scores(retrived, true_prods)
# (Left as a comment: neither name exists at this point on a fresh kernel, so
# executing the call here raises NameError.)

def dcg_at_k(relevance_scores):
    """
    Compute DCG@k using 2^rel - 1 as gain, with k = len(relevance_scores).

    Parameters:
    - relevance_scores (list or array): relevance scores ordered by predicted rank

    Returns:
    - DCG@k score (0.0 for an empty input)
    """
    k = len(relevance_scores)
    # Work in floating point: with an integer array, `2 ** rel` is integer
    # exponentiation, which raises on negative values and overflows for large ones.
    relevance_scores = np.asarray(relevance_scores, dtype=float)
    gains = 2 ** relevance_scores - 1
    # Position i (1-based) is discounted by log2(i + 1): log2(2), log2(3), ...
    discounts = np.log2(np.arange(2, k + 2))
    return np.sum(gains / discounts)

def ndcg_at_k(relevance_scores):
    """
    Normalised DCG: the DCG of the given ranking divided by the DCG of the
    same scores arranged in descending order (the ideal ranking).

    Parameters:
    - relevance_scores (list or array): relevance scores ordered by predicted rank

    Returns:
    - nDCG@k score, or 0.0 when the ideal DCG is zero
    """
    best_possible_order = sorted(relevance_scores, reverse=True)
    ideal_dcg = dcg_at_k(best_possible_order)

    if ideal_dcg == 0:
        # Nothing to normalise against (e.g. no scores or all-zero gains).
        return 0.0

    actual_dcg = dcg_at_k(relevance_scores)
    return actual_dcg / ideal_dcg

In [None]:
# Per-query metric values; calc_metrics() appends one entry per query to each.
ndcg = []
mrr = []
e_map = []
e_perct = []
sc_map = []
sc_perct = []
# Named *_scores so the list does not shadow the not_i_at_k() function.
not_i_at_k_scores = []

def calc_metrics(retrived_set, true_set):
    """
    Compute every metric for each query and append the results to the
    module-level lists (ndcg, mrr, e_map, e_perct, sc_map, sc_perct,
    not_i_at_k_scores).

    Parameters:
    - retrived_set: mapping of query_id -> ranked list of retrieved items,
      or an iterable of (query_id, ranked list) pairs
    - true_set: mapping indexed by query_id whose values map
      item -> ESCI label ('E', 'S', 'C' or 'I')

    Returns:
    - None; results are accumulated in the module-level lists.
    """
    # A dict iterates over its keys only, so go through .items() when available.
    if hasattr(retrived_set, "items"):
        query_results = retrived_set.items()
    else:
        query_results = retrived_set

    for query_id, product_list in query_results:
        true_prod_map = true_set[query_id]
        ndcg.append(ndcg_at_k(compute_relevence_scores(product_list, true_prod_map)))

        # Split the labelled items of this query by ESCI label.
        exact_prods = []
        sc_prods = []
        ir_prods = []
        for prod, relevence in true_prod_map.items():
            if relevence == 'E':
                exact_prods.append(prod)
            elif relevence in ('S', 'C'):
                sc_prods.append(prod)
            elif relevence == 'I':
                ir_prods.append(prod)

        mrr.append(mrr_metric(product_list, exact_prods))

        # Local names differ from the metric functions on purpose: assigning
        # to a local called `sc_map_metric` would shadow the function itself.
        e_map_value, e_perct_value = exact_map_metric(product_list, exact_prods)
        e_map.append(e_map_value)
        e_perct.append(e_perct_value)

        sc_map_value, sc_perct_value = sc_map_metric(product_list, sc_prods)
        sc_map.append(sc_map_value)
        sc_perct.append(sc_perct_value)

        not_i_at_k_scores.append(not_i_at_k(product_list, ir_prods))

https://chatgpt.com/c/685bd853-c460-8005-8bba-2f4e242a2da8