In [8]:
import numpy as np
from typing import List, Tuple

def simple_ranking_algorithm(query: str, documents: List[str]) -> List[Tuple[str, float]]:
    """
    Rank documents by the fraction of distinct query words they contain.

    Both the query and each document are lower-cased and split on
    whitespace; a document's score is the number of distinct query words
    that appear in it divided by the number of distinct query words.

    Args:
        query: Free-text query string.
        documents: Documents to score.

    Returns:
        ``(document, score)`` pairs sorted by descending score. Documents
        with equal scores keep their input order. If the query contains no
        words (empty or whitespace only), every document scores ``0.0``.
    """
    query_words = set(query.lower().split())
    if not query_words:
        # Nothing to match against; avoid dividing by zero below.
        return [(doc, 0.0) for doc in documents]

    total_terms = len(query_words)
    scored = [
        (doc, len(query_words & set(doc.lower().split())) / total_terms)
        for doc in documents
    ]
    # sorted() is stable, so ties stay in their original relative order.
    return sorted(scored, key=lambda pair: pair[1], reverse=True)

def precision_at_k(ranked_docs: List[Tuple[str, float]], relevant_docs: List[str], k: int) -> float:
    """
    Compute Precision@k: the share of the top-k ranked documents that are relevant.

    Args:
        ranked_docs: ``(document, score)`` pairs, best first.
        relevant_docs: Documents considered relevant.
        k: Cutoff rank.

    Returns:
        Number of relevant documents among the first ``k`` entries divided
        by ``k``, or 0 when ``k`` is not positive.
    """
    hits = 0
    for doc, _score in ranked_docs[:k]:
        if doc in relevant_docs:
            hits += 1

    if k > 0:
        return hits / k
    return 0

def recall_at_k(ranked_docs: List[Tuple[str, float]], relevant_docs: List[str], k: int) -> float:
    """
    Compute Recall@k: the share of relevant documents found in the top k.

    Args:
        ranked_docs: ``(document, score)`` pairs, best first.
        relevant_docs: Documents considered relevant.
        k: Cutoff rank.

    Returns:
        Number of relevant documents among the first ``k`` entries divided
        by ``len(relevant_docs)``. Returns ``0.0`` when ``k`` is not
        positive or when there are no relevant documents.
    """
    # A negative k would make ranked_docs[:k] drop items from the END of the
    # list instead of selecting nothing, so reject non-positive cutoffs up front.
    if k <= 0 or not relevant_docs:
        return 0.0

    relevant_retrieved = sum(1 for doc, _ in ranked_docs[:k] if doc in relevant_docs)
    return relevant_retrieved / len(relevant_docs)

def mean_reciprocal_rank(ranked_docs: List[Tuple[str, float]], relevant_docs: List[str]) -> float:
    """
    Compute the reciprocal rank of the first relevant document.

    This is the single-query term of Mean Reciprocal Rank; no averaging
    over queries happens here.

    Args:
        ranked_docs: ``(document, score)`` pairs, best first.
        relevant_docs: Documents considered relevant.

    Returns:
        ``1 / rank`` of the first relevant document (ranks start at 1),
        or 0 if no ranked document is relevant.
    """
    relevant_positions = (
        position
        for position, (doc, _score) in enumerate(ranked_docs, start=1)
        if doc in relevant_docs
    )
    first_hit = next(relevant_positions, None)
    if first_hit is None:
        return 0
    return 1 / first_hit

def average_precision(ranked_docs: List[Tuple[str, float]], relevant_docs: List[str]) -> float:
    """
    Compute Average Precision for a single ranked list.

    Args:
        ranked_docs: ``(document, score)`` pairs, best first.
        relevant_docs: Documents considered relevant.

    Returns:
        The sum of precision values taken at each rank where a relevant
        document appears, divided by ``len(relevant_docs)``; 0 when
        ``relevant_docs`` is empty.
    """
    precisions_at_hits = []
    for rank, (doc, _score) in enumerate(ranked_docs, start=1):
        if doc in relevant_docs:
            # The number of hits so far (including this one) over the current rank.
            hits_so_far = len(precisions_at_hits) + 1
            precisions_at_hits.append(hits_so_far / rank)

    if relevant_docs:
        return sum(precisions_at_hits) / len(relevant_docs)
    return 0

def ndcg_at_k(ranked_docs: List[Tuple[str, float]], relevant_docs: List[str], k: int) -> float:
    """
    Compute nDCG@k with binary relevance.

    Each relevant document at rank ``i`` (1-based) within the top ``k``
    contributes ``1 / log2(i + 1)`` to the DCG. The ideal DCG places
    ``min(k, len(relevant_docs))`` relevant documents at the top ranks.

    Args:
        ranked_docs: ``(document, score)`` pairs, best first.
        relevant_docs: Documents considered relevant.
        k: Cutoff rank.

    Returns:
        ``DCG / IDCG``, or 0 when the ideal DCG is not positive.
    """
    top_k = ranked_docs[:k]
    gains = [
        1 / np.log2(rank + 1)
        for rank, (doc, _score) in enumerate(top_k, start=1)
        if doc in relevant_docs
    ]
    dcg = sum(gains)

    ideal_hits = min(k, len(relevant_docs))
    idcg = sum(1 / np.log2(rank + 1) for rank in range(1, ideal_hits + 1))

    if idcg > 0:
        return dcg / idcg
    return 0

# Example usage: rank a small corpus against one query and report each metric.
query = "python software development"
documents = [
    "Python is a popular programming language",
    "Java is widely used in enterprise software",
    "Python has simple syntax and is easy to learn",
    "JavaScript is used for web development",
    "Python supports multiple programming paradigms"
]
# Ground truth for this demo: the three documents about Python (indices 0, 2, 4).
relevant_docs = [documents[index] for index in (0, 2, 4)]

ranked_docs = simple_ranking_algorithm(query, documents)
print(f"Ranked Documents: {ranked_docs}")

# Cutoff rank shared by the @k metrics below.
k = 3
print(f"Precision@{k}: {precision_at_k(ranked_docs, relevant_docs, k)}")
print(f"Recall@{k}: {recall_at_k(ranked_docs, relevant_docs, k)}")
# The next two values are computed for this single query only.
print(f"MRR: {mean_reciprocal_rank(ranked_docs, relevant_docs)}")
print(f"mAP: {average_precision(ranked_docs, relevant_docs)}")
print(f"nDCG@{k}: {ndcg_at_k(ranked_docs, relevant_docs, k)}")

Ranked Documents: [('Python is a popular programming language', 0.3333333333333333), ('Java is widely used in enterprise software', 0.3333333333333333), ('Python has simple syntax and is easy to learn', 0.3333333333333333), ('JavaScript is used for web development', 0.3333333333333333), ('Python supports multiple programming paradigms', 0.3333333333333333)]
Precision@3: 0.6666666666666666
Recall@3: 0.6666666666666666
MRR: 1.0
mAP: 0.7555555555555555
nDCG@3: 0.7039180890341347
