# Retrieval On Summarized Text And Model Evaluation

#### Import original and summarized corpus

In [1]:
import json

read_file_path = 'output_led_large_text_sum.txt'
imported_data = {}
# Import from JSON file
with open(read_file_path, 'r') as json_file:
    imported_data = json.load(json_file)

read_file_path = 'data.json'
imported_data2 = []
with open(read_file_path, 'r') as json_file:
    imported_data2 = json.load(json_file)

read_file_path = 'pegasus.txt'
pegasus_imported_data = []
with open(read_file_path, 'r') as json_file:
    pegasus_imported_data = json.load(json_file)

read_file_path = 'distil_Bart.txt'
distilbart_imported_data = []
with open(read_file_path, 'r') as json_file:
    distilbart_imported_data = json.load(json_file)

#### Create corpus of document text

In [2]:
corpus_led_large = [doc for id, doc in imported_data.items()]
corpus_pegasus = [doc for id, doc in pegasus_imported_data.items()]
corpus_distilbart = [doc for id, doc in distilbart_imported_data.items()]

original_corpus = [doc for doc in imported_data2]

#### Load manually annotated data

In [3]:
read_file_path = 'sample-annotations.json'
annotated_data = []
# Import from JSON file
with open(read_file_path, 'r') as json_file:
    annotated_data = json.load(json_file)

queries = annotated_data['queries'];
print(queries[0])

{'id': 1, 'query': 'Quantum computing algorithms overview', 'narrative': 'Relevant documents must contain information about various algorithms used in quantum computing. These algorithms can run on a quantum computer. Documents on that do not mention any of the quantum algorithms like Deutsch–Jozsa algorithm, Bernstein-Vazirani algorithm, Simon’s algorithm, Quantum phase estimation algorithm, Shor’s algorithm, Grover’s algorithm, etc. should be considered irrelevant.', 'documents': [{'title': 'Quantum algorithm', 'url': 'https://en.wikipedia.org/wiki/Quantum_algorithm', 'relevance_score': 4}, {'title': 'Algorithm', 'url': 'https://en.wikipedia.org/wiki/Quantum_computing', 'relevance_score': 0}, {'title': 'Shor’s algorithm', 'url': 'https://en.wikipedia.org/wiki/Grover%27s_algorithm', 'relevance_score': 3}, {'title': 'Grover’s algorithm', 'url': 'https://en.wikipedia.org/wiki/Shor%27s_algorithm', 'relevance_score': 3}, {'title': 'Deutsch–Jozsa algorithm', 'url': 'https://en.wikipedia.or

In [4]:
import requests
from bs4 import BeautifulSoup
from rank_bm25 import BM25Okapi
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize  # Add this import statement

# Download NLTK stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /Users/meet/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
def clean_text(text):
    # Remove HTML tags
    clean_text = BeautifulSoup(text, 'html.parser').get_text()

    # Tokenize and remove non-alphanumeric characters, convert to lowercase
    tokens = word_tokenize(clean_text)
    clean_text = ' '.join([word for word in tokens if word.isalnum()]).lower()

    # Remove stopwords
    clean_text = ' '.join([word for word in clean_text.split() if word not in stop_words])

    return clean_text

## BM25 Retrieval Model

In [6]:
from rank_bm25 import BM25Okapi

In [7]:
# get BM25 scores for all queries
def get_query_results(model, queries):
    query_results = []
    for query in queries:
        tokenized_query = query['query'].lower().split() # tokenize query
        doc_scores = model.get_scores(tokenized_query) # get BM25 scores
        query_results.append({
            'query_id': query['id'],
            'query': query['query'],
            'scores': [{ 
                'id': i + 1,
                'score': doc_scores[i],
                'title': original_corpus[i]['title'],
            } for i in range(len(doc_scores))] # add document id, score, and title
        })
    return query_results

In [8]:
# get docid, title apping for documents
doc_mappings = [(doc['id'], doc['title']) for doc in original_corpus]

In [9]:
# get average precision for a query
def get_average_precision(relevant_docs, retrieved_docs):
    # Calculate precision for each retrieved document
    precisions = []
    total_relevant_retrieved = 0
    # Calculate precision for each relevant retrieved document
    for i in range(len(retrieved_docs)):
        if retrieved_docs[i] in relevant_docs:
            total_relevant_retrieved += 1
            precisions.append(total_relevant_retrieved / (i + 1))
    # Calculate average precision
    return sum(precisions) / len(precisions) if len(precisions) > 0 else 0;

def get_average_mrr(relevant_docs, retrieved_docs):
    # Calculare mean reciprocal rank
    for i in range(len(retrieved_docs)):
        if retrieved_docs[i] in relevant_docs:
            return 1 / (i + 1)
    return 0;

# get average precision for all queries
def get_mean_average_precision(queries, results, doc_mappings, eval_func=get_average_precision, k=10):
    eval_scores = []
    # Loop through each query
    for query_data in queries:
        query = query_data['query']
        annotated_documents = query_data['documents']

        # Extract true labels and predicted scores
        annotated_documents = [{
            'title': doc['title'],
            'relevance_score': doc['relevance_score']
        } for doc in annotated_documents]
        annotated_documents = sorted(annotated_documents, key=lambda k: k['relevance_score'], reverse=True)

        # Rank documents based on your retrieval model
        current_result = [result for result in results if result['query'] == query][0] # Get scores for query
        ranked_documents = current_result['scores'] # Get ranked documents with scores
        ranked_documents = sorted(ranked_documents, key=lambda k: k['score'], reverse=True) # Sort documents by score (highest to lowest)

        # Get true labels
        relevant_docs = []
        for doc in annotated_documents:
            if doc['relevance_score'] > 0: # If document is relevant
                relevant_docs.append(list(filter(lambda x: x[1] == doc['title'], doc_mappings))[0][0]) # Add document id to relevant documents

        predicted_scores = [doc['id'] for doc in ranked_documents][:k]

        eval_scores.append(eval_func(relevant_docs, predicted_scores))

    # Calculate metrics
    map_score = sum(eval_scores) / len(eval_scores)
    return map_score

In [10]:
import numpy as np

In [11]:
def get_mean_dcg_at_k(queries, results, doc_mappings, k = 10):
    eval_scores = []
    # Loop through each query
    for query_data in queries:
        query = query_data['query']
        annotated_documents = query_data['documents']

        # Extract true labels and predicted scores
        annotated_documents = [{
            'title': doc['title'],
            'relevance_score': doc['relevance_score']
        } for doc in annotated_documents]
        annotated_documents = sorted(annotated_documents, key=lambda k: k['relevance_score'], reverse=True)

        # Rank documents based on your retrieval model
        current_result = [result for result in results if result['query'] == query][0] # Get scores for query
        ranked_documents = current_result['scores'] # Get ranked documents with scores
        ranked_documents = sorted(ranked_documents, key=lambda k: k['score'], reverse=True) # Sort documents by score (highest to lowest)
        top_k_ranked_documents = ranked_documents[:k]

        actual_dcg_scores = []
        prev_sum = 0
        for i, doc in enumerate(top_k_ranked_documents, 1):
            annotated_score = list(filter(lambda x: x['title'] == doc['title'], annotated_documents))[0]['relevance_score'] if doc['title'] in [doc['title'] for doc in annotated_documents] else 0
            if i == 1:
                actual_dcg_scores.append(annotated_score)
                prev_sum = annotated_score
            else:
                prev_sum += annotated_score / np.log2(i);
                actual_dcg_scores.append(prev_sum)
        
        ideal_dcg_scores = []
        prev_sum = 0
        for i, doc in enumerate(annotated_documents, 1):
            if i == 1:
                ideal_dcg_scores.append(doc['relevance_score'])
                prev_sum = doc['relevance_score']
            else:
                prev_sum += doc['relevance_score'] / np.log2(i);
                ideal_dcg_scores.append(prev_sum)

        eval_scores.append(actual_dcg_scores[-1] / ideal_dcg_scores[-1])

    # Calculate metrics
    dcg_score = sum(eval_scores) / len(eval_scores)
    return dcg_score

#### MAP, MRR, and NDCG of BM25 model on corpus of documents summarized with Led Large model

In [12]:
# Calculate MAP and MRR score for summarization with Led Large model on BM25
clean_corpus_led_large = [clean_text(doc) for doc in corpus_led_large] # clean corpus
tokenized_corpus_led_large = [doc.split() for doc in clean_corpus_led_large] # tokenize corpus
bm25_led_large = BM25Okapi(tokenized_corpus_led_large) # initialize BM25 model
results_led_large = get_query_results(bm25_led_large, queries) # get BM25 scores for all queries

map_score_led_large = get_mean_average_precision(queries, results_led_large, doc_mappings, get_average_precision) # get MAP score
mrr_score_led_large = get_mean_average_precision(queries, results_led_large, doc_mappings, get_average_mrr) # get MRR score 
dcg_score_led_large = get_mean_dcg_at_k(queries, results_led_large, doc_mappings, 10) # get DCG score

print(f"Mean Average Precision (MAP) (Summarization model: Led Large), Retrieval model: BM25): {map_score_led_large:.4f}")
print(f"Mean Reciprocal Rank (MRR) (Summarization model: Led Large), Retrieval model: BM25: {mrr_score_led_large:.4f}")
print(f"Mean Discounted Cumulative Gain (DCG) (Summarization model: Led Large), Retrieval model: BM25: {dcg_score_led_large:.4f}")

Mean Average Precision (MAP) (Summarization model: Led Large), Retrieval model: BM25): 0.8996
Mean Reciprocal Rank (MRR) (Summarization model: Led Large), Retrieval model: BM25: 0.9600
Mean Discounted Cumulative Gain (DCG) (Summarization model: Led Large), Retrieval model: BM25: 0.8105


#### MAP, MRR, and NDCG of BM25 model on corpus of documents summarized with Pegasus model

In [13]:
# Calculate MAP and MRR score for summarization with Pegasus on BM25
clean_corpus_pegasus = [clean_text(doc) for doc in corpus_pegasus] # clean corpus
tokenized_corpus_pegasus = [doc.split() for doc in clean_corpus_pegasus] # tokenize corpus
bm25_pegasus = BM25Okapi(tokenized_corpus_pegasus) # initialize BM25 model
results_pegasus = get_query_results(bm25_pegasus, queries) # get BM25 scores for all queries

map_score_pegasus = get_mean_average_precision(queries, results_pegasus, doc_mappings, get_average_precision) # get MAP score
mrr_score_pegasus = get_mean_average_precision(queries, results_pegasus, doc_mappings, get_average_mrr) # get MRR score
dcg_score_pegasus = get_mean_dcg_at_k(queries, results_pegasus, doc_mappings, 10) # get DCG score

print(f"Mean Average Precision (MAP) (Summarization model: Pegasus), Retrieval model: BM25: {map_score_pegasus:.4f}")
print(f"Mean Reciprocal Rank (MRR) (Summarization model: Pegasus), Retrieval model: BM25: {mrr_score_pegasus:.4f}")
print(f"Mean Discounted Cumulative Gain (DCG) (Summarization model: Pegasus), Retrieval model: BM25: {dcg_score_pegasus:.4f}")

Mean Average Precision (MAP) (Summarization model: Pegasus), Retrieval model: BM25: 0.8049
Mean Reciprocal Rank (MRR) (Summarization model: Pegasus), Retrieval model: BM25: 0.8667
Mean Discounted Cumulative Gain (DCG) (Summarization model: Pegasus), Retrieval model: BM25: 0.7068


#### MAP, MRR, and NDCG of BM25 model on corpus of documents summarized with Distil BART model

In [14]:
# Calculate MAP and MRR score for summarization with Distil Bart on BM25
clean_corpus_distilbart = [clean_text(doc) for doc in corpus_distilbart] # clean corpus
tokenized_corpus_distilbart = [doc.split() for doc in clean_corpus_distilbart] # tokenize corpus
bm25_distilbart = BM25Okapi(tokenized_corpus_distilbart) # initialize BM25 model
results_distilbart = get_query_results(bm25_distilbart, queries) # get BM25 scores for all queries

map_score_distilbart = get_mean_average_precision(queries, results_distilbart, doc_mappings, get_average_precision) # get MAP score
mrr_score_distilbart = get_mean_average_precision(queries, results_distilbart, doc_mappings, get_average_mrr) # get MRR score
dcg_score_distilbart = get_mean_dcg_at_k(queries, results_distilbart, doc_mappings, 10) # get DCG score

print(f"Mean Average Precision (MAP) (Summarization model: Distil Bart), Retrieval model: BM25: {map_score_distilbart:.4f}")
print(f"Mean Reciprocal Rank (MRR) (Summarization model: Distil Bart), Retrieval model: BM25: {mrr_score_distilbart:.4f}")
print(f"Mean Discounted Cumulative Gain (DCG) (Summarization model: Distil Bart), Retrieval model: BM25: {dcg_score_distilbart:.4f}")

Mean Average Precision (MAP) (Summarization model: Distil Bart), Retrieval model: BM25: 0.8946
Mean Reciprocal Rank (MRR) (Summarization model: Distil Bart), Retrieval model: BM25: 0.9800
Mean Discounted Cumulative Gain (DCG) (Summarization model: Distil Bart), Retrieval model: BM25: 0.8469


## BM25 Model Hyperparameters Tuning

In [20]:
def tune_bm25(queries, corpus, doc_mappings, k1_values, b_values):
    best_score = 0
    best_k1 = None
    best_b = None
    model = BM25Okapi(corpus)
    results = get_query_results(model, queries)
    map_score = get_mean_average_precision(queries, results, doc_mappings, get_average_precision)
    mrr_score = get_mean_average_precision(queries, results, doc_mappings, get_average_mrr)
    dcg_score = get_mean_dcg_at_k(queries, results, doc_mappings, 10)
    print(f"k1=1.5 (default), b=0.75 (default), MAP={map_score:.4f}, MRR={mrr_score:.4f}, DCG={dcg_score:.4f}")

    for k1 in k1_values:
        for b in b_values:
            model = BM25Okapi(corpus, k1=k1, b=b)
            results = get_query_results(model, queries)
            map_score = get_mean_average_precision(queries, results, doc_mappings, get_average_precision)
            mrr_score = get_mean_average_precision(queries, results, doc_mappings, get_average_mrr)
            dcg_score = get_mean_dcg_at_k(queries, results, doc_mappings, 10)
            # print(f"BM25Okapi: k1={k1}, b={b}, MAP={map_score:.4f}, MRR={mrr_score:.4f}, DCG={dcg_score:.4f}")
            if map_score > best_score:
                best_score = map_score
                best_k1 = k1
                best_b = b
    return best_k1, best_b

In [21]:
print("Retrieval model: BM25, Summarization model: Led Large")
k1_values = [i / 10 for i in range(1, 15, 1)]
b_values = [i / 20 for i in range(1, 21, 1)]
best_k1, best_b = tune_bm25(queries, tokenized_corpus_led_large, doc_mappings, k1_values, b_values)

model = BM25Okapi(tokenized_corpus_led_large, k1=best_k1, b=best_b)
results_led_large_tuned = get_query_results(model, queries)
map_score_tuned = get_mean_average_precision(queries, results_led_large_tuned, doc_mappings, get_average_precision)
mrr_score_tuned = get_mean_average_precision(queries, results_led_large_tuned, doc_mappings, get_average_mrr)
dcg_score_tuned = get_mean_dcg_at_k(queries, results_led_large_tuned, doc_mappings, 10)
print(f"Best k1: {best_k1}, Best b: {best_b}, MAP: {map_score_tuned:.4f}, MRR: {mrr_score_tuned:.4f}, DCG: {dcg_score_tuned:.4f}")

Retrieval model: BM25, Summarization model: Led Large
k1=1.5 (default), b=0.75 (default), MAP=0.8996, MRR=0.9600, DCG=0.8105
Best k1: 1.4, Best b: 0.5, MAP: 0.9231, MRR: 0.9800, DCG: 0.8108


In [22]:
print("Retrieval model: BM25, Summarization model: Pegasus")
k1_values = [i / 10 for i in range(1, 15, 1)]
b_values = [i / 20 for i in range(1, 21, 1)]
best_k1, best_b = tune_bm25(queries, tokenized_corpus_pegasus, doc_mappings, k1_values, b_values)

model = BM25Okapi(tokenized_corpus_pegasus, k1=best_k1, b=best_b)
results_pegasus_tuned = get_query_results(model, queries)
map_score_tuned = get_mean_average_precision(queries, results_pegasus_tuned, doc_mappings, get_average_precision)
mrr_score_tuned = get_mean_average_precision(queries, results_pegasus_tuned, doc_mappings, get_average_mrr)
dcg_score_tuned = get_mean_dcg_at_k(queries, results_pegasus_tuned, doc_mappings, 10)
print(f"Best k1: {best_k1}, Best b: {best_b}, MAP: {map_score_tuned:.4f}, MRR: {mrr_score_tuned:.4f}, DCG: {dcg_score_tuned:.4f}")

Retrieval model: BM25, Summarization model: Pegasus
k1=1.5 (default), b=0.75 (default), MAP=0.8049, MRR=0.8667, DCG=0.7068
Best k1: 1.4, Best b: 0.1, MAP: 0.8081, MRR: 0.8600, DCG: 0.7067


In [23]:
print("Retrieval model: BM25, Summarization model: Distil Bart")
k1_values = [i / 10 for i in range(1, 15, 1)]
b_values = [i / 20 for i in range(1, 21, 1)]
best_k1, best_b = tune_bm25(queries, tokenized_corpus_distilbart, doc_mappings, k1_values, b_values)

model = BM25Okapi(tokenized_corpus_distilbart, k1=best_k1, b=best_b)
results_distilbart_tuned = get_query_results(model, queries)
map_score_tuned = get_mean_average_precision(queries, results_distilbart_tuned, doc_mappings, get_average_precision)
mrr_score_tuned = get_mean_average_precision(queries, results_distilbart_tuned, doc_mappings, get_average_mrr)
dcg_score_tuned = get_mean_dcg_at_k(queries, results_distilbart_tuned, doc_mappings, 10)
print(f"Best k1: {best_k1}, Best b: {best_b}, MAP: {map_score_tuned:.4f}, MRR: {mrr_score_tuned:.4f}, DCG: {dcg_score_tuned:.4f}")

Retrieval model: BM25, Summarization model: Distil Bart
k1=1.5 (default), b=0.75 (default), MAP=0.8946, MRR=0.9800, DCG=0.8469
Best k1: 1.4, Best b: 0.55, MAP: 0.9019, MRR: 0.9800, DCG: 0.8466


## Query Likelihood Model

In [24]:
from collections import Counter
import math

class QueryLikelihoodModel:
    def __init__(self, documents, mu=2000):
        self.documents = documents
        # Build a vocabulary
        self.vocab = set(term for doc in self.documents for term in doc)
        # Compute document frequencies
        self.doc_frequencies = Counter(term for doc in self.documents for term in set(doc))
        # Smoothing parameter (Jelinek-Mercer smoothing)
        # self.smoothing_lambda = smoothing_lambda
        self.mu = mu

    def document_likelihood(self, doc, query):
        # Compute the likelihood of generating the query given the document with Jelinek-Mercer smoothing
        # P(query | doc) = (1 - λ) * P(term | doc) + λ * P(term | collection)
        likelihood = 0.0
        total_terms_in_doc = len(doc)
        for term in query:
            # Estimate P(term | doc)
            term_likelihood_doc = doc.count(term) / total_terms_in_doc if total_terms_in_doc > 0 else 0.0
            # Estimate P(term | collection)
            term_likelihood_collection = self.doc_frequencies[term] / sum(self.doc_frequencies.values())
            # Combine using Jelinek-Mercer smoothing
            smoothed_term_likelihood = (doc.count(term) + (self.mu / sum(self.doc_frequencies.values()) * self.doc_frequencies[term])) / (total_terms_in_doc + self.mu)
            smoothed_term_likelihood = math.log(smoothed_term_likelihood) if smoothed_term_likelihood > 0 else 0.0
            likelihood += smoothed_term_likelihood
        return likelihood

    def get_scores(self, query):
        # Rank documents based on the query likelihood with Jelinek-Mercer smoothing
        scores = []

        for doc in self.documents:
            likelihood = self.document_likelihood(doc, query)
            scores.append(likelihood)

        return scores

In [25]:
ql_led_large = QueryLikelihoodModel(tokenized_corpus_led_large)
results_led_large_ql = get_query_results(ql_led_large, queries)
map_score_led_large_ql = get_mean_average_precision(queries, results_led_large_ql, doc_mappings, get_average_precision)
mrr_score_led_large_ql = get_mean_average_precision(queries, results_led_large_ql, doc_mappings, get_average_mrr)
dcg_score_led_large_ql = get_mean_dcg_at_k(queries, results_led_large_ql, doc_mappings, 10)
print(f"Mean Average Precision (MAP) (Summarization model: Led Large, Retrieval model: Query Likelihood): {map_score_led_large_ql:.4f}")
print(f"Mean Reciprocal Rank (MRR) (Summarization model: Led Large, Retrieval model: Query Likelihood): {mrr_score_led_large_ql:.4f}")
print(f"Mean Discounted Cumulative Gain (DCG) (Summarization model: Led Large, Retrieval model: Query Likelihood): {dcg_score_led_large_ql:.4f}")

Mean Average Precision (MAP) (Summarization model: Led Large, Retrieval model: Query Likelihood): 0.8979
Mean Reciprocal Rank (MRR) (Summarization model: Led Large, Retrieval model: Query Likelihood): 0.9800
Mean Discounted Cumulative Gain (DCG) (Summarization model: Led Large, Retrieval model: Query Likelihood): 0.8004


In [26]:
ql_pegasus = QueryLikelihoodModel(tokenized_corpus_pegasus)
results_pegasus_ql = get_query_results(ql_pegasus, queries)
map_score_pegasus_ql = get_mean_average_precision(queries, results_pegasus_ql, doc_mappings, get_average_precision)
mrr_score_pegasus_ql = get_mean_average_precision(queries, results_pegasus_ql, doc_mappings, get_average_mrr)
dcg_score_pegasus_ql = get_mean_dcg_at_k(queries, results_pegasus_ql, doc_mappings, 10)
print(f"Mean Average Precision (MAP) (Summarization model: Pegasus, Retrieval model: Query Likelihood): {map_score_pegasus_ql:.4f}")
print(f"Mean Reciprocal Rank (MRR) (Summarization model: Pegasus, Retrieval model: Query Likelihood): {mrr_score_pegasus_ql:.4f}")
print(f"Mean Discounted Cumulative Gain (DCG) (Summarization model: Pegasus, Retrieval model: Query Likelihood): {dcg_score_pegasus_ql:.4f}")

Mean Average Precision (MAP) (Summarization model: Pegasus, Retrieval model: Query Likelihood): 0.7704
Mean Reciprocal Rank (MRR) (Summarization model: Pegasus, Retrieval model: Query Likelihood): 0.8267
Mean Discounted Cumulative Gain (DCG) (Summarization model: Pegasus, Retrieval model: Query Likelihood): 0.6976


In [27]:
ql_distilbart = QueryLikelihoodModel(tokenized_corpus_distilbart)
results_distilbart_ql = get_query_results(ql_distilbart, queries)
map_score_distilbart_ql = get_mean_average_precision(queries, results_distilbart_ql, doc_mappings, get_average_precision)
mrr_score_distilbart_ql = get_mean_average_precision(queries, results_distilbart_ql, doc_mappings, get_average_mrr)
dcg_score_distilbart_ql = get_mean_dcg_at_k(queries, results_distilbart_ql, doc_mappings, 10)
print(f"Mean Average Precision (MAP) (Summarization model: Distil Bart, Retrieval model: Query Likelihood): {map_score_distilbart_ql:.4f}")
print(f"Mean Reciprocal Rank (MRR) (Summarization model: Distil Bart, Retrieval model: Query Likelihood): {mrr_score_distilbart_ql:.4f}")
print(f"Mean Discounted Cumulative Gain (DCG) (Summarization model: Distil Bart, Retrieval model: Query Likelihood): {dcg_score_distilbart_ql:.4f}")

Mean Average Precision (MAP) (Summarization model: Distil Bart, Retrieval model: Query Likelihood): 0.9267
Mean Reciprocal Rank (MRR) (Summarization model: Distil Bart, Retrieval model: Query Likelihood): 0.9800
Mean Discounted Cumulative Gain (DCG) (Summarization model: Distil Bart, Retrieval model: Query Likelihood): 0.8626


## Query Likelihood Model Hyperparameter Tuning

In [29]:
def tune_querylikelihood(queries, corpus, doc_mappings, mu_values):
    best_score = 0
    best_mu = None
    model = QueryLikelihoodModel(corpus)
    results = get_query_results(model, queries)
    map_score = get_mean_average_precision(queries, results, doc_mappings, get_average_precision)
    mrr_score = get_mean_average_precision(queries, results, doc_mappings, get_average_mrr)
    dcg_score = get_mean_dcg_at_k(queries, results, doc_mappings, 10)
    print(f"mu=2000 (default), MAP={map_score:.4f}, MRR={mrr_score:.4f}, DCG={dcg_score:.4f}")

    for mu in mu_values:
        model = QueryLikelihoodModel(corpus, mu=mu)
        results = get_query_results(model, queries)
        map_score = get_mean_average_precision(queries, results, doc_mappings, get_average_precision)
        mrr_score = get_mean_average_precision(queries, results, doc_mappings, get_average_mrr)
        dcg_score = get_mean_dcg_at_k(queries, results, doc_mappings, 10)
        # print(f"Query Likelihood: mu={mu}, MAP={map_score:.4f}, MRR={mrr_score:.4f}, DCG={dcg_score:.4f}")
        if map_score > best_score:
            best_score = map_score
            best_mu = mu
    return best_mu

In [63]:
print("Retrieval model: Query Likelihood, Summarization model: Led Large")
mu_values = [i for i in range(0, 2001, 10)]
best_mu = tune_querylikelihood(queries, tokenized_corpus_led_large, doc_mappings, mu_values)

model = QueryLikelihoodModel(tokenized_corpus_led_large, mu=best_mu)
results_led_large_ql_tuned = get_query_results(model, queries)
map_score_tuned = get_mean_average_precision(queries, results_led_large_ql_tuned, doc_mappings, get_average_precision)
mrr_score_tuned = get_mean_average_precision(queries, results_led_large_ql_tuned, doc_mappings, get_average_mrr)
dcg_score_tuned = get_mean_dcg_at_k(queries, results_led_large_ql_tuned, doc_mappings, 10)
print(f"Best mu: {best_mu}, MAP: {map_score_tuned:.4f}, MRR: {mrr_score_tuned:.4f}, DCG: {dcg_score_tuned:.4f}")

Retrieval model: Query Likelihood, Summarization model: Led Large
mu=2000 (default), MAP=0.8979, MRR=0.9800, DCG=0.8004
Best mu: 650, MAP: 0.9097, MRR: 0.9800, DCG: 0.8142


In [64]:
print("Retrieval model: Query Likelihood, Summarization model: Pegasus")
mu_values = [i for i in range(0, 2001, 10)]
best_mu = tune_querylikelihood(queries, tokenized_corpus_pegasus, doc_mappings, mu_values)

model = QueryLikelihoodModel(tokenized_corpus_pegasus, mu=best_mu)
results_pegasus_ql_tuned = get_query_results(model, queries)
map_score_tuned = get_mean_average_precision(queries, results_pegasus_ql_tuned, doc_mappings, get_average_precision)
mrr_score_tuned = get_mean_average_precision(queries, results_pegasus_ql_tuned, doc_mappings, get_average_mrr)
dcg_score_tuned = get_mean_dcg_at_k(queries, results_pegasus_ql_tuned, doc_mappings, 10)
print(f"Best mu: {best_mu}, MAP: {map_score_tuned:.4f}, MRR: {mrr_score_tuned:.4f}, DCG: {dcg_score_tuned:.4f}")

Retrieval model: Query Likelihood, Summarization model: Pegasus
mu=2000 (default), MAP=0.7704, MRR=0.8267, DCG=0.6976
Best mu: 520, MAP: 0.7877, MRR: 0.8333, DCG: 0.6999


In [65]:
print("Retrieval model: Query Likelihood, Summarization model: Distil Bart")
mu_values = [i for i in range(0, 2001, 10)]
best_mu = tune_querylikelihood(queries, tokenized_corpus_distilbart, doc_mappings, mu_values)

model = QueryLikelihoodModel(tokenized_corpus_distilbart, mu=best_mu)
results_distilbart_ql_tuned = get_query_results(model, queries)
map_score_tuned = get_mean_average_precision(queries, results_distilbart_ql_tuned, doc_mappings, get_average_precision)
mrr_score_tuned = get_mean_average_precision(queries, results_distilbart_ql_tuned, doc_mappings, get_average_mrr)
dcg_score_tuned = get_mean_dcg_at_k(queries, results_distilbart_ql_tuned, doc_mappings, 10)
print(f"Best mu: {best_mu}, MAP: {map_score_tuned:.4f}, MRR: {mrr_score_tuned:.4f}, DCG: {dcg_score_tuned:.4f}")

Retrieval model: Query Likelihood, Summarization model: Distil Bart
mu=2000 (default), MAP=0.9267, MRR=0.9800, DCG=0.8626
Best mu: 1640, MAP: 0.9267, MRR: 0.9800, DCG: 0.8623


## Sample Ranking

In [31]:
def get_top_k_documents(model, query, doc_mappings, k = 10):
    tokenized_query = query.lower().split()  # tokenize query
    doc_scores = model.get_scores(tokenized_query)  # get BM25 scores
    ranked_documents = [{
        'id': i + 1,
        'score': doc_scores[i],
        'title': doc_mappings[i][1],
    } for i in range(len(doc_scores))]  # add document id, score, and title
    ranked_documents = sorted(ranked_documents, key=lambda k: k['score'], reverse=True)  # Sort documents by score (highest to lowest)
    topk_ranked_documents = ranked_documents[:k]
    topk_ranked_documents = [doc['title'] for doc in topk_ranked_documents]
    return topk_ranked_documents

In [66]:
print("Retrieval model: BM25, Summarization model: Led Large")
query = queries[20]['query']
print("\nQuery: ", query)
bm25_led_large_tuned = BM25Okapi(tokenized_corpus_led_large, k1=best_k1, b=best_b)
top_10_documents = get_top_k_documents(bm25_led_large_tuned, query, doc_mappings, 10)
print("\nTop 10 documents: ");
for doc in top_10_documents:
    print(doc)

Retrieval model: BM25, Summarization model: Led Large

Query:  Evolution of Social Media Platforms

Top 10 documents: 
Social media
Social media marketing
Virtual community
Digital marketing 
Content creation
Social network
Streaming media
History of film technology
Stock exchange
History of women's cricket


In [53]:
print("Retrieval model: BM25, Summarization model: Pegasus")
query = queries[20]['query']
print("\nQuery: ", query)
top_10_documents = get_top_k_documents(bm25_pegasus, query, doc_mappings, 10)
print("\nTop 10 documents: ");
for doc in top_10_documents:
    print(doc)

Retrieval model: BM25, Summarization model: Pegasus

Query:  Evolution of Social Media Platforms

Top 10 documents: 
Social media marketing
Social media
Digital marketing 
Social network
Virtual community
Power
Streaming media
Franklin D Roosevelt
Chemical energy
Content creation


In [54]:
print("Retrieval model: BM25, Summarization model: Distil Bart")
query = queries[20]['query']
print("\nQuery: ", query)
top_10_documents = get_top_k_documents(bm25_distilbart, query, doc_mappings, 10)
print("\nTop 10 documents: ");
for doc in top_10_documents:
    print(doc)

Retrieval model: BM25, Summarization model: Distil Bart

Query:  Evolution of Social Media Platforms

Top 10 documents: 
Social media
Virtual community
Social media marketing
Digital marketing 
Content creation
Filmmaking
Social network
Power
Franklin D Roosevelt
Quantum algorithm


In [55]:
print("Retrieval model: Query Likelihood, Summarization model: Led Large")
query = queries[20]['query']
print("\nQuery: ", query)
top_10_documents = get_top_k_documents(ql_led_large, query, doc_mappings, 10)
print("\nTop 10 documents: ");
for doc in top_10_documents:
    print(doc)

Retrieval model: Query Likelihood, Summarization model: Led Large

Query:  Evolution of Social Media Platforms

Top 10 documents: 
Social media
Virtual community
Social media marketing
Content creation
Digital marketing 
Social network
Stock exchange
History of women's cricket
Streaming media
History of film technology


In [56]:
print("Retrieval model: Query Likelihood, Summarization model: Pegasus")
query = queries[20]['query']
print("\nQuery: ", query)
top_10_documents = get_top_k_documents(ql_pegasus, query, doc_mappings, 10)
print("\nTop 10 documents: ");
for doc in top_10_documents:
    print(doc)

Retrieval model: Query Likelihood, Summarization model: Pegasus

Query:  Evolution of Social Media Platforms

Top 10 documents: 
Social media marketing
Social media
Digital marketing 
Social network
Virtual community
Power
Streaming media
Franklin D Roosevelt
Chemical energy
Content creation


In [57]:
print("Retrieval model: Query Likelihood, Summarization model: Distil Bart")
query = queries[20]['query']
print("\nQuery: ", query)
top_10_documents = get_top_k_documents(ql_distilbart, query, doc_mappings, 10)
print("\nTop 10 documents: ");
for doc in top_10_documents:
    print(doc)

Retrieval model: Query Likelihood, Summarization model: Distil Bart

Query:  Evolution of Social Media Platforms

Top 10 documents: 
Social media
Social media marketing
Virtual community
Digital marketing 
Content creation
Social network
Filmmaking
Streaming media
Power
Streaming television
