In [None]:
import pyterrier as pt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pyterrier.measures import RR, nDCG, MAP

import nlpaug.augmenter.char as nac
# import nlpaug.augmenter.word as naw
# import nlpaug.augmenter.sentence as nas
import nlpaug.flow as naf

# from nlpaug.util import Action
from common_helpers import *

In [None]:
# BEIR FiQA through ir_datasets: the root dataset is used for the corpus,
# the "/test" split for the evaluation topics (and, further down, the qrels).
dataset_path = "irds:beir/fiqa"
test_split_path = f"{dataset_path}/test"

dataset = pt.get_dataset(dataset_path)
testset = pt.get_dataset(test_split_path)
test_queries = testset.get_topics()

In [None]:
# Indexing documents
from pathlib import Path

# Build an in-memory index over the "text" field of every corpus document.
# A path argument is still passed positionally; per the original author's
# note it is ignored for MEMORY indexing.
index_location = str(Path.cwd())
indexer = pt.IterDictIndexer(
    index_location,
    fields=["text"],
    type=pt.index.IndexingType.MEMORY,
)
corpus_iter = dataset.get_corpus_iter()
index_ref = indexer.index(corpus_iter)

In [None]:
import random

# The query perturbations below are random, so without a fixed seed every
# Restart Kernel -> Run All yields different metrics. Seed both the stdlib
# and the NumPy generators, which nlpaug is expected to draw from.
# NOTE(review): confirm against the installed nlpaug version.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Retrieval model(s) evaluated in the baseline noise experiment.
bm25 = pt.terrier.Retriever(index_ref, wmodel="BM25")

models = [bm25]
metrics = [RR @ 10, nDCG @ 10, MAP @ 100]

# Overall noise intensity; it is multiplied by the per-operation weights below
# to obtain the word-level probability of each character operation.
noise_levels = np.arange(0.0, 0.6, 0.1)

# Relative weight of each character operation within one configuration.
noise_combinations = [
    {"sub": 1.0, "ins": 0.0, "del": 0.0},  # Only substitution
    {"sub": 0.0, "ins": 1.0, "del": 0.0},  # Only insertion
    {"sub": 0.0, "ins": 0.0, "del": 1.0},  # Only deletion
    {"sub": 0.5, "ins": 0.5, "del": 0.0},  # Substitution + Insertion
    {"sub": 0.5, "ins": 0.0, "del": 0.5},  # Substitution + Deletion
    {"sub": 0.0, "ins": 0.5, "del": 0.5},  # Insertion + Deletion
    {"sub": 0.33, "ins": 0.33, "del": 0.33},  # Equal mix
]

In [None]:
# Settings shared by every character augmenter in this experiment. The
# min/max bounds are set explicitly (min 0, very large max) instead of being
# left at the library defaults.
shared_aug_kwargs = dict(
    aug_char_p=0.25,
    spec_char="",
    aug_char_min=0,
    aug_word_min=0,
    aug_char_max=500000,
    aug_word_max=500000,
)

results = []
for noise_level in noise_levels:
    for noise_config in noise_combinations:
        # Word-level probability of each operation = overall level x weight.
        sub_prob, ins_prob, del_prob = (
            noise_level * noise_config[kind] for kind in ("sub", "ins", "del")
        )

        # One augmenter per operation, applied in sequence:
        # substitute -> insert -> delete.
        noise_types = {"substitute": sub_prob, "insert": ins_prob, "delete": del_prob}
        char_augmenters = [
            nac.RandomCharAug(action=action, aug_word_p=word_prob, **shared_aug_kwargs)
            for action, word_prob in noise_types.items()
        ]
        aug = naf.Sequential(char_augmenters)

        eval_result = run_noise_experiment(test_queries, testset, aug, models, metrics)

        # Tag the result with the settings that produced it.
        run_settings = (
            ("noise_level", noise_level),
            ("sub_prob", sub_prob),
            ("ins_prob", ins_prob),
            ("del_prob", del_prob),
        )
        for column, value in run_settings:
            eval_result[column] = value
        results.append(eval_result)

# Stack all runs into one frame and preview the first len(models) rows.
final_results = pd.concat(results, ignore_index=True)
final_results.head(len(models))

In [None]:
# Label every run, then draw one chart per metric.
# `get_noise_label` and this four-argument `plot_metric` are not defined in
# the notebook; presumably they come from the `common_helpers` star import.
final_results["Noise Type"] = final_results.apply(get_noise_label, axis=1)

metric_titles = (
    ("RR@10", "Reciprocal Rank at 10 (RR@10)"),
    ("nDCG@10", "Normalized Discounted Cumulative Gain at 10 (nDCG@10)"),
    ("AP@100", "Average Precision at 100 (AP@100)"),
)
for metric_column, plot_title in metric_titles:
    plot_metric(final_results, metric_column, plot_title, "Noise Type")

In [None]:
import numpy as np

# Number of whitespace-separated tokens in each query.
query_lengths = test_queries['query'].str.split().str.len()

# Quartile cut-offs: at or below the 25th percentile counts as "short",
# strictly above the 75th percentile counts as "long".
short_threshold, long_threshold = np.percentile(query_lengths, [25, 75])

print(f"Short query threshold: {short_threshold} words")
print(f"Long query threshold: {long_threshold} words")

# The first matching condition wins; everything in between is "medium".
is_short = query_lengths <= short_threshold
is_long = query_lengths > long_threshold
test_queries['query_length_category'] = np.select(
    [is_short, is_long],
    ["short", "long"],
    default="medium",
)


In [None]:
import pandas as pd
import pyterrier as pt
import nlpaug.augmenter.char as nac
import nlpaug.flow as naf

# Group queries by length
query_groups = {
    "Short Queries": test_queries[test_queries["query_length_category"] == "short"],
    "Medium Queries": test_queries[test_queries["query_length_category"] == "medium"],
    "Long Queries": test_queries[test_queries["query_length_category"] == "long"]
}

# Explicit augmentation bounds, matching the ungrouped noise experiment earlier
# in the notebook. If they are omitted, nlpaug uses its defaults (minimum 1,
# maximum 10 for both words and characters): at least one word is then
# perturbed even when the word probability is 0, so the noise_level == 0 runs
# are not a clean baseline, and the amount of noise on long queries is capped.
aug_bounds = dict(aug_char_min=0, aug_word_min=0, aug_char_max=500000, aug_word_max=500000)

results = []

for noise_level in noise_levels:
    for noise_config in noise_combinations:
        # Word-level probability of each operation = overall level x weight.
        sub_prob = noise_level * noise_config["sub"]
        ins_prob = noise_level * noise_config["ins"]
        del_prob = noise_level * noise_config["del"]

        # One augmenter per operation, applied in sequence.
        noise_types = {"substitute": sub_prob, "insert": ins_prob, "delete": del_prob}
        aug = naf.Sequential([
            nac.RandomCharAug(action=action, aug_char_p=0.25, aug_word_p=noise_type,
                              spec_char="", **aug_bounds)
            for action, noise_type in noise_types.items()
        ])

        # The same augmenter pipeline is evaluated on every length group.
        for group_name, queries in query_groups.items():
            eval_result = run_noise_experiment(queries, testset, aug, models, metrics)

            eval_result["noise_level"] = noise_level
            eval_result["sub_prob"] = sub_prob
            eval_result["ins_prob"] = ins_prob
            eval_result["del_prob"] = del_prob
            eval_result["query_group"] = group_name
            results.append(eval_result)

# Combine results
final_results = pd.concat(results, ignore_index=True)
final_results

In [None]:
# Rebuild the combined frame from the collected runs and, in the same step,
# cast the noise-setting columns to float so they have proper types for plotting.
noise_setting_columns = ["noise_level", "sub_prob", "ins_prob", "del_prob"]
final_results = pd.concat(results, ignore_index=True).astype(
    {column: float for column in noise_setting_columns}
)

# Create a new column to represent the noise type as a string
def get_noise_type(row):
    """Return a label naming every noise operation that is active in ``row``.

    Args:
        row: Mapping-like object (for example a DataFrame row) exposing the
            numeric entries ``'sub_prob'``, ``'ins_prob'`` and ``'del_prob'``.

    Returns:
        ``'No Noise'`` when no probability is positive; otherwise the names of
        the active operations joined with ``' + '`` in the fixed order
        Substitution, Insertion, Deletion (e.g. ``'Substitution + Deletion'``).
        Single-operation rows keep their plain label (``'Substitution'``,
        ``'Insertion'`` or ``'Deletion'``).
    """
    # Every positive probability contributes to the label. Returning on the
    # first match would file mixed configurations under their first operation
    # only, merging e.g. "substitution + insertion" runs into "Substitution".
    operations = (
        ("sub_prob", "Substitution"),
        ("ins_prob", "Insertion"),
        ("del_prob", "Deletion"),
    )
    active = [label for column, label in operations if row[column] > 0]
    return " + ".join(active) if active else "No Noise"

final_results['noise_type'] = final_results.apply(get_noise_type, axis=1)

# NOTE(review): an earlier cell calls `plot_metric(final_results, metric, title,
# "Noise Type")` with four arguments, so that name is presumably provided by
# `common_helpers`. This definition replaces it for the rest of the session;
# re-running the earlier cell afterwards raises TypeError. Consider renaming
# one of the two.

# Plotting function for metrics with facet grid
def plot_metric(metric, data=None):
    """Plot ``metric`` against noise level, one facet per noise type.

    Args:
        metric: Name of the metric column to put on the y axis (e.g. "RR@10").
        data: Frame with the columns ``noise_type``, ``query_group``,
            ``noise_level`` and ``metric``. Defaults to the notebook-level
            ``final_results`` as it exists when the function is called.
    """
    if data is None:
        data = final_results

    # One column of plots per noise type, one coloured line per query group.
    g = sns.FacetGrid(data, col="noise_type", hue="query_group", height=5, aspect=1.5)

    # Plot the results for each facet
    g.map(sns.lineplot, "noise_level", metric, marker="o")

    # Add titles, axis labels, and legend. The title template does not append
    # the word "Noise", which would render the baseline as "No Noise Noise".
    g.set_axis_labels('Noise Level', metric)
    g.set_titles(col_template="Noise type: {col_name}")
    g.add_legend(title='Query Group')

    # Use the grid's own tight_layout: it keeps the space reserved for the
    # legend, whereas plt.tight_layout() reclaims it and lets the last facet
    # run underneath the legend.
    g.tight_layout()
    plt.show()

# Example plots for different metrics
plot_metric("RR@10")
plot_metric("nDCG@10")
plot_metric("AP@100")


In [None]:
tfidf = pt.terrier.Retriever(index_ref, wmodel="TF_IDF")

# Run Experiment
retrievers = {
    "TF-IDF": tfidf,
    "BM25": bm25,
}

# Explicit augmentation bounds, matching the first noise experiment in the
# notebook. If they are omitted, nlpaug uses its defaults (minimum 1, maximum
# 10 for both words and characters): at least one word is then perturbed even
# when the word probability is 0, so the noise_level == 0 runs are not a clean
# baseline, and the amount of noise on long queries is capped.
aug_bounds = dict(aug_char_min=0, aug_word_min=0, aug_char_max=500000, aug_word_max=500000)

# The relevance judgements do not depend on the noise setting: fetch them once
# instead of once per (noise setting, retriever) pair.
qrels = testset.get_qrels()

results = []

for noise_level in noise_levels:
    for noise_config in noise_combinations:
        # Word-level probability of each operation = overall level x weight.
        sub_prob = noise_level * noise_config["sub"]
        ins_prob = noise_level * noise_config["ins"]
        del_prob = noise_level * noise_config["del"]

        aug = naf.Sequential([
            nac.RandomCharAug(action="substitute", aug_char_p=0.25, aug_word_p=sub_prob,
                              spec_char="", **aug_bounds),
            nac.RandomCharAug(action="insert", aug_char_p=0.25, aug_word_p=ins_prob,
                              spec_char="", **aug_bounds),
            nac.RandomCharAug(action="delete", aug_char_p=0.25, aug_word_p=del_prob,
                              spec_char="", **aug_bounds),
        ])

        # Perturb every query once; both retrievers are then evaluated on the
        # same noisy topics so their scores are directly comparable.
        noisy_records = []
        for qid, query in zip(test_queries["qid"], test_queries["query"]):
            noisy_query = aug.augment(query)
            # augment() may return a list of strings; collapse it to one string.
            if isinstance(noisy_query, list):
                noisy_query = " ".join(noisy_query)
            noisy_records.append({"qid": str(qid), "query": noisy_query})

        noisy_queries_df = pd.DataFrame(noisy_records, columns=["qid", "query"])

        for model_name, retriever in retrievers.items():
            eval_result = pt.Experiment(
                [retriever],
                noisy_queries_df,
                qrels,
                eval_metrics=metrics,  # same RR@10 / nDCG@10 / MAP@100 list as the other experiments
            )

            eval_result["model"] = model_name
            eval_result["noise_level"] = noise_level
            eval_result["sub_prob"] = sub_prob
            eval_result["ins_prob"] = ins_prob
            eval_result["del_prob"] = del_prob
            results.append(eval_result)

# Combine results
final_results = pd.concat(results, ignore_index=True)
final_results


In [None]:
# Keep only the rows whose "model" is one of the two systems being compared.
compared_models = ["BM25", "TF-IDF"]
is_compared = final_results["model"].isin(compared_models)
comparison_results = final_results[is_compared]

# Plotting function for metrics
def plot_comparison(metric, title):
    """Draw ``metric`` against noise level with one line per model.

    Reads the notebook-level ``comparison_results`` frame at call time.

    Args:
        metric: Name of the metric column plotted on the y axis; also used as
            the y-axis label.
        title: Text shown as the figure title.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    line_options = dict(x="noise_level", y=metric, hue="model", marker="o")
    sns.lineplot(data=comparison_results, ax=ax, **line_options)

    ax.set_title(title, fontsize=14)
    ax.set_xlabel("Noise Level", fontsize=12)
    ax.set_ylabel(metric, fontsize=12)
    ax.legend(title="Model")
    plt.show()

# One comparison chart per evaluation metric.
comparison_titles = (
    ("RR@10", "Reciprocal Rank at 10 (RR@10) Comparison"),
    ("nDCG@10", "Normalized Discounted Cumulative Gain at 10 (nDCG@10) Comparison"),
    ("AP@100", "Average Precision at 100 (AP@100) Comparison"),
)
for metric_column, chart_title in comparison_titles:
    plot_comparison(metric_column, chart_title)

In [None]:
import seaborn as sns

import matplotlib.pyplot as plt

# Filter results for TF-IDF and BM25
# Take an explicit copy: the boolean filter returns a slice of final_results,
# and adding a column to such a slice triggers pandas' SettingWithCopyWarning
# (the write may or may not be linked to the parent frame).
comparison_results = final_results[final_results["model"].isin(["BM25", "TF-IDF"])].copy()

# Label each run with the noise operation(s) that produced it.
comparison_results["Noise Type"] = comparison_results.apply(get_noise_type, axis=1)

# Plotting function for metrics
def plot_noise_impact(metric, title):
    """Draw ``metric`` against noise level, split by noise type and model.

    Line colour encodes the "Noise Type" column and marker style encodes the
    "model" column. Reads the notebook-level ``comparison_results`` frame at
    call time.

    Args:
        metric: Name of the metric column plotted on the y axis; also used as
            the y-axis label.
        title: Text shown as the figure title.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    encodings = dict(x="noise_level", y=metric, hue="Noise Type", style="model")
    sns.lineplot(data=comparison_results, ax=ax, markers=True, dashes=False, **encodings)

    ax.set_title(title, fontsize=16)
    ax.set_xlabel("Noise Level", fontsize=14)
    ax.set_ylabel(metric, fontsize=14)
    ax.legend(title="Noise Type", fontsize=12)
    ax.grid(True)
    plt.show()

# One noise-impact chart per evaluation metric.
noise_impact_titles = (
    ("RR@10", "Impact of Noise on RR@10 for TF-IDF and BM25"),
    ("nDCG@10", "Impact of Noise on nDCG@10 for TF-IDF and BM25"),
    ("AP@100", "Impact of Noise on AP@100 for TF-IDF and BM25"),
)
for metric_column, chart_title in noise_impact_titles:
    plot_noise_impact(metric_column, chart_title)