# Keyword Search (Weighted by BM25)

> Prerequisite: follow [this guide](https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html) to set up a self-hosted Elasticsearch system.

## 1. Connect to Elasticsearch Server & Load Benchmarks

In [None]:
import os

from elasticsearch import Elasticsearch

# Connection settings can be overridden through environment variables so the
# API key does not have to be edited into (and shared with) the notebook.
# When a variable is unset, the original hard-coded value is used.
client = Elasticsearch(
  os.environ.get("ELASTICSEARCH_URL", "https://localhost:9200"),
  api_key=os.environ.get(
    "ELASTICSEARCH_API_KEY",
    "SEVzTDVvOEJpRjU2U19wVFhrUXM6a204U3lERi1UTUtRNzlQRF9NUENqdw==",  # Adjust
  ),
  ca_certs=os.environ.get("ELASTICSEARCH_CA_CERTS", "http_ca.crt"),
)
# Round-trip to the server to confirm the connection and credentials work.
client.info()

In [None]:
import pandas as pd

# Load the context benchmark from BX2_public_bi.csv (path is relative to the
# working directory). The cells below read its "table", "context",
# "question" and "relevant_tables" columns.
benchmark = pd.read_csv("BX2_public_bi.csv")

In [None]:
# Load content benchmark + table summaries
# benchmark = pd.read_csv("BC2_public_bi.csv")
# summaries = pd.read_csv("row_summaries_public_bi.csv")

## 2. Ingest Contexts/Summaries

In [None]:
def ingest_contexts(benchmark: pd.DataFrame):
    """Rebuild the ``benchmark`` index with one document per benchmark row.

    Any existing ``benchmark`` index is deleted first; then every row's
    ``table`` and ``context`` values are indexed as a single document.

    Args:
        benchmark: Frame providing the ``table`` and ``context`` columns.
    """
    client.indices.delete(index='benchmark', ignore=[400, 404])  # Ignore if already removed
    # Pair the two columns positionally rather than by index label, so frames
    # whose index is not 0..n-1 (e.g. filtered or sampled subsets) are
    # ingested row by row without KeyErrors or mismatched rows.
    for table, context in zip(benchmark["table"], benchmark["context"]):
        client.index(
            index="benchmark",
            document={
                "table": table,
                "context": context,
            },
        )

In [None]:
def ingest_contents(summaries: pd.DataFrame):
    """Rebuild the ``benchmark`` index with one document per summary row.

    Any existing ``benchmark`` index is deleted first; then every row's
    ``table`` and ``summary`` values are indexed as a single document.

    Args:
        summaries: Frame providing the ``table`` and ``summary`` columns.
    """
    client.indices.delete(index='benchmark', ignore=[400, 404])  # Ignore if already removed
    # Pair the two columns positionally rather than by index label, so frames
    # whose index is not 0..n-1 (e.g. filtered or sampled subsets) are
    # ingested row by row without KeyErrors or mismatched rows.
    for table, summary in zip(summaries["table"], summaries["summary"]):
        client.index(
            index="benchmark",
            document={
                "table": table,
                "summary": summary,
            },
        )

In [None]:
import numpy as np

def get_sample_summaries(summaries: pd.DataFrame, sample_percentage=1):
    """
    This is to randomly sample certain percentage of the summaries.
    The return value is the summaries but only for the sampled rows.
    """
    # Prepare to sample summaries (category refers to the tables)
    category_counts = summaries["table"].value_counts()
    sample_sizes = np.ceil(category_counts * sample_percentage).astype(int)

    # Perform stratified sampling
    sampled_summaries = summaries.copy(deep=True)
    sampled_summaries["table_copy"] = sampled_summaries["table"]
    sampled_summaries = sampled_summaries.groupby("table_copy", group_keys=False).apply(
        lambda x: x.sample(n=sample_sizes[x.name], random_state=42),
        include_groups=False,
    )
    return sampled_summaries.reset_index(drop=True)

In [None]:
# Ingest contexts: drops any existing "benchmark" index and re-indexes the
# table/context pairs from the benchmark frame loaded above.
ingest_contexts(benchmark)

In [None]:
# Ingest summaries
# ingest_contents(summaries)

## 3. Evaluate System

In [None]:
from collections import defaultdict

def get_tables_ranks(hits):
    """Return the distinct tables found in ``hits`` with their ranks.

    Only the first hit of each table is kept. Ranks start at 1 and grow by
    one whenever a newly seen table scores strictly lower than the
    previously kept table, so tables with equal scores share a rank.

    Args:
        hits: Search hits in the order returned by the search; each hit
            provides ``_score`` and ``_source["table"]``.

    Returns:
        A list of ``(table, rank)`` tuples in hit order; empty when
        ``hits`` is empty.
    """
    # A search can match nothing; there is then no first score to read.
    if not hits:
        return []

    rank = 1
    prev_score = hits[0]['_score']
    seen_tables = set()
    tables_ranks = []
    for hit in hits:
        table = hit['_source']['table']
        if table in seen_tables:
            # Later hits of an already ranked table do not affect the ranks.
            continue
        if hit['_score'] < prev_score:
            rank += 1
        tables_ranks.append((table, rank))
        prev_score = hit['_score']
        seen_tables.add(table)
    return tables_ranks

In [None]:
import ast

def evaluate_contexts(benchmark: pd.DataFrame, k=1):
    """Evaluate retrieval over the ``context`` field of the ``benchmark`` index.

    For every benchmark row the question is sent as a ``match`` query on
    ``context`` and the top ``k`` hits are reduced to distinct tables with
    ``get_tables_ranks``. Only the first retrieved table that appears in the
    row's relevant tables contributes to the metrics.

    Args:
        benchmark: Frame providing ``question`` and ``relevant_tables``
            columns; ``relevant_tables`` holds Python literals as strings
            (parsed with ``ast.literal_eval``).
        k: Number of hits requested per question.

    Returns:
        A dict with ``"accuracy"``, ``"Mean Precision@1"`` and ``"MRR"``,
        each averaged over the number of benchmark rows.
    """
    accuracy_sum = 0
    precision_at_1_sum = 0
    reciprocal_rank_sum = 0
    # Iterate positionally so frames whose index is not 0..n-1 (e.g. filtered
    # subsets) are evaluated row by row instead of failing on label lookups.
    for relevant_tables, question in zip(benchmark["relevant_tables"], benchmark["question"]):
        expected_tables = ast.literal_eval(relevant_tables)
        search_query = {
            "size": k,
            "query": {
                "match": {
                    "context": question
                }
            }
        }
        result = client.search(index="benchmark", body=search_query, search_type='dfs_query_then_fetch').body
        hits = result["hits"]["hits"]
        if not hits:
            # Nothing matched this question: count it as a miss rather than
            # aborting the whole evaluation.
            continue
        tables_ranks = get_tables_ranks(hits)

        for j, (table, rank) in enumerate(tables_ranks):
            if table in expected_tables:
                accuracy_sum += 1
                # Precision@1 uses the tie-aware rank (tables with equal
                # scores share rank 1) ...
                if rank == 1:
                    precision_at_1_sum += 1
                # ... while the reciprocal rank uses the table's position in
                # the de-duplicated hit list.
                reciprocal_rank_sum += (1 / (j + 1))
                break
    return {
        "accuracy": accuracy_sum / benchmark.shape[0],
        "Mean Precision@1": precision_at_1_sum / benchmark.shape[0],
        "MRR": reciprocal_rank_sum / benchmark.shape[0],
    }

In [None]:
def evaluate_contents(benchmark: pd.DataFrame, k=1):
    """Evaluate retrieval over the ``summary`` field of the ``benchmark`` index.

    For every benchmark row the question is sent as a ``match`` query on
    ``summary`` and the top ``k`` hits are reduced to distinct tables with
    ``get_tables_ranks``. Only the retrieved table equal to the row's
    ``table`` value contributes to the metrics.

    Args:
        benchmark: Frame providing ``question`` and ``table`` columns.
        k: Number of hits requested per question.

    Returns:
        A dict with ``"accuracy"``, ``"Mean Precision@1"`` and ``"MRR"``,
        each averaged over the number of benchmark rows.
    """
    accuracy_sum = 0
    precision_at_1_sum = 0
    reciprocal_rank_sum = 0
    # Iterate positionally so frames whose index is not 0..n-1 (e.g. filtered
    # subsets) are evaluated row by row instead of failing on label lookups.
    for expected_table, question in zip(benchmark["table"], benchmark["question"]):
        search_query = {
            "size": k,
            "query": {
                "match": {
                    "summary": question
                }
            }
        }
        result = client.search(index="benchmark", body=search_query, search_type='dfs_query_then_fetch').body
        hits = result["hits"]["hits"]
        if not hits:
            # Nothing matched this question: count it as a miss rather than
            # aborting the whole evaluation.
            continue
        tables_ranks = get_tables_ranks(hits)

        for j, (table, rank) in enumerate(tables_ranks):
            if table == expected_table:
                accuracy_sum += 1
                # Precision@1 uses the tie-aware rank (tables with equal
                # scores share rank 1) ...
                if rank == 1:
                    precision_at_1_sum += 1
                # ... while the reciprocal rank uses the table's position in
                # the de-duplicated hit list.
                reciprocal_rank_sum += (1 / (j + 1))
                break
    return {
        "accuracy": accuracy_sum / benchmark.shape[0],
        "Mean Precision@1": precision_at_1_sum / benchmark.shape[0],
        "MRR": reciprocal_rank_sum / benchmark.shape[0],
    }

In [None]:
# Run the context evaluation against the "benchmark" index and print the
# resulting metrics dict (accuracy, Mean Precision@1, MRR).
results = evaluate_contexts(benchmark, k=1)  # Adjust k
print(results)