In [5]:
%load_ext autoreload
%autoreload 2


from rag_components import *
from llama_index.core.retrievers import VectorIndexRetriever

from rich.progress import track

import re

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Baseline-Specific Components


In [6]:
def answer_reading_comprehension(
    question,
    context_title,
    context_text,
    qa_llm,
    top_k=2,
    chunk_size=1024,
    chunk_overlap=200,
):
    """
    Answer a question about a document by retrieving its most relevant text chunks.

    The context text is indexed with ``create_index_from_text_with_ids``, the
    ``top_k`` chunks most similar to the question are retrieved, their text is
    joined with single spaces, and the result is embedded in a fixed prompt
    that is sent to ``qa_llm``.

    Args:
        question (str): The question to answer.
        context_title (str): The title of the context.
        context_text (str): The text of the context.
        qa_llm: LLM used to produce the answer. It must provide
            ``complete(prompt)`` returning an object with a ``text`` attribute.
        top_k (int, optional): Number of chunks to retrieve. Defaults to 2.
        chunk_size (int, optional): Chunk size forwarded to
            ``create_index_from_text_with_ids``. Defaults to 1024.
        chunk_overlap (int, optional): Chunk overlap forwarded to
            ``create_index_from_text_with_ids``. Defaults to 200.

    Returns:
        tuple: ``(response, additional_info)`` where ``response`` is the text
        returned by the LLM and ``additional_info`` is a dict with the keys
        ``"raw_text_chunk_count"`` (retrieved chunks whose node id starts with
        ``"text_chunk_"``), ``"summary_chunk_count"`` (always 0 here) and
        ``"top_chunks_text"`` (list of the retrieved chunk texts).
    """
    text_index = create_index_from_text_with_ids(
        context_text,
        context_title,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    # Query the vector store to find top matching chunks
    retriever = VectorIndexRetriever(
        index=text_index,
        similarity_top_k=top_k,
    )
    top_chunks = retriever.retrieve(question)

    # Extract and combine the text from the top matching chunks for use as context
    top_chunks_text = [chunk.node.text for chunk in top_chunks]
    top_chunks_text_combined = " ".join(top_chunks_text)

    raw_text_chunk_count = sum(
        1 for chunk in top_chunks if chunk.node.id_.startswith("text_chunk_")
    )
    # This function never counts summary chunks; the key is still reported as 0.
    # NOTE(review): presumably kept so the result dict matches other pipelines - confirm.
    summary_chunk_count = 0

    # Use the combined context of top chunks to generate an answer to the question
    # Construct a prompt that guides the LLM to consider the context and answer the question subjectively or conceptually
    # The prompt text (including its embedded indentation) is intentionally left unchanged.
    prompt = f"""Consider the following context with depth and thoughtfulness: {top_chunks_text_combined}\n\n\
        Respond to the following question with insight and nuance. Answer concisely, often in one \
        sentence or less and sometimes in the form of a list or structured text. If the question \
        asks you to order events, refer to the events by their number (e.g. "1. third event, 2. second \
        event, 3. first event" -> "3, 2, 1"). Answer multiple choice questions using the number which \
        corresponds to the correct answer (e.g. "1. A, 2. B, 3. C" -> "2"). Do not include the \
        question in your answer. \
        \n\n\
        Question: {question}\n\n\
        Answer: """
    response = qa_llm.complete(prompt).text
    additional_info = {
        "raw_text_chunk_count": raw_text_chunk_count,
        "summary_chunk_count": summary_chunk_count,
        "top_chunks_text": top_chunks_text,
    }
    return response, additional_info

## Inference


In [7]:
# Settings for this baseline run. The "100" in the file name mirrors debug_lim,
# so keep the two in sync when changing either one.
debug_lim = 100
output_file = "output/baseline_mistral_large_100.jsonl"

# Run the baseline answerer through the evaluation harness.
# NOTE(review): debug_lim presumably caps the number of questions answered
# (the progress bar below reports a total of 100) - confirm in rag_components.
test_longdep_qa(
    answer_reading_comprehension,
    qa_llm=mistral_large,
    debug_lim=debug_lim,
    output_file=output_file,
)

Answering questions:   0%|          | 0/100 [00:00<?, ?it/s]

In [9]:
# Score the answers saved in output_file: first the ROUGE values, then an LLM-based score.
rouge_metrics = get_rouge_metrics(output_file)
print("Rouge Metrics:", rouge_metrics)

# NOTE(review): presumably gpt4 acts as the grader of the answers stored in output_file - confirm in rag_components.
self_score = llm_self_score(output_file, llm=gpt4)
print("LLM Self-Score:", self_score)

Rouge Metrics: {'rouge1': 0.18182868588261636, 'rouge2': 0.07019693366877688, 'rougeL': 0.14806468928100985, 'rougeLsum': 0.1556604420633752}
LLM Self-Score: 0.39
