In [4]:
%load_ext autoreload
%autoreload 2


from rag_components import *
from llama_index.core.retrievers import VectorIndexRetriever
from concurrent.futures import ThreadPoolExecutor

from rich.progress import track

import re

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Proposition Specific Components


In [5]:
def generate_propositions_from_chunks(chunks, proposition_llm=Settings.llm, text_title=None):
    """
    Generate propositions for each text chunk using the proposition_llm.

    Args:
        chunks (list[str]): A list of text chunks to be summarized.

    Returns:
        list[str]: Summaries of the text chunks.
    """
    proposition_sets = []
    if text_title is None:
        text_title = "chunks"
    for chunk in tqdm(chunks, desc=f'Generating propositions for "{text_title}"', leave=False):
        prompt = f"""\
            {chunk}

            Decompose the text above into a set of clear and simple propositions, ensuring they are interpretable out of context.
            1. Split compound sentences into simple sentences. Maintain the original phrasing from the input whenever possible.
            2. For any named entity that is accompanied by additional descriptive information, separate this information into \
            its own distinct proposition.
            3. Decontextualize the proposition by adding necessary modifier to nouns or entire sentences and replacing pronouns \
            (e.g., "it", "he", "she", "they", "this", "that") with the full name of the entities they refer to.
            4. Present the results as a numbered list of propositions, each on a new line and prefixed with the proposition number.

            Propositions: \
        """
        prompt = re.sub(r" +", " ", prompt)
        proposition_set_raw = proposition_llm.complete(prompt).text.strip()
        proposition_set = proposition_set_raw.split("\n")
        proposition_set = [re.sub(r"^\d+\.", "", proposition) for proposition in proposition_set]
        proposition_set = [proposition.strip() for proposition in proposition_set]
        proposition_set = [proposition for proposition in proposition_set if proposition]
        proposition_sets.append(proposition_set)
    return proposition_sets


def generate_propositions_from_index(text_title, overwrite_existing=False, proposition_llm=Settings.llm):
    """
    Generate propositions for each text chunk in the index using GPT-4.

    Args:
        index (VectorStoreIndex): The index containing text chunks for which to generate propositions.

    Returns:
        VectorStoreIndex: An index containing the propositions generated for each text chunk.
    """
    if not overwrite_existing:
        existing_index = get_index_by_title(f"{text_title}_propositions")
        if existing_index:
            return existing_index
    index = get_index_by_title(text_title)
    text_chunk_ids = get_ids_from_index(index)

    chunks = [get_text_by_id(index, node_id) for node_id in text_chunk_ids]
    proposition_sets = generate_propositions_from_chunks(chunks, proposition_llm, text_title=text_title)
    propositions = [proposition for proposition_set in proposition_sets for proposition in proposition_set]

    id_nums = [
        [f"{int(text_chunk_ids[i].split('_')[-1])}_{j}" for j in range(len(proposition_sets[i]))]
        for i in range(len(proposition_sets))
    ]
    id_nums = [id_num for id_num_set in id_nums for id_num in id_num_set]
    proposition_ids = [f"proposition_{id_num}" for id_num in id_nums]

    proposition_index = create_index_from_chunks_with_ids(
        propositions, proposition_ids, f"{text_title}_propositions", overwrite_existing=overwrite_existing
    )
    return proposition_index


def answer_reading_comprehension_with_propositions(
    question, context_title, context_text, top_k=2, chunk_size=1024, chunk_overlap=200, qa_llm=gpt4
):
    """
    Answer a question given a context, using proposition generation.

    Args:
        question (str): The question to answer
        context_title (str): The title of the context
        context_text (str): The text of the context
        top_k (int): The number of top matching chunks to retrieve

    Returns:
        str: The answer to the question
    """

    # Find the top k most relevant proposition chunks
    proposition_index = generate_propositions_from_index(context_title, proposition_llm=mixtral)
    proposition_retriever = VectorIndexRetriever(
        index=proposition_index,
        similarity_top_k=top_k,
    )
    top_proposition_chunks = proposition_retriever.retrieve(question)

    # Identify the ids of the corresponding raw text chunks
    retrieved_proposition_ids = [chunk.node.id_ for chunk in top_proposition_chunks]
    corresponding_text_ids = [f"text_chunk_{proposition_id.split('_')[-2]}" for proposition_id in retrieved_proposition_ids]

    # Get the correponding raw text chunks
    text_index = create_index_from_text_with_ids(
        context_text, context_title, chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    corresonding_chunks = [get_node_by_id(text_index, text_id) for text_id in corresponding_text_ids]

    # Find the top k * 10 most relevant raw text chunks, and try to find the rank of the chunks which correspond to the top proposition chunks
    text_retriever = VectorIndexRetriever(
        index=text_index,
        similarity_top_k=top_k * 10,
    )
    retrieved_texts = text_retriever.retrieve(question)
    text_chunk_ranks = []
    for i, chunk in enumerate(corresonding_chunks):
        id_ = chunk.id_
        rank = None
        for j, retrieved_text in enumerate(retrieved_texts):
            if retrieved_text.node.id_ == id_:
                rank = j
                break
        text_chunk_ranks.append(rank)

    corresponding_chunks_text = [chunk.text for chunk in corresonding_chunks]
    corresponding_chunks_text_combined = " ".join(corresponding_chunks_text)

    prompt = f"""Consider the following context with depth and thoughtfulness: {corresponding_chunks_text_combined}\n\n\
        Respond to the following question with insight and nuance. Answer concisely, often in one \
        sentence or less and sometimes in the form of a list or structured text. If the question \
        asks you to order events, refer to the events by their number (e.g. "1. third event, 2. second \
        event, 3. first event" -> "3, 2, 1"). Answer multiple choice questions using the number which \
        corresponds to the correct answer (e.g. "1. A, 2. B, 3. C" -> "2"). Do not include the \
        question in your answer. \
        \n\n\
        Question: {question}\n\n\
        Answer: Considering the context above, """
    response = qa_llm.complete(prompt).text

    top_chunks_info = [
        {
            "summary_score": top_proposition_chunks[i].score,
            "proposition_rank": i,
            "text_score": similarity_score(question, context_title, corresonding_chunks[i].id_),
            "text_rank": text_chunk_ranks[i],
            "proposition": top_proposition_chunks[i].node.text,
            "text": corresonding_chunks[i].text,
        }
        for i in range(len(corresonding_chunks))
    ]

    additional_info = {
        "top_chunks_info": top_chunks_info,
    }
    return response, additional_info

## Inference


In [6]:
debug_lim = 100
output_file = "output/propositions_with_corresponding_in_context.jsonl"
test_longdep_qa(answer_reading_comprehension_with_propositions, output_file=output_file, debug_lim=debug_lim, qa_llm=gpt4)

Answering questions:   0%|          | 0/100 [00:00<?, ?it/s]

Generating propositions for "2023 French pension reform unrest":   0%|          | 0/23 [00:00<?, ?it/s]

Generating propositions for "2023 Turkey–Syria earthquake":   0%|          | 0/24 [00:00<?, ?it/s]

Generating propositions for "Claude List":   0%|          | 0/22 [00:00<?, ?it/s]

Generating propositions for "Climate change in Washington (state)":   0%|          | 0/22 [00:00<?, ?it/s]

Generating propositions for "Execution of Nagaenthran K. Dharmalingam":   0%|          | 0/17 [00:00<?, ?it/s]

Generating propositions for "Foreign Cattle Market":   0%|          | 0/20 [00:00<?, ?it/s]

Generating propositions for "Governorship of Glenn Youngkin":   0%|          | 0/23 [00:00<?, ?it/s]

Generating propositions for "Hells Angels MC criminal allegations and incidents in the United States":   0%|  …

Generating propositions for "History of NBC":   0%|          | 0/19 [00:00<?, ?it/s]

Generating propositions for "Light in painting":   0%|          | 0/66 [00:00<?, ?it/s]

Generating propositions for "Nightlife in Belgrade":   0%|          | 0/31 [00:00<?, ?it/s]

Generating propositions for "Police brutality by country":   0%|          | 0/31 [00:00<?, ?it/s]

In [7]:
rouge_metrics = get_rouge_metrics(output_file)
print("Rouge Metrics:", rouge_metrics)

self_score = llm_self_score(output_file, llm=gpt4)
print("LLM Self-Score:", self_score)

Rouge Metrics: {'rouge1': 0.40281104491103636, 'rouge2': 0.17082979780142077, 'rougeL': 0.35271561346205105, 'rougeLsum': 0.3564499653389325}
LLM Self-Score: 0.38
