In [6]:
%load_ext autoreload
%autoreload 2
from rag_components import *
from llama_index.core.retrievers import VectorIndexRetriever

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
def answer_reading_comprehension_with_union_of_features(
    question, context_title, context_text, top_k=2, chunk_size=1024, chunk_overlap=200, qa_llm=gpt4
):
    """
    Answer a question from raw-text chunks located through a union of feature indexes.

    The question is matched against the union of three indexes looked up by
    title: the raw text chunks, their summaries and their questions. Every hit
    is mapped back to a raw text chunk id, hits are de-duplicated on that id
    (the best-ranked hit wins), and the text of the first ``top_k`` distinct
    chunks is joined and handed to ``answer_reading_comprehension``.

    Args:
        question (str): The question to answer
        context_title (str): The title of the context. It is used as the title
            of the raw text index; the summary and question indexes are looked
            up as ``f"{context_title}_summaries"`` and
            ``f"{context_title}_questions"``.
        context_text (str): The text of the context. Not read by this function.
        top_k (int): The number of distinct text chunks to put in the prompt.
            ``top_k * 10`` hits are retrieved before de-duplication.
        chunk_size (int): Not read by this function.
        chunk_overlap (int): Not read by this function.
        qa_llm: Forwarded unchanged to ``answer_reading_comprehension``.

    Returns:
        tuple: ``(response, additional_info)``. ``response`` is whatever
        ``answer_reading_comprehension`` returns. ``additional_info`` is
        ``{"top_chunks_info": [...]}`` with one dict per selected chunk holding
        ``feature_score``, ``summary_rank``, ``text_score``, ``feature`` and
        ``text``.
    """
    text_index_title = f"{context_title}"
    summary_index_title = f"{context_title}_summaries"
    question_index_title = f"{context_title}_questions"
    summary_index = get_index_by_title(summary_index_title)
    question_index = get_index_by_title(question_index_title)
    raw_index = get_index_by_title(text_index_title)

    combined_index = index_union([raw_index, summary_index, question_index])

    # Over-fetch: several feature hits can collapse onto the same text chunk,
    # so more than top_k hits are needed to end up with top_k distinct chunks.
    retriever = VectorIndexRetriever(
        index=combined_index,
        similarity_top_k=top_k * 10,
    )
    hits = retriever.retrieve(question)

    # Keep the first hit for each distinct text chunk, in retrieval order.
    # The feature node is stored next to its text id so that everything
    # reported later refers to the hit that actually selected the chunk.
    selected = []  # list of (text_chunk_id, feature_node)
    seen_text_ids = set()
    for hit in hits:
        feature_id = hit.node.id_
        if "summary" in feature_id or "question" in feature_id:
            # Summary/question node ids are mapped to a text chunk id by
            # reusing the last "_"-separated segment of the feature id.
            text_id = f"text_chunk_{feature_id.split('_')[-1]}"
        else:
            text_id = feature_id
        if text_id not in seen_text_ids:
            seen_text_ids.add(text_id)
            selected.append((text_id, hit.node))
        if len(selected) >= top_k:
            break

    text_chunks = [get_node_by_id(text_index_title, text_id) for text_id, _ in selected]
    combined_text = " ".join(chunk.text for chunk in text_chunks)

    response = answer_reading_comprehension(question, combined_text, qa_llm=qa_llm)

    top_chunks_info = []
    for rank, ((_, feature_node), text_chunk) in enumerate(zip(selected, text_chunks)):
        feature_id = feature_node.id_
        # Score the feature against the index it came from.
        if "question" in feature_id:
            feature_index_title = question_index_title
        elif "summary" in feature_id:
            feature_index_title = summary_index_title
        else:
            feature_index_title = text_index_title
        top_chunks_info.append(
            {
                "feature_score": similarity_score(question, feature_index_title, feature_id),
                "summary_rank": rank,
                "text_score": similarity_score(question, text_index_title, text_chunk.id_),
                # Text of the de-duplicated feature hit, not of the rank-th raw hit.
                "feature": feature_node.text,
                "text": text_chunk.text,
            }
        )

    additional_info = {
        "top_chunks_info": top_chunks_info,
    }
    return response, additional_info

### INFERENCE


In [5]:
# Sweep grid: position i of each list below describes one experiment setting
# (chunk size, number of retrieved chunks, chunk overlap).
debug_lim = 100  # forwarded to test_longdep_qa; presumably caps the number of questions -- TODO confirm
chunk_sizes = [64, 128, 256, 512, 1024, 2048]
topks = [32, 16, 8, 4, 2, 1]
chunk_overlaps = [10, 25, 50, 100, 200, 400]

# Only grid positions 4 and 5 (chunk sizes 1024 and 2048) are generated here.
for idx in (4, 5):
    print("Generating output for chunk size", chunk_sizes[idx])
    output_file = f"output/union_with_corresponding_in_context_mistral_large_chunksize{chunk_sizes[idx]}.jsonl"
    sweep_settings = {
        "chunk_size": chunk_sizes[idx],
        "top_k": topks[idx],
        "chunk_overlap": chunk_overlaps[idx],
    }
    test_longdep_qa(
        answer_reading_comprehension_with_union_of_features,
        output_file=output_file,
        debug_lim=debug_lim,
        qa_llm=mistral_large,
        **sweep_settings,
    )

Generating output for chunk size 1024


Answering questions:   0%|          | 0/100 [00:00<?, ?it/s]

José Luis Picardo_chunksize1024
José Luis Picardo_chunksize1024


KeyboardInterrupt: 

In [16]:
# Score the output file of every chunk size in the sweep: ROUGE metrics first,
# then an LLM-based self-score judged by gpt4.
# Iterating over chunk_sizes itself (instead of a hard-coded range(6)) keeps
# this loop in step with the list if sweep entries are added or removed.
for idx, chunk_size in enumerate(chunk_sizes):
    output_file = f"output/union_with_corresponding_in_context_mistral_large_chunksize{chunk_size}.jsonl"
    rouge_metrics = get_rouge_metrics(output_file)
    print("results for chunk size", chunk_size)
    print("Rouge Metrics:", rouge_metrics)

    self_score = llm_self_score(output_file, llm=gpt4)
    print("LLM Self-Score:", self_score)

results for chunk size 64
Rouge Metrics: {'rouge1': 0.21495930643478178, 'rouge2': 0.08360037560551968, 'rougeL': 0.16919841660123439, 'rougeLsum': 0.17853503120980155}


100%|██████████| 100/100 [00:00<?, ?it/s]

LLM Self-Score: 0.48





results for chunk size 128
Rouge Metrics: {'rouge1': 0.20567075412935015, 'rouge2': 0.08538299982612982, 'rougeL': 0.1647252573416373, 'rougeLsum': 0.1741708687542663}


100%|██████████| 100/100 [00:00<?, ?it/s]

LLM Self-Score: 0.41





results for chunk size 256
Rouge Metrics: {'rouge1': 0.23090750778408342, 'rouge2': 0.10204666514366703, 'rougeL': 0.1805865761332273, 'rougeLsum': 0.19042366137178537}


100%|██████████| 100/100 [00:00<?, ?it/s]

LLM Self-Score: 0.49





results for chunk size 512
Rouge Metrics: {'rouge1': 0.22544897345007614, 'rouge2': 0.0985295774164229, 'rougeL': 0.17655377279302073, 'rougeLsum': 0.18518196545983356}


100%|██████████| 100/100 [00:00<?, ?it/s]

LLM Self-Score: 0.43





results for chunk size 1024
Rouge Metrics: {'rouge1': 0.2076821832686861, 'rouge2': 0.09470691393624994, 'rougeL': 0.17616498717498966, 'rougeLsum': 0.18121232589080816}


100%|██████████| 100/100 [00:00<?, ?it/s]

LLM Self-Score: 0.31





results for chunk size 2048
Rouge Metrics: {'rouge1': 0.20250619469391343, 'rouge2': 0.0699810496789757, 'rougeL': 0.16206160824988958, 'rougeLsum': 0.1747182299261461}


100%|██████████| 100/100 [00:00<?, ?it/s]

LLM Self-Score: 0.31



