In [10]:
%load_ext autoreload
%autoreload 2


from rag_components import *
from llama_index.core.retrievers import VectorIndexRetriever

import re




You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [12]:
def summarize_chunks(chunks, summarizer_llm=mixtral, text_title=None):
    """
    Generate a summary for each text chunk using the summarizer_llm.

    Args:
        chunks (list[str]): A list of text chunks to be summarized.

    Returns:
        list[str]: Summaries of the text chunks.
    """
    summaries = []
    if text_title is None:
        text_title = "chunks"
    for chunk in tqdm(chunks, desc=f'Summarizing "{text_title}"', leave=False):
        if len(summaries) == 0:
            prompt = f"""<s>[INST]Summarize the following text conceptually. \
                The summary should paraphrase the original text, be significantly \
                shorter, retain all propositions, and be able to replace the \
                original text. Here is the text to be summarized:\n\n{chunk}[/INST]"""
        else:
            prompt = f"""<s>[INST]Summarize the following text conceptually in the context of the text that precedes it. \
                The summary should paraphrase the original text, be significantly \
                shorter, retain all propositions, and be able to replace the \
                original text. Here is the the context:\n\n{summaries[-1]}\n\n\
                Summarize the following text:\n\n{chunk}[/INST]"""

        prompt = re.sub(r"\s+", " ", prompt)
        response = summarizer_llm.complete(prompt).text.strip()
        summaries.append(response)
    return summaries


def summarize_index(text_title, overwrite_existing=False, summarizer_llm=Settings.llm):
    """
    Generate a summary for each text chunk in the index using the summarizer_llm.

    Args:
        index (VectorStoreIndex): The index containing text chunks to be summarized.

    Returns:
        VectorStoreIndex: An index containing summaries for each text chunk.
    """
    if not overwrite_existing:
        existing_index = get_index_by_title(f"{text_title}_summaries")
        if existing_index:
            print(f"Index with title '{text_title}_summaries' already exists. Returning existing index.")
            return existing_index

    print(f"Summarizing '{text_title}'")
    index = get_index_by_title(text_title)
    text_chunk_ids = get_ids_from_index(index)
    id_nums = [int(id_.split("_")[-1]) for id_ in text_chunk_ids]
    summary_ids = [f"summary_{id_num}" for id_num in id_nums]
    chunks = [get_text_by_id(index, node_id) for node_id in text_chunk_ids]
    summaries = summarize_chunks(chunks, summarizer_llm, text_title=text_title)

    summary_index = create_index_from_chunks_with_ids(
        summaries, summary_ids, f"{text_title}_summaries", overwrite_existing=overwrite_existing
    )
    return summary_index


def answer_reading_comprehension_icl(question, retrieved_chunks_combined, qa_llm=mistral_large, examplelist=None):
    print("Answering question with ICL")
    if len(examplelist) > 0:
        prompt = generate_qa_prompt_icl(retrieved_chunks_combined, question, examplelist)
    else:
        prompt = generate_qa_prompt(retrieved_chunks_combined, question)
    response = qa_llm.complete(prompt).text
    return response


def answer_reading_comprehension_with_summarization_icl(
    question, context_title, context_text, top_k=2, chunk_size=1024, chunk_overlap=200, qa_llm=gpt4, examplelist=None
):
    """
    Answer a question given a context.

    Args:
        question (str): The question to answer
        context_title (str): The title of the context
        context_text (str): The text of the context
        top_k (int): The number of top matching chunks to retrieve

    Returns:
        str: The answer to the question
    """

    # Find the top k most relevant summary chunks
    text_index_title = f"{context_title}"
    print(text_index_title)
    # Check if text index already exists
    text_index = get_index_by_title(text_index_title)
    if text_index is None:
        text_index = create_index_from_text_with_ids(
            context_text, text_index_title, chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
    summary_index = summarize_index(text_index_title, summarizer_llm=mixtral)
    summary_retriever = VectorIndexRetriever(
        index=summary_index,
        similarity_top_k=top_k,
    )
    top_summary_chunks = summary_retriever.retrieve(question)
    print(f"Top {top_k} summary chunks retrieved")
    # Identify the ids of the corresponding raw text chunks
    retrieved_summary_ids = [chunk.node.id_ for chunk in top_summary_chunks]
    corresponding_text_ids = [f"text_chunk_{summary_id.split('_')[-1]}" for summary_id in retrieved_summary_ids]

    # Get the correponding raw text chunks
    text_index = create_index_from_text_with_ids(
        context_text, text_index_title, chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    print(f"Text index created")
    corresonding_chunks = [get_node_by_id(text_index, text_id) for text_id in corresponding_text_ids]
    print(f"Corresponding chunks retrieved")
    # Find the top k * 10 most relevant raw text chunks, and try to find the rank of the chunks which correspond to the top summary chunks
    text_retriever = VectorIndexRetriever(
        index=text_index,
        similarity_top_k=top_k * 10,
    )
    retrieved_texts = text_retriever.retrieve(question)
    print(f"Top {top_k * 10} text chunks retrieved")
    text_chunk_ranks = []
    for i, chunk in enumerate(corresonding_chunks):
        id_ = chunk.id_
        rank = None
        for j, retrieved_text in enumerate(retrieved_texts):
            if retrieved_text.node.id_ == id_:
                rank = j
                break
        text_chunk_ranks.append(rank)

    corresponding_chunks_text = [chunk.text for chunk in corresonding_chunks]
    corresponding_chunks_text_combined = " ".join(corresponding_chunks_text)

    response = answer_reading_comprehension_icl(
        question, corresponding_chunks_text_combined, qa_llm=qa_llm, examplelist=examplelist
    )

    top_chunks_info = [
        {
            "summary_score": top_summary_chunks[i].score,
            "summary_rank": i,
            "text_score": similarity_score(question, text_index_title, corresonding_chunks[i].id_),
            "text_rank": text_chunk_ranks[i],
            "summary": top_summary_chunks[i].node.text,
            "text": corresonding_chunks[i].text,
        }
        for i in range(len(corresonding_chunks))
    ]

    additional_info = {
        "top_chunks_info": top_chunks_info,
    }
    return response, additional_info

In [13]:
debug_lim = 205
chunk_sizes = [256]
topks = [8]
chunk_overlaps = [50]
for idx in range(1):
    print("Generating output for chunk size", chunk_sizes[idx])
    output_file = f"output/summarization_with_corresponding_in_context_mistral_large_icl_chunksize{chunk_sizes[idx]}.jsonl"
    test_longdep_qa_icl(
        answer_reading_comprehension_with_summarization_icl,
        output_file=output_file,
        debug_lim=debug_lim,
        qa_llm=mistral_large,
        chunk_size=chunk_sizes[idx],
        top_k=topks[idx],
        chunk_overlap=chunk_overlaps[idx],
    )

Generating output for chunk size 256


Answering questions:   0%|          | 0/205 [00:00<?, ?it/s]

Afghan–Soviet War_chunksize256
Creating index Afghan–Soviet War_chunksize256 from text.
Summarizing 'Afghan–Soviet War_chunksize256'


Summarizing "Afghan–Soviet War_chunksize256":   0%|          | 0/152 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Afghan–Soviet War_chunksize256
Index with title 'Afghan–Soviet War_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Afghan–Soviet War_chunksize256
Index with title 'Afghan–Soviet War_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Afghan–Soviet War_chunksize256
Index with title 'Afghan–Soviet War_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering q

Summarizing "Stowe Gardens_chunksize256":   0%|          | 0/79 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Stowe Gardens_chunksize256
Index with title 'Stowe Gardens_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Stowe Gardens_chunksize256
Index with title 'Stowe Gardens_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Stowe Gardens_chunksize256
Index with title 'Stowe Gardens_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example

Summarizing "Street names in Barcelona_chunksize256":   0%|          | 0/136 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Street names in Barcelona_chunksize256
Index with title 'Street names in Barcelona_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Street names in Barcelona_chunksize256
Index with title 'Street names in Barcelona_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Street names in Barcelona_chunksize256
Index with title 'Street names in Barcelona_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks re

Summarizing "Ted Brimble_chunksize256":   0%|          | 0/84 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Ted Brimble_chunksize256
Index with title 'Ted Brimble_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Ted Brimble_chunksize256
Index with title 'Ted Brimble_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Ted Brimble_chunksize256
Index with title 'Ted Brimble_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length

Summarizing "War crimes in the Russian invasion of Ukraine_chunksize256":   0%|          | 0/87 [00:00<?, ?it/…

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
War crimes in the Russian invasion of Ukraine_chunksize256
Index with title 'War crimes in the Russian invasion of Ukraine_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
War crimes in the Russian invasion of Ukraine_chunksize256
Index with title 'War crimes in the Russian invasion of Ukraine_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
War crimes in the Russian invasion of Ukraine_chunksize256
Index with title 'War crimes in the Russian invasion of Ukraine_chunksize256_summar

Summarizing "57th Medical Detachment_chunksize256":   0%|          | 0/140 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
57th Medical Detachment_chunksize256
Index with title '57th Medical Detachment_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
57th Medical Detachment_chunksize256
Index with title '57th Medical Detachment_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
57th Medical Detachment_chunksize256
Index with title '57th Medical Detachment_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 

Summarizing "2023 in the United Kingdom_chunksize256":   0%|          | 0/127 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
2023 in the United Kingdom_chunksize256
Index with title '2023 in the United Kingdom_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
2023 in the United Kingdom_chunksize256
Index with title '2023 in the United Kingdom_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
2023 in the United Kingdom_chunksize256
Index with title '2023 in the United Kingdom_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chu

Summarizing "2022–23 Arsenal F.C. season_chunksize256":   0%|          | 0/72 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
2022–23 Arsenal F.C. season_chunksize256
Index with title '2022–23 Arsenal F.C. season_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
2022–23 Arsenal F.C. season_chunksize256
Index with title '2022–23 Arsenal F.C. season_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
2022–23 Arsenal F.C. season_chunksize256
Index with title '2022–23 Arsenal F.C. season_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Correspondi

Summarizing "1971 Great Lakes blizzard_chunksize256":   0%|          | 0/72 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
1971 Great Lakes blizzard_chunksize256
Index with title '1971 Great Lakes blizzard_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
1971 Great Lakes blizzard_chunksize256
Index with title '1971 Great Lakes blizzard_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
1971 Great Lakes blizzard_chunksize256
Index with title '1971 Great Lakes blizzard_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks re

Summarizing "2023 Kazakh legislative election_chunksize256":   0%|          | 0/83 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
2023 Kazakh legislative election_chunksize256
Index with title '2023 Kazakh legislative election_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
2023 Kazakh legislative election_chunksize256
Index with title '2023 Kazakh legislative election_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
2023 Kazakh legislative election_chunksize256
Index with title '2023 Kazakh legislative election_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved


Summarizing "A Gest of Robyn Hode (ballad)_chunksize256":   0%|          | 0/117 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
A Gest of Robyn Hode (ballad)_chunksize256
Index with title 'A Gest of Robyn Hode (ballad)_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
A Gest of Robyn Hode (ballad)_chunksize256
Index with title 'A Gest of Robyn Hode (ballad)_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
A Gest of Robyn Hode (ballad)_chunksize256
Index with title 'A Gest of Robyn Hode (ballad)_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created

Summarizing "Anti-Peruvian sentiment_chunksize256":   0%|          | 0/137 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Anti-Peruvian sentiment_chunksize256
Index with title 'Anti-Peruvian sentiment_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Anti-Peruvian sentiment_chunksize256
Index with title 'Anti-Peruvian sentiment_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Anti-Peruvian sentiment_chunksize256
Index with title 'Anti-Peruvian sentiment_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 

Summarizing "Assassination of José Calvo Sotelo_chunksize256":   0%|          | 0/187 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Assassination of José Calvo Sotelo_chunksize256
Index with title 'Assassination of José Calvo Sotelo_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Assassination of José Calvo Sotelo_chunksize256
Index with title 'Assassination of José Calvo Sotelo_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0
Assassination of José Calvo Sotelo_chunksize256
Index with title 'Assassination of José Calvo Sotelo_chunksize256_summaries' already exists. Returning existing index.
Top 8 summary chunk

Summarizing "Cornish Bronze Age_chunksize256":   0%|          | 0/94 [00:00<?, ?it/s]

Top 8 summary chunks retrieved
Text index created
Corresponding chunks retrieved
Top 80 text chunks retrieved
Answering question with ICL
Example list length 0


In [14]:
chunk_sizes = [256]
topks = [8]
chunk_overlaps = [50]
for idx in range(1):
    output_file = f"output/summarization_with_corresponding_in_context_mistral_large_icl_chunksize{chunk_sizes[idx]}.jsonl"
    rouge_metrics = get_rouge_metrics(output_file)
    print("results for chunk size", chunk_sizes[idx])
    print("Rouge Metrics:", rouge_metrics)

    self_score = llm_self_score(output_file, llm=gpt4)
    print("LLM Self-Score:", self_score)

results for chunk size 256
Rouge Metrics: {'rouge1': 0.14237941384412695, 'rouge2': 0.05085983384241306, 'rougeL': 0.1154005554385617, 'rougeLsum': 0.12156666636438837}


  0%|          | 0/205 [00:00<?, ?it/s]

LLM Self-Score: 0.32195121951219513


In [11]:
#read output/summarization_with_corresponding_in_context_mistral_large_icl_chunksize256.jsonl file and caluclate accuracy of last 100 questions
import json
with open("output/summarization_with_corresponding_in_context_mistral_large_icl_chunksize256.jsonl") as f:
    lines = f.readlines()
    last_100 = lines[-100:]
    correct = 0
    for line in last_100:
        data = json.loads(line)
        if data["correct"]:
            correct += 1


print("Accuracy of last 100 questions:", correct/100)

#get rouge metrics for last 100 questions
longdep_qa_ds = load_dataset("bigainlco/LooGLE", "longdep_qa", split="test")
rouge = evaluate.load("rouge")
def get_rouge_metrics(output_file, last_n=None):
    """
    Get ROUGE metrics for a .jsonl file containing generated answers and ground truth answers.

    Args:
        output_file (str): The path to the .jsonl file

    Returns:
        dict: The ROUGE metrics
    """
    with open(output_file) as f:
        lines = f.readlines()
        if last_n is not None:
            lines = lines[-last_n:]
        references = []
        predictions = []
        for line in lines:
            data = json.loads(line)
            references.append(data["ground_truth"])
            predictions.append(data["generated_answer"])
    rouge_metrics = rouge.compute(predictions=predictions, references=references)
    return rouge_metrics


output_file = "output/summarization_with_corresponding_in_context_mistral_large_icl_chunksize256.jsonl"
rouge_metrics = get_rouge_metrics(output_file, last_n=100)
print("Rouge Metrics for last 100 questions:", rouge_metrics)

Accuracy of last 100 questions: 0.49
Rouge Metrics for last 100 questions: {'rouge1': 0.20971355602679248, 'rouge2': 0.09022807730730556, 'rougeL': 0.17438533649009222, 'rougeLsum': 0.17935401008500995}


In [3]:
from datasets import load_dataset
longdep_qa_ds = load_dataset("bigainlco/LooGLE", "longdep_qa", split="test")

Generating test split: 0 examples [00:00, ? examples/s]

In [3]:
import shutil
import os

cache_dir = os.path.expanduser("~/.cache/huggingface/datasets")
shutil.rmtree(cache_dir)


In [35]:
# llm = Settings.llm
n_questions = sum([len(eval(env["qa_pairs"])) for env in longdep_qa_ds])
debug_lim = 206
output_file = "output/testset_union_mistral_large_chunksize256.jsonl"
write_file = "output/testset_union_mistral_large_chunksize256_with_contextCombined.jsonl"
existing_output = read_output_file(output_file)
example_list = []
temp_list = []
from tqdm.auto import tqdm
with tqdm(total=debug_lim, position=0, desc="Answering questions") as pbar:
    for environment in longdep_qa_ds:
        context = environment["input"]
        title = environment["title"]
        title = f"{title}_chunksize{256}"
        print(environment)
        
        qa_pairs = eval(environment["qa_pairs"])
        for question_dict in qa_pairs:
            question = question_dict["Q"]
            ground_truth = question_dict["A"]
            question_type = question_dict["type"]
            question_context = question_dict["S"]
            #find the question in the output file
            question_found = False
            for output in existing_output:
                if output["question"] == question:
                    question_found = True
                    #add the question type and context information to this and write to a new output file
                    output["question_type"] = question_type
                    output["question_context"] = question_context
                    with open(write_file, "a") as output_file:
                        output_file.write(json.dumps(output) + "\n")
                    break
            pbar.update(1)
            if pbar.n >= debug_lim:
                break
        if pbar.n >= debug_lim:
            break

Answering questions:   0%|          | 0/206 [00:00<?, ?it/s]

{'input': ' Early life. Picardo was born in Jerez de la Frontera, in the Province of Cádiz in Andalucía, Spain on 18 June 1919. His father was Alvaro Picardo de Celis and his mother\'s family name was Castellón. He had four brothers, one of whom died in infancy. His father died in 1929 when Picardo was ten years old. With his mother and his brothers he moved to Madrid, Spain. He enrolled at the newly created Instituto de Bachillerato Cervantes for his high school education. On completing school he initially wanted to join the navy, but was frustrated by the closure of the military academies in Madrid during the Second Spanish Republic. He turned to the study of law, but was frustrated again, this time by the start of the Spanish Civil War in July 1936 when he was in the middle of his course. He had just celebrated his seventeenth birthday. Training in architecture. To avoid being evacuated from Madrid when the Spanish Civil War began, Picardo joined the studio of the architect Luis Moy

In [36]:
import json
output_file = "output/testset_union_mistral_large_chunksize256_with_contextCombined.jsonl"
final_file = "output/testset_union_results_test.jsonl"
# Load your JSON data
data = read_output_file(output_file)

def check_context_match(data):
    for entry in data:
        context = entry["question_context"]
        text_chunks = entry["additional_info"]["top_chunks_info"]
        match_found = False
        
        # Checking if any context is in any text
        for context_sentence in context:
            for chunk in text_chunks:
                if context_sentence in chunk["text"]:
                    match_found = True
                    break
            if match_found:
                break
        
        # Adding the matched_chunk key
        entry["matched_chunk"] = match_found

# Run the function
check_context_match(data)

def summarize_results(data):
    results = {
        "true": {},
        "false": {}
    }

    # Initialize dictionaries
    for answer_correctness in results.keys():
        for entry in data:
            if str(entry['correct']).lower() == answer_correctness:
                question_type = entry['question_type']
                if question_type not in results[answer_correctness]:
                    results[answer_correctness][question_type] = {"matched": 0, "total": 0}
                
                # Increment totals
                results[answer_correctness][question_type]["total"] += 1
                
                # Increment matches if matched_chunk is True
                if entry['matched_chunk']:
                    results[answer_correctness][question_type]["matched"] += 1

    return results

# Calculate the summary
results_summary = summarize_results(data)

# Save the summary to a file
with open(final_file, 'w') as file:
    json.dump(results_summary, file, indent=4)


print("The JSON data has been updated and saved.")


The JSON data has been updated and saved.


In [22]:
import json

write_file = "output/summarization_icl_chunksize256_with_contextCombined_updated.jsonl"
final_file = "output/summarization_icl_results_test.jsonl"
# Load your JSON data
with open(write_file) as file:
    data = json.load(file)


def summarize_results(data):
    results = {
        "true": {},
        "false": {}
    }

#only take the last 100 questions
    data = data[-100:]
    # Initialize dictionaries
    for answer_correctness in results.keys():
        for entry in data:
            if str(entry['correct']).lower() == answer_correctness:
                question_type = entry['question_type']
                if question_type not in results[answer_correctness]:
                    results[answer_correctness][question_type] = {"matched": 0, "total": 0}
                
                # Increment totals
                results[answer_correctness][question_type]["total"] += 1
                
                # Increment matches if matched_chunk is True
                if entry['matched_chunk']:
                    results[answer_correctness][question_type]["matched"] += 1

    return results

# Calculate the summary
results_summary = summarize_results(data)

# Save the summary to a file
with open(final_file, 'w') as file:
    json.dump(results_summary, file, indent=4)
