The goal here is to preprocess the output files produced by running the individual agents so that they are ready for the CMAB and GPTSwarm experiments.

For each question in the train and test files, we collect the 1) question_id, 2) question_text, 3) gold_answer, 4) predicted_answer, 5) evaluation_results, 6) complexity_label, and 7) time_taken (and steps_taken), using the predictions of NoR, OneR and IRCoT.


In [1]:
import os
import json
import re
import numpy as np

# 1. Preprocess 1
Here we first extract some values (time taken, confidence scores, etc.) from the log files 

In [2]:
# Agents whose inference logs are preprocessed; the path lists below follow this order.
agents = ["nor", "oner", "ircot"]

# Raw inference log of every agent on the test split (one path per agent).
path_to_inference_log_files_for_test_data_for_each_agent = list(
    map(lambda name: f"../LOGS/test/{name}_qa_flan_xl_test_aware_210_51.txt", agents)
)

# Raw inference log of every agent on the train split (one path per agent).
path_to_inference_log_files_for_train_data_for_each_agent = list(
    map(lambda name: f"../LOGS/train/{name}_qa_flan_xl_train_aware_210_51.txt", agents)
)

# Root folder under which the processed per-agent files are written.
destination_folder = "../Results"


In [3]:
def parse_log_file(filepath):
    """Parse an agent inference log into raw per-question, per-step fields.

    Every line is stripped and then interpreted by its prefix:

    * ``Index:`` starts a new record: the active question is cleared and the
      step counter is reset to 0.
    * ``Processing question: ... QID: <id>`` selects the active question;
      the id is everything after ``QID:``. A line without the ``QID:``
      marker leaves no question active, so the fields that follow it are
      ignored instead of being filed under a garbage key.
    * ``Generated Texts:``, ``Confidence Info:`` and ``Run Time in Seconds:``
      fill the current step of the active question. A
      ``Run Time in Seconds:`` line closes the step (the counter advances).

    Args:
        filepath: Path of the log file to read.

    Returns:
        dict: ``{qid: {step_index: {'Generated Texts': str,
        'Confidence Info': str, 'Run Time in Seconds': str}}}``. Each value
        is the raw text after the first colon of its line; a field that was
        never seen for a step stays ``''``.
    """
    qid_marker = "QID:"
    field_names = ('Generated Texts', 'Confidence Info', 'Run Time in Seconds')

    data_dict = {}
    current_qid = None
    current_sub_index = 0

    with open(filepath, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()

            if line.startswith("Index:"):
                current_qid = None
                current_sub_index = 0
                continue

            if line.startswith("Processing question:"):
                marker_pos = line.find(qid_marker)
                if marker_pos == -1:
                    # No id on this line: do not invent one from a slice of the text.
                    current_qid = None
                    continue
                current_qid = line[marker_pos + len(qid_marker):].strip()
                data_dict.setdefault(current_qid, {})
                continue

            if current_qid is None:
                # Field lines outside of a known question are dropped.
                continue

            for field in field_names:
                if not line.startswith(field + ':'):
                    continue
                step = data_dict[current_qid].setdefault(
                    current_sub_index,
                    {name: '' for name in field_names},
                )
                step[field] = line.split(":", 1)[1].strip()
                if field == 'Run Time in Seconds':
                    # The run time is the last field of a step.
                    current_sub_index += 1
                break

    return data_dict

def calculate_confidence_info(confidence_str):
    """Average every ``'prob': <number>`` value found in a confidence string.

    Args:
        confidence_str: Raw text containing ``'prob': <number>`` pairs.

    Returns:
        float: Mean of all matched probabilities, or ``0.0`` when the text
        contains none.
    """
    # Small floats are printed in scientific notation (e.g. 1e-05); the
    # optional exponent keeps them from being truncated to their mantissa.
    prob_pattern = r"'prob': ((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
    probs = re.findall(prob_pattern, confidence_str)
    if not probs:
        return 0.0
    return float(np.mean([float(prob) for prob in probs]))

def process_data(data_dict):
    """Aggregate the parsed log fields of every question into summary statistics.

    Args:
        data_dict: Mapping ``{qid: {step_id: details}}`` where ``details``
            holds the raw ``'Confidence Info'`` and ``'Run Time in Seconds'``
            strings of one step.

    Returns:
        dict: ``{qid: summary}`` where ``summary`` contains

        * ``number_of_subkeys``: number of steps of the question,
        * ``subkey_details``: per step, its ``average_confidence_score`` and
          ``run_time_in_seconds``,
        * ``total_run_time_in_seconds``: sum of the step run times,
        * ``average_confidence_score_among_all_subkeys``: mean of the step
          confidence scores (``0.0`` when the question has no steps, so the
          field is always numeric),
        * ``average_confidence_score_of_the_last_subkey``: confidence score
          of the last step in iteration order (``0`` when there is none).

    Raises:
        ValueError: If a step's run time string cannot be converted to float.
    """
    processed_data = {}

    for qid, subkeys in data_dict.items():
        subkey_details = {}
        confidence_scores = []
        total_run_time = 0

        for subkey_id, details in subkeys.items():
            avg_confidence = calculate_confidence_info(details['Confidence Info'])
            run_time = float(details['Run Time in Seconds'])

            subkey_details[subkey_id] = {
                'average_confidence_score': avg_confidence,
                'run_time_in_seconds': run_time
            }
            total_run_time += run_time
            confidence_scores.append(avg_confidence)

        if confidence_scores:
            overall_confidence = np.mean(confidence_scores)
            last_confidence = confidence_scores[-1]
        else:
            # No steps were logged: report numeric zeros rather than an empty list.
            overall_confidence = 0.0
            last_confidence = 0

        processed_data[qid] = {
            'number_of_subkeys': len(subkeys),
            'subkey_details': subkey_details,
            'total_run_time_in_seconds': total_run_time,
            'average_confidence_score_among_all_subkeys': overall_confidence,
            'average_confidence_score_of_the_last_subkey': last_confidence
        }

    return processed_data


def write_to_jsonl(data, folder_path, filename):
    """Write a mapping to a JSON Lines file, one ``{key: value}`` object per line.

    Args:
        data: Mapping whose items are written in iteration order.
        folder_path: Destination folder; it is created (including parents)
            when it does not exist yet.
        filename: Name of the file inside ``folder_path``.
    """
    file_path = f"{folder_path}/{filename}"
    print(f"Writing data to {file_path}")
    # A fresh checkout has no results folders yet; create them on demand
    # instead of failing with FileNotFoundError.
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
    with open(file_path, 'w') as file:
        for qid, details in data.items():
            file.write(json.dumps({qid: details}) + '\n')
    print(f"Data successfully written to {file_path}")

def process_and_write(agent, log_file, data_type):
    """Convert one agent's raw inference log into a processed JSONL file.

    The log is parsed, summarised per question and written to
    ``<destination_folder>/<data_type>/IndividualAgents/<agent>/``.

    Args:
        agent: Agent name used in the output folder and file name.
        log_file: Path of the raw log to parse.
        data_type: Split name used in the output folder and file name.
    """
    print(f"Processing {data_type} log for {agent}: {log_file}")
    per_question_summary = process_data(parse_log_file(log_file))
    write_to_jsonl(
        per_question_summary,
        f"{destination_folder}/{data_type}/IndividualAgents/{agent}",
        f"{agent}_qa_flan_xl_{data_type}_aware_210_51_processed.jsonl",
    )


In [43]:
# For every agent, process its test log first and its train log second.
for agent, test_log_file, train_log_file in zip(
    agents,
    path_to_inference_log_files_for_test_data_for_each_agent,
    path_to_inference_log_files_for_train_data_for_each_agent,
):
    for data_type, log_file in (("test", test_log_file), ("train", train_log_file)):
        process_and_write(agent, log_file, data_type)
    print(f"Processing for {agent} completed\n")

Processing test log for nor: ./LOGS/test/nor_qa_flan_xl_test_aware_210_51.txt
Writing data to ./Results/test/IndividualAgents/nor/nor_qa_flan_xl_test_aware_210_51_processed.jsonl
Data successfully written to ./Results/test/IndividualAgents/nor/nor_qa_flan_xl_test_aware_210_51_processed.jsonl
Processing train log for nor: ./LOGS/train/nor_qa_flan_xl_train_aware_210_51.txt
Writing data to ./Results/train/IndividualAgents/nor/nor_qa_flan_xl_train_aware_210_51_processed.jsonl
Data successfully written to ./Results/train/IndividualAgents/nor/nor_qa_flan_xl_train_aware_210_51_processed.jsonl
Processing for nor completed

Processing test log for oner: ./LOGS/test/oner_qa_flan_xl_test_aware_210_51.txt
Writing data to ./Results/test/IndividualAgents/oner/oner_qa_flan_xl_test_aware_210_51_processed.jsonl
Data successfully written to ./Results/test/IndividualAgents/oner/oner_qa_flan_xl_test_aware_210_51_processed.jsonl
Processing train log for oner: ./LOGS/train/oner_qa_flan_xl_train_aware_210_51

In [39]:
# temp_file = path_to_inference_log_files_for_test_data_for_each_agent[2]
# print(f"Processing {temp_file}")

# data_dict = parse_log_file(temp_file)

# for key, value in data_dict["single_nq_dev_2922"].items():
#     print(key, value)

# print(f"\n--------------")

# processed_log_data = process_data(data_dict)

# for key, value in processed_log_data["single_nq_dev_2922"].items():
#     print(key, value)

In [None]:

# Parse and summarise each agent's test log, then write it next to the other
# per-agent results under the plain ".jsonl" name (no "_processed" suffix).
for agent, log_file in zip(agents, path_to_inference_log_files_for_test_data_for_each_agent):
    data_dict = parse_log_file(log_file)
    processed_log_data = process_data(data_dict)
    write_to_jsonl(
        processed_log_data,
        f"{destination_folder}/test/IndividualAgents/{agent}",
        f"{agent}_qa_flan_xl_test_aware_210_51.jsonl",
    )

# 2. Preprocess 2


In [79]:
import os
import json

# Original (unprocessed) train and test question files.
path_to_original_files = {
    "train": "../AQA_Data_Final/train_aware_210_51.jsonl",
    "test": "../AQA_Data_Final/test_aware_210_51.jsonl"
}

# Per split: the processed log file of every agent, in `agents` order.
path_to_processed_logs = {
    split: [
        f"../Results/{split}/IndividualAgents/{agent}/{agent}_qa_flan_xl_{split}_aware_210_51_processed.jsonl"
        for agent in agents
    ]
    for split in ("test", "train")
}

# Per split: the prediction file of every agent, in `agents` order.
path_to_predictions = {
    split: [
        f"../Results/{split}/IndividualAgents/{agent}/{agent}_qa_flan_xl_{split}_aware_210_51.json"
        for agent in agents
    ]
    for split in ("test", "train")
}

# Where the processed data for CMAB is saved. This cell used to assign
# output_folder twice; only the value that took effect (the last one) is kept.
output_folder = "../Results/processed_data_for_CMAB"

In [66]:
def read_data(file_path):
    """Load the question records of a JSON Lines file.

    Only the first entry of ``answers_objects`` is used. Its ``spans`` are
    flattened one level: a string span is one gold answer, and every element
    of a nested span becomes its own gold answer (they are not glued
    together into a single string).

    Args:
        file_path: Path of the JSON Lines file; blank lines are skipped.

    Returns:
        list[dict]: One dict per record with the keys ``question_id``,
        ``question_text``, ``gold_answers`` and ``complexity_label``.
    """
    data = []
    with open(file_path, "r") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank or trailing empty lines.
                continue
            data_point = json.loads(line)
            gold_answers = data_point["answers_objects"][0]["spans"]

            # Flatten the gold answers
            flattened_answers = []
            for ans in gold_answers:
                if isinstance(ans, str):
                    flattened_answers.append(ans)
                else:
                    flattened_answers.extend(ans)

            data.append({
                "question_id": data_point["question_id"],
                "question_text": data_point["question_text"],
                "gold_answers": flattened_answers,
                "complexity_label": data_point["complexity_label"]
            })
    return data

# Load both splits of the original question files (train first, then test).
train_data, test_data = (
    read_data(path_to_original_files[split]) for split in ("train", "test")
)

for split, questions in (("train", train_data), ("test", test_data)):
    print(f"Number of questions in the original {split} file: {len(questions)}")

# Display the first record of each split as the cell output.
train_data[0], test_data[0]


Number of questions in the original train file: 210
Number of questions in the original test file: 51


({'question_id': '5a7ca33f5542990527d554ee',
  'question_text': "Which restaurant chain is based further south, Pizza Fusion or Imo's Pizza?",
  'gold_answers': ['Pizza Fusion'],
  'complexity_label': 'B'},
 {'question_id': 'single_nq_dev_2922',
  'question_text': 'which is the eighth planet from the sun ( in order of increasing mean distance or semimajor axis )',
  'gold_answers': ['Neptune'],
  'complexity_label': 'B'})

In [92]:
from AQA_final_eval import evaluate_single

# now read the processed predictions for each agent for the train and test data and merge them with the original data and save them in the output folder

def merger(path_to_original_file, processed_logs_file_paths, predictions_file_paths):
    """Join the original questions with each agent's predictions and log statistics.

    Args:
        path_to_original_file: JSON Lines file with the original questions.
        processed_logs_file_paths: One processed-log JSONL file per agent,
            in ``agents`` order; every line holds a single ``{qid: summary}``.
        predictions_file_paths: One JSON file per agent, in ``agents`` order,
            indexed by question id.

    Returns:
        list[dict]: One record per original question. For every agent whose
        processed log contains the question, the record gains
        ``<agent>_predicted_answer``, ``<agent>_time_taken``,
        ``<agent>_steps_taken`` and an empty ``<agent>_evaluation_results``
        placeholder.
    """
    original_data = read_data(path_to_original_file)

    processed_logs = {}
    all_predictions = {}
    per_agent_files = zip(agents, processed_logs_file_paths, predictions_file_paths)
    for agent, processed_file, prediction_file in per_agent_files:
        print(f"Agent: {agent}, Processed File: {processed_file}, Prediction File: {prediction_file}")

        # Each line is a one-entry mapping {qid: summary}; collect them by qid.
        summaries_by_qid = {}
        with open(processed_file, "r") as log_handle:
            for raw_line in log_handle:
                record = json.loads(raw_line)
                record_qid = list(record)[0]
                summaries_by_qid[record_qid] = record[record_qid]
        processed_logs[agent] = summaries_by_qid

        with open(prediction_file, "r") as prediction_handle:
            all_predictions[agent] = json.load(prediction_handle)

    # Attach the per-agent values to a copy of the original fields.
    copied_fields = ("question_id", "question_text", "gold_answers", "complexity_label")
    merged_data = []
    for data_point in original_data:
        qid = data_point["question_id"]
        merged_point = {field: data_point[field] for field in copied_fields}
        for agent in agents:
            agent_log = processed_logs[agent]
            if qid not in agent_log:
                continue
            merged_point[f"{agent}_predicted_answer"] = all_predictions[agent][qid]
            merged_point[f"{agent}_time_taken"] = agent_log[qid]["total_run_time_in_seconds"]
            merged_point[f"{agent}_steps_taken"] = agent_log[qid]["number_of_subkeys"]
            merged_point[f"{agent}_evaluation_results"] = {}
        merged_data.append(merged_point)

    return merged_data
    
        



ModuleNotFoundError: No module named 'AQA_final_eval'

In [89]:
# Merge the original test questions with every agent's processed log and predictions.
merged_test_data = merger(path_to_original_files["test"], path_to_processed_logs["test"], path_to_predictions["test"])


Agent: nor, Processed File: ./Results/test/IndividualAgents/nor/nor_qa_flan_xl_test_aware_210_51_processed.jsonl, Prediction File: ./Results/test/IndividualAgents/nor/nor_qa_flan_xl_test_aware_210_51.json
Agent: oner, Processed File: ./Results/test/IndividualAgents/oner/oner_qa_flan_xl_test_aware_210_51_processed.jsonl, Prediction File: ./Results/test/IndividualAgents/oner/oner_qa_flan_xl_test_aware_210_51.json
Agent: ircot, Processed File: ./Results/test/IndividualAgents/ircot/ircot_qa_flan_xl_test_aware_210_51_processed.jsonl, Prediction File: ./Results/test/IndividualAgents/ircot/ircot_qa_flan_xl_test_aware_210_51.json


In [91]:
# Display the first merged test record as a sanity check.
merged_test_data[0]

{'question_id': 'single_nq_dev_2922',
 'question_text': 'which is the eighth planet from the sun ( in order of increasing mean distance or semimajor axis )',
 'gold_answers': ['Neptune'],
 'complexity_label': 'B',
 'nor_predicted_answer': 'Uranus',
 'nor_time_taken': 2.038320779800415,
 'nor_steps_taken': 1,
 'nor_evaluation_results': {},
 'oner_predicted_answer': 'Neptune',
 'oner_time_taken': 6.091372489929199,
 'oner_steps_taken': 1,
 'oner_evaluation_results': {},
 'ircot_predicted_answer': 'Neptune',
 'ircot_time_taken': 1359.9865555763245,
 'ircot_steps_taken': 11,
 'ircot_evaluation_results': {}}

In [14]:


# Path to the output folder for the preprocessed CMAB data.
# NOTE(review): every path in this cell is absolute and machine-specific;
# adjust them before re-running on another machine.
output_folder = "/home/mhoveyda/AdaptiveQA/CMAB_Experiments/Preprocessed_Data_For_CMAB_Experiments"

# Original train and test question files.
path_to_original_train_file = "/home/mhoveyda/AdaptiveQA/AdaptiveQA_Data_Final/train_aware_210_51.jsonl"
path_to_original_test_file = "/home/mhoveyda/AdaptiveQA/AdaptiveQA_Data_Final/test_aware_210_51.jsonl"

# Folders with the prediction files of NoR, OneR and IRCoT for both splits.

# NoR predictions:
path_to_NoR_train_predictions_folder = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final_Train/nor"
path_to_NoR_test_predictions_folder = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final/nor"

# OneR predictions:
path_to_OneR_train_predictions_folder = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final_Train/oner"
path_to_OneR_test_predictions_folder = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final/oner"

# IRCoT predictions:
path_to_IRCoT_train_predictions_folder = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final_Train/ircot"
path_to_IRCoT_test_predictions_folder = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final/ircot"


def _load_original_questions(jsonl_path):
    """Read a JSON Lines question file into a list of plain records.

    Only the first entry of ``answers_objects`` is used. Its ``spans`` are
    flattened one level: string spans are kept as they are and the elements
    of nested spans are added individually.

    Args:
        jsonl_path: Path of the JSON Lines file to read.

    Returns:
        list[dict]: Records with the keys ``question_id``, ``question_text``,
        ``gold_answers`` and ``complexity_label``.
    """
    questions = []
    with open(jsonl_path, "r") as f:
        for line in f:
            data_point = json.loads(line)
            question_id = data_point["question_id"]
            question_text = data_point["question_text"]

            # Flatten the gold answers.
            gold_answer = []
            for answer in data_point["answers_objects"][0]["spans"]:
                if isinstance(answer, str):
                    gold_answer.append(answer)
                else:
                    gold_answer.extend(answer)

            questions.append({
                "question_id": question_id,
                "question_text": question_text,
                "gold_answers": gold_answer,
                "complexity_label": data_point["complexity_label"],
            })
    return questions


# Read the original train file
train_data = _load_original_questions(path_to_original_train_file)
print("Number of questions in the original train file: ", len(train_data))

# Read the original test file
test_data = _load_original_questions(path_to_original_test_file)
print("Number of questions in the original test file: ", len(test_data))

train_data[0], test_data[0]

Number of questions in the original train file:  210
Number of questions in the original test file:  51


({'question_id': '5a7ca33f5542990527d554ee',
  'question_text': "Which restaurant chain is based further south, Pizza Fusion or Imo's Pizza?",
  'gold_answers': ['Pizza Fusion'],
  'complexity_label': 'B'},
 {'question_id': 'single_nq_dev_2922',
  'question_text': 'which is the eighth planet from the sun ( in order of increasing mean distance or semimajor axis )',
  'gold_answers': ['Neptune'],
  'complexity_label': 'B'})

In [15]:
# Read the predictions for the train file

path_to_NoR_train_predictions_file = os.path.join(path_to_NoR_train_predictions_folder, "nor_qa_flan_xl_train_aware_210_51.json")
path_to_OneR_train_predictions_file = os.path.join(path_to_OneR_train_predictions_folder, "oner_qa_flan_xl_train_aware_210_51.json")
path_to_IRCoT_train_predictions_file = os.path.join(path_to_IRCoT_train_predictions_folder, "ircot_qa_flan_xl_train_aware_210_51.json")
path_to_IRCoT_train_steps_taken_file = os.path.join(path_to_IRCoT_train_predictions_folder, "ircot_qa_flan_xl_train_aware_210_51_steps_taken.json")
path_to_IRCoT_train_time_taken_file = os.path.join(path_to_IRCoT_train_predictions_folder, "ircot_qa_flan_xl_train_aware_210_51_individual_time_taken.json")

with open(path_to_NoR_train_predictions_file, "r") as f:
    NoR_train_predictions = json.load(f)

with open(path_to_OneR_train_predictions_file, "r") as f:
    OneR_train_predictions = json.load(f)

with open(path_to_IRCoT_train_predictions_file, "r") as f:
    IRCoT_train_predictions = json.load(f)

with open(path_to_IRCoT_train_steps_taken_file, "r") as f:
    IRCoT_train_steps_taken = json.load(f)

with open(path_to_IRCoT_train_time_taken_file, "r") as f:
    IRCoT_train_time_taken = json.load(f)

# Every file must cover exactly as many questions as the train split.
assert len(NoR_train_predictions) == len(train_data)
assert len(OneR_train_predictions) == len(train_data)
assert len(IRCoT_train_predictions) == len(train_data)
assert len(IRCoT_train_steps_taken) == len(train_data)
assert len(IRCoT_train_time_taken) == len(train_data)

# Attach the per-agent values to each question by direct key lookup
# (instead of scanning every dictionary once per question).
for instance in train_data:
    question_id = instance["question_id"]

    if question_id in NoR_train_predictions:
        instance["NoR_predicted_answer"] = NoR_train_predictions[question_id]
        # Same constant for every train question.
        # NOTE(review): presumably the average NoR time per question - confirm.
        instance["NoR_time_taken"] = 0.733

    if question_id in OneR_train_predictions:
        instance["OneR_predicted_answer"] = OneR_train_predictions[question_id]
        # Same constant for every train question.
        # NOTE(review): presumably the average OneR time per question - confirm.
        instance["OneR_time_taken"] = 7.038

    if question_id in IRCoT_train_predictions:
        instance["IRCoT_predicted_answer"] = IRCoT_train_predictions[question_id]
    if question_id in IRCoT_train_steps_taken:
        instance["IRCoT_steps_taken"] = IRCoT_train_steps_taken[question_id]
    if question_id in IRCoT_train_time_taken:
        instance["IRCoT_time_taken"] = IRCoT_train_time_taken[question_id]
            
    



In [16]:
# Display the first train record with the agents' values attached.
train_data[0]

{'question_id': '5a7ca33f5542990527d554ee',
 'question_text': "Which restaurant chain is based further south, Pizza Fusion or Imo's Pizza?",
 'gold_answers': ['Pizza Fusion'],
 'complexity_label': 'B',
 'NoR_predicted_answer': "Imo's Pizza",
 'NoR_time_taken': 0.733,
 'OneR_predicted_answer': 'Pizza Fusion',
 'OneR_time_taken': 7.038,
 'IRCoT_predicted_answer': 'Pizza Fusion',
 'IRCoT_steps_taken': 3,
 'IRCoT_time_taken': 144.5744680851064}

In [17]:
# Read the predictions of NoR, OneR and IRCoT for the test file

path_to_NoR_test_predictions_file = os.path.join(path_to_NoR_test_predictions_folder, "nor_qa_flan_xl_test_aware_210_51.json")
path_to_OneR_test_predictions_file = os.path.join(path_to_OneR_test_predictions_folder, "oner_qa_flan_xl_test_aware_210_51.json")
path_to_IRCoT_test_predictions_file = os.path.join(path_to_IRCoT_test_predictions_folder, "ircot_qa_flan_xl_test_aware_210_51.json")
path_to_IRCoT_test_steps_taken_file = os.path.join(path_to_IRCoT_test_predictions_folder, "ircot_qa_flan_xl_test_aware_210_51_steps_taken.json")
path_to_IRCoT_test_time_taken_file = os.path.join(path_to_IRCoT_test_predictions_folder, "ircot_qa_flan_xl_test_aware_210_51_individual_time_taken.json")

with open(path_to_NoR_test_predictions_file, "r") as f:
    NoR_test_predictions = json.load(f)

with open(path_to_OneR_test_predictions_file, "r") as f:
    OneR_test_predictions = json.load(f)

with open(path_to_IRCoT_test_predictions_file, "r") as f:
    IRCoT_test_predictions = json.load(f)

with open(path_to_IRCoT_test_steps_taken_file, "r") as f:
    IRCoT_test_steps_taken = json.load(f)

with open(path_to_IRCoT_test_time_taken_file, "r") as f:
    IRCoT_test_time_taken = json.load(f)

# Every file must cover exactly as many questions as the test split.
assert len(NoR_test_predictions) == len(test_data)
assert len(OneR_test_predictions) == len(test_data)
assert len(IRCoT_test_predictions) == len(test_data)
assert len(IRCoT_test_steps_taken) == len(test_data)
assert len(IRCoT_test_time_taken) == len(test_data)

# Attach the per-agent values to each question by direct key lookup
# (instead of scanning every dictionary once per question).
for instance in test_data:
    question_id = instance["question_id"]

    if question_id in NoR_test_predictions:
        instance["NoR_predicted_answer"] = NoR_test_predictions[question_id]
        # Same constant for every test question.
        # NOTE(review): presumably the average NoR time per question - confirm.
        instance["NoR_time_taken"] = 0.705

    if question_id in OneR_test_predictions:
        instance["OneR_predicted_answer"] = OneR_test_predictions[question_id]
        # Same constant for every test question.
        # NOTE(review): presumably the average OneR time per question - confirm.
        instance["OneR_time_taken"] = 6.392

    # Default to None so the key exists even when no step count is available.
    instance["IRCoT_steps_taken"] = None
    if question_id in IRCoT_test_predictions:
        instance["IRCoT_predicted_answer"] = IRCoT_test_predictions[question_id]
    if question_id in IRCoT_test_steps_taken:
        instance["IRCoT_steps_taken"] = IRCoT_test_steps_taken[question_id]
    if question_id in IRCoT_test_time_taken:
        instance["IRCoT_time_taken"] = IRCoT_test_time_taken[question_id]

test_data[0]


{'question_id': 'single_nq_dev_2922',
 'question_text': 'which is the eighth planet from the sun ( in order of increasing mean distance or semimajor axis )',
 'gold_answers': ['Neptune'],
 'complexity_label': 'B',
 'NoR_predicted_answer': 'Uranus',
 'NoR_time_taken': 0.705,
 'OneR_predicted_answer': 'Neptune',
 'OneR_time_taken': 6.392,
 'IRCoT_steps_taken': 10,
 'IRCoT_predicted_answer': 'Neptune',
 'IRCoT_time_taken': 709.5155709342561}

In [18]:
# Display the first ten test records with the agents' values attached.
test_data[:10]

[{'question_id': 'single_nq_dev_2922',
  'question_text': 'which is the eighth planet from the sun ( in order of increasing mean distance or semimajor axis )',
  'gold_answers': ['Neptune'],
  'complexity_label': 'B',
  'NoR_predicted_answer': 'Uranus',
  'NoR_time_taken': 0.705,
  'OneR_predicted_answer': 'Neptune',
  'OneR_time_taken': 6.392,
  'IRCoT_steps_taken': 10,
  'IRCoT_predicted_answer': 'Neptune',
  'IRCoT_time_taken': 709.5155709342561},
 {'question_id': '2hop__53147_7298',
  'question_text': "Along with the subject of Katy Perry's The One Who Got Away, what notable pop artist started out his career on adult contemporary radio?",
  'gold_answers': ['Michael Bublé'],
  'complexity_label': 'B',
  'NoR_predicted_answer': 'Elton John',
  'NoR_time_taken': 0.705,
  'OneR_predicted_answer': 'David Bowie',
  'OneR_time_taken': 6.392,
  'IRCoT_steps_taken': 4,
  'IRCoT_predicted_answer': 'Billy Joel',
  'IRCoT_time_taken': 283.8062283737024},
 {'question_id': 'single_squad_dev_440

# Evaluations



In [19]:
from AQA_final_eval import evaluate_single

# Evaluate the predictions of NoR, OneR and IRCoT on the train and test files,
# then save both splits for CMAB.

# NOTE: both files are written as JSON Lines (one record per line) despite
# the ".json" extension.
output_train_path = "DATA_FOR_CMAB/Train_Data_For_CMAB.json"
output_test_path = "DATA_FOR_CMAB/Test_Data_For_CMAB.json"

# Score each agent's answer against the gold answers and store the result
# on the record itself (train split first, then test split).
for dataset in (train_data, test_data):
    for instance in dataset:
        gold_answers = instance["gold_answers"]
        for agent_name in ("NoR", "OneR", "IRCoT"):
            predicted_answer = instance[f"{agent_name}_predicted_answer"]
            instance[f"{agent_name}_evaluation_results"] = evaluate_single(predicted_answer, gold_answers)

# Save the train and test data for CMAB
for output_path, dataset in ((output_train_path, train_data), (output_test_path, test_data)):
    output_dir = os.path.dirname(output_path)
    if output_dir:
        # Create the folder on first use instead of failing in open().
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, "w") as f:
        for instance in dataset:
            json.dump(instance, f)
            f.write("\n")


# gold: imos pizza, pred: pizza fusion, exact: 0
# gold_toks: ['imos', 'pizza'], pred_toks: ['pizza', 'fusion'], precision=0.5, recall=0.5, f1: 0.5
# gold: pizza fusion, pred: pizza fusion, exact: 1
# gold_toks: ['pizza', 'fusion'], pred_toks: ['pizza', 'fusion'], precision=1.0, recall=1.0, f1: 1.0
# gold: pizza fusion, pred: pizza fusion, exact: 1
# gold_toks: ['pizza', 'fusion'], pred_toks: ['pizza', 'fusion'], precision=1.0, recall=1.0, f1: 1.0
# gold: 105, pred: 45, exact: 0
# gold: 137, pred: 45, exact: 0
# gold: 137, pred: 45, exact: 0
# gold: switchfoot, pred: midnight oil, exact: 0
# gold: midnight oil, pred: midnight oil, exact: 1
# gold_toks: ['midnight', 'oil'], pred_toks: ['midnight', 'oil'], precision=1.0, recall=1.0, f1: 1.0
# gold: midnight oil, pred: midnight oil, exact: 1
# gold_toks: ['midnight', 'oil'], pred_toks: ['midnight', 'oil'], precision=1.0, recall=1.0, f1: 1.0
# gold: ludwig van beethoven, pred: joseph haydn, exact: 0
# gold: ludwig van beethoven, pred: jose

In [2]:
# Read the data for CMAB
import json

path_to_train_data_for_CMAB = "DATA_FOR_CMAB/Train_Data_For_CMAB.json"
path_to_test_data_for_CMAB = "DATA_FOR_CMAB/Test_Data_For_CMAB.json"

# Each file holds one JSON record per line.
with open(path_to_train_data_for_CMAB, "r") as f:
    train_data_for_CMAB = [json.loads(line) for line in f]

with open(path_to_test_data_for_CMAB, "r") as f:
    test_data_for_CMAB = [json.loads(line) for line in f]

# Display the fourth record of each split as the cell output.
train_data_for_CMAB[3], test_data_for_CMAB[3]

({'question_id': 'single_nq_dev_5290',
  'question_text': 'who composed the music of the german national anthem',
  'gold_answers': ['Joseph Haydn'],
  'complexity_label': 'B',
  'NoR_predicted_answer': 'Ludwig van Beethoven',
  'NoR_time_taken': 0.733,
  'OneR_predicted_answer': 'Ludwig van Beethoven',
  'OneR_time_taken': 7.038,
  'IRCoT_predicted_answer': 'Ludwig van Beethoven',
  'IRCoT_steps_taken': 4,
  'IRCoT_time_taken': 192.7659574468085,
  'NoR_evaluation_results': {'em': 0.0,
   'f1': 0.0,
   'count': 1,
   'accuracy': 0.0},
  'OneR_evaluation_results': {'em': 0.0,
   'f1': 0.0,
   'count': 1,
   'accuracy': 0.0},
  'IRCoT_evaluation_results': {'em': 0.0,
   'f1': 0.0,
   'count': 1,
   'accuracy': 0.0}},
 {'question_id': 'single_squad_dev_1294',
  'question_text': 'When was the term kirishitan used by the Japanese?',
  'gold_answers': ['16th and 17th centuries'],
  'complexity_label': 'B',
  'NoR_predicted_answer': '1868',
  'NoR_time_taken': 0.705,
  'OneR_predicted_answer

# Timings


In [35]:
import json
import os

def parse_text_file(file_path):
    """Count the logged answer steps of every question in a chains text file.

    A non-empty line is treated as a question id when it is the first line
    of the file, or when the previous line is blank and the line three above
    is ``Q: [EOQ]``. From an id line up to the next ``Q: [EOQ]`` line, every
    line starting with ``A: ["`` is counted. The stored value is that count
    minus one, never below 0.

    The result is also saved as JSON next to the input: ``chains.txt`` in the
    file name is replaced by ``steps_taken.json``. If the name does not
    contain ``chains.txt``, ``_steps_taken.json`` is appended to the name
    without its extension, so the input file is never overwritten.

    Args:
        file_path: Path of the chains text file.

    Returns:
        dict: ``{question_id: step_count}``.
    """
    with open(file_path, 'r') as file:
        lines = [raw_line.strip() for raw_line in file]

    result_dict = {}
    current_id = None
    current_count = 0
    reading_answers = False

    for i, line in enumerate(lines):
        # Look back only where the lines exist: negative indices would wrap
        # around to the end of the file and make the first id depend on
        # whether the file happens to end with a blank line.
        starts_file = i == 0
        follows_finished_chain = (
            i >= 3 and not lines[i - 1] and lines[i - 3] == 'Q: [EOQ]'
        )

        if line and (starts_file or follows_finished_chain):
            if current_id is not None:
                # Store the previous ID count, decrement by 1
                result_dict[current_id] = max(0, current_count - 1)
            current_id = line
            current_count = 0
            reading_answers = True
        elif line.startswith('Q: [EOQ]'):
            reading_answers = False
        elif reading_answers and line.startswith('A: ["'):
            current_count += 1

    # Add the last ID if it wasn't added
    if current_id is not None and current_id not in result_dict:
        result_dict[current_id] = max(0, current_count - 1)

    # Construct the JSON file path based on the input file path
    base_name = os.path.basename(file_path)
    directory = os.path.dirname(file_path)
    new_base_name = base_name.replace('chains.txt', 'steps_taken.json')
    if new_base_name == base_name:
        # Unexpected input name: pick a distinct output name rather than
        # writing the JSON over the input file.
        new_base_name = os.path.splitext(base_name)[0] + '_steps_taken.json'
    json_path = os.path.join(directory, new_base_name)

    # Save the dictionary as a JSON file
    with open(json_path, 'w') as json_file:
        json.dump(result_dict, json_file, indent=4)

    return result_dict

# Test-split invocation, kept commented out (same call with the test chains file):
# test_file_path = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final/ircot/ircot_qa_flan_xl_test_aware_210_51_chains.txt"
# test_parsed_ircot_step_count = parse_text_file(test_file_path)

# test_parsed_ircot_step_count

# Count the IRCoT steps per train question; the call also writes the counts as JSON next to the chains file.
train_file_path = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final_Train/ircot/ircot_qa_flan_xl_train_aware_210_51_chains.txt"
train_parsed_ircot_step_count = parse_text_file(train_file_path)
# Number of questions found in the chains file.
len(train_parsed_ircot_step_count)

210

In [36]:
import json
import os

def estimate_individual_time(json_file_path, time_file_path):
    """Split a total run time over questions in proportion to their step counts.

    The total time is divided by the sum of all step counts to get a time per
    step (0 when there are no steps); each question is assigned
    ``steps * time_per_step``.

    The estimates are saved as JSON next to the steps file:
    ``steps_taken.json`` in the file name is replaced by
    ``individual_time_taken.json``. If the name does not contain
    ``steps_taken.json``, ``_individual_time_taken.json`` is appended to the
    name without its extension, so the steps file is never overwritten.

    Args:
        json_file_path: JSON file mapping question ids to step counts.
        time_file_path: Text file whose first line is the total time.

    Raises:
        ValueError: If the first line of the time file is not a number.
    """
    # Read the steps from the JSON file
    with open(json_file_path, 'r') as file:
        steps_data = json.load(file)

    # Read the total execution time from the time file
    with open(time_file_path, 'r') as file:
        total_time = float(file.readline().strip())

    # Average time per step over all questions
    total_steps = sum(steps_data.values())
    time_per_step = total_time / total_steps if total_steps > 0 else 0

    # Time taken for each question based on its steps
    individual_times = {key: steps * time_per_step for key, steps in steps_data.items()}

    # Construct the path for the output JSON file
    directory = os.path.dirname(json_file_path)
    input_file_name = os.path.basename(json_file_path)
    output_file_name = input_file_name.replace('steps_taken.json', 'individual_time_taken.json')
    if output_file_name == input_file_name:
        # Unexpected input name: pick a distinct output name rather than
        # writing the estimates over the steps file.
        output_file_name = os.path.splitext(input_file_name)[0] + '_individual_time_taken.json'
    output_file_path = os.path.join(directory, output_file_name)

    # Save the individual times as a JSON file
    with open(output_file_path, 'w') as file:
        json.dump(individual_times, file, indent=4)

    print(f"Individual time estimations saved to {output_file_path}")

# Example usage: estimate the per-question IRCoT time on the train split from
# the saved step counts and the recorded total time.
json_file_path = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final_Train/ircot/ircot_qa_flan_xl_train_aware_210_51_steps_taken.json"
time_file_path = "/home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final_Train/ircot/ircot_qa_flan_xl_train_aware_210_51_time_taken.txt"
estimate_individual_time(json_file_path, time_file_path)

Individual time estimations saved to /home/mhoveyda/AdaptiveQA/Adaptive-RAG/Agents_Executed_Final_Train/ircot/ircot_qa_flan_xl_train_aware_210_51_individual_time_taken.json
