The goal here is to preprocess the output files produced by running the individual agents, so that they are ready for the CMAB and GPTSwarm experiments.

We get the 1) question_id, 2) question_text, 3) gold_answer, 4) predicted_answer, 5) evaluation_results, 6) complexity_label, and 7) time_taken (and steps_taken) for each question in the train and test files, using the predictions of NoR, OneR and IRCoT.


In [1]:
import os
import json
import re
import numpy as np

# 1. Preprocess 1
Here we first extract some values (time taken, confidence scores, etc.) from the log files 

In [2]:
# Root folder under which the processed per-agent files are written.
destination_folder = "../Results"

# Agents whose inference logs are processed; every per-agent list below
# follows this order.
agents = ["nor", "oner", "ircot"]

# One raw inference log per agent for the train split ...
path_to_inference_log_files_for_train_data_for_each_agent = [
    f"../LOGS/train/{agent}_qa_flan_xl_train_aware_210_51.txt"
    for agent in agents
]

# ... and one per agent for the test split.
path_to_inference_log_files_for_test_data_for_each_agent = [
    f"../LOGS/test/{agent}_qa_flan_xl_test_aware_210_51.txt"
    for agent in agents
]


In [3]:
def parse_log_file(filepath):
    """Parse an agent inference log into per-question, per-step records.

    The log is scanned line by line (after stripping whitespace):

    * a line starting with ``Index:`` begins a new entry; it clears the
      current question id and resets the step counter,
    * a line starting with ``Processing question:`` supplies the question
      id, taken as everything after the ``QID:`` marker,
    * lines starting with ``Generated Texts:``, ``Confidence Info:`` or
      ``Run Time in Seconds:`` fill the current step of the current
      question; a ``Run Time in Seconds:`` line closes the step.

    Args:
        filepath: Path of the log file to read.

    Returns:
        dict mapping question id -> {step index -> {'Generated Texts',
        'Confidence Info', 'Run Time in Seconds'}}. All values are the raw
        strings found after the first colon; fields not seen for a step
        stay as the empty string.
    """
    sub_keys = ('Generated Texts', 'Confidence Info', 'Run Time in Seconds')
    qid_marker = "QID:"

    data_dict = {}
    current_qid = None
    current_sub_index = 0

    # Iterate the file lazily instead of loading every line into memory.
    with open(filepath, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()

            if line.startswith("Index:"):
                # The index value itself is never used, so it is not parsed;
                # a malformed index must not abort the whole run.
                current_qid = None
                current_sub_index = 0

            elif line.startswith("Processing question:"):
                marker_pos = line.find(qid_marker)
                if marker_pos == -1:
                    # Without the marker there is no id to extract. Slicing
                    # from find() == -1 would yield a garbage key, so the
                    # following sub-key lines are ignored instead.
                    current_qid = None
                    continue
                current_qid = line[marker_pos + len(qid_marker):].strip()
                data_dict.setdefault(current_qid, {})

            elif current_qid is not None:
                for key in sub_keys:
                    if not line.startswith(key + ':'):
                        continue
                    step = data_dict[current_qid].setdefault(
                        current_sub_index, {name: '' for name in sub_keys}
                    )
                    step[key] = line.split(":", 1)[1].strip()
                    # The run-time line is the last field of a step.
                    if key == 'Run Time in Seconds':
                        current_sub_index += 1
                    break

    return data_dict

def calculate_confidence_info(confidence_str):
    """Average every ``'prob': <number>`` value found in a confidence string.

    Args:
        confidence_str: Raw text of a ``Confidence Info:`` log line.

    Returns:
        The mean of all probabilities found, or ``0.0`` when the string
        contains none.
    """
    # Accept plain decimals *and* scientific notation. Very small floats are
    # printed as e.g. ``2.5e-05``; a pattern that stops at the mantissa would
    # read that as 2.5 and corrupt the average.
    number = r"(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    probs = re.findall(r"'prob': (" + number + r")", confidence_str)
    if not probs:
        return 0.0
    return np.mean([float(prob) for prob in probs])

def process_data(data_dict):
    """Summarise the parsed log records of every question.

    Args:
        data_dict: Mapping question id -> {step index -> step record}, where
            each step record has the string fields 'Confidence Info' and
            'Run Time in Seconds' (the shape produced by ``parse_log_file``).

    Returns:
        dict mapping question id -> summary with the keys
        'number_of_subkeys', 'subkey_details' (per-step average confidence
        and run time), 'total_run_time_in_seconds',
        'average_confidence_score_among_all_subkeys' and
        'average_confidence_score_of_the_last_subkey'.

    Raises:
        ValueError: If a step's 'Run Time in Seconds' is not a valid float
            (for example still empty because the log was cut off mid-step).
    """
    processed_data = {}

    for qid, subkeys in data_dict.items():
        subkey_details = {}
        confidence_scores = []
        total_run_time = 0

        for subkey_id, details in subkeys.items():
            avg_confidence = calculate_confidence_info(details['Confidence Info'])
            run_time = float(details['Run Time in Seconds'])

            subkey_details[subkey_id] = {
                'average_confidence_score': avg_confidence,
                'run_time_in_seconds': run_time
            }
            total_run_time += run_time
            confidence_scores.append(avg_confidence)

        # Scores are accumulated in a local list so the output field is always
        # a number; a question without steps gets 0.0 instead of a leftover
        # empty list.
        if confidence_scores:
            overall_confidence = np.mean(confidence_scores)
            # Steps are visited in insertion order, so the last appended score
            # belongs to the last step.
            last_confidence = confidence_scores[-1]
        else:
            overall_confidence = 0.0
            last_confidence = 0

        processed_data[qid] = {
            'number_of_subkeys': len(subkeys),
            'subkey_details': subkey_details,
            'total_run_time_in_seconds': total_run_time,
            'average_confidence_score_among_all_subkeys': overall_confidence,
            'average_confidence_score_of_the_last_subkey': last_confidence
        }

    return processed_data


def write_to_jsonl(data, folder_path, filename):
    """Write ``data`` as JSON Lines, one ``{qid: details}`` object per line.

    Args:
        data: Mapping question id -> JSON-serialisable details.
        folder_path: Destination folder; created (with parents) if missing.
        filename: Name of the file to create inside ``folder_path``.
    """
    # Create the destination up front so a fresh checkout does not fail with
    # FileNotFoundError on the first write.
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)

    file_path = f"{folder_path}/{filename}"
    print(f"Writing data to {file_path}")
    with open(file_path, 'w') as file:
        for qid, details in data.items():
            file.write(json.dumps({qid: details}) + '\n')
    print(f"Data successfully written to {file_path}")

def process_and_write(agent, log_file, data_type):
    """Parse one agent's log, summarise it and store the summary as JSONL.

    Args:
        agent: Agent name, used in the output folder and file name.
        log_file: Path of the raw inference log to parse.
        data_type: Split name (the notebook passes "test" or "train"), used
            in the output folder and file name.
    """
    print(f"Processing {data_type} log for {agent}: {log_file}")

    # Parse the raw log, then reduce it to per-question summaries.
    summaries = process_data(parse_log_file(log_file))

    # Output location lives under the notebook-level `destination_folder`.
    target_dir = f"{destination_folder}/{data_type}/IndividualAgents/{agent}"
    target_name = f"{agent}_qa_flan_xl_{data_type}_aware_210_51_processed.jsonl"

    write_to_jsonl(summaries, target_dir, target_name)


In [4]:
# Process the test log and then the train log of every agent, in agent order.
for agent, test_log_file, train_log_file in zip(
    agents,
    path_to_inference_log_files_for_test_data_for_each_agent,
    path_to_inference_log_files_for_train_data_for_each_agent,
):
    for data_type, log_file in (("test", test_log_file), ("train", train_log_file)):
        process_and_write(agent, log_file, data_type)
    print(f"Processing for {agent} completed\n")

Processing test log for nor: ../LOGS/test/nor_qa_flan_xl_test_aware_210_51.txt
Writing data to ../Results/test/IndividualAgents/nor/nor_qa_flan_xl_test_aware_210_51_processed.jsonl
Data successfully written to ../Results/test/IndividualAgents/nor/nor_qa_flan_xl_test_aware_210_51_processed.jsonl
Processing train log for nor: ../LOGS/train/nor_qa_flan_xl_train_aware_210_51.txt
Writing data to ../Results/train/IndividualAgents/nor/nor_qa_flan_xl_train_aware_210_51_processed.jsonl
Data successfully written to ../Results/train/IndividualAgents/nor/nor_qa_flan_xl_train_aware_210_51_processed.jsonl
Processing for nor completed

Processing test log for oner: ../LOGS/test/oner_qa_flan_xl_test_aware_210_51.txt
Writing data to ../Results/test/IndividualAgents/oner/oner_qa_flan_xl_test_aware_210_51_processed.jsonl
Data successfully written to ../Results/test/IndividualAgents/oner/oner_qa_flan_xl_test_aware_210_51_processed.jsonl
Processing train log for oner: ../LOGS/train/oner_qa_flan_xl_train_aw

# 2. Preprocess 2


In [5]:
import os
import json

# Paths of the ORIGINAL train and test files.
path_to_original_files = {
    "train": "../AQA_Data_Final/train_aware_210_51.jsonl",
    "test": "../AQA_Data_Final/test_aware_210_51.jsonl"
}


# Paths of the PROCESSED LOG FILES (written in "Preprocess 1") of the NoR, OneR
# and IRCoT agents for the original train and test files, in `agents` order.
path_to_processed_logs = {
    "test": [
        f"../Results/test/IndividualAgents/{agent}/{agent}_qa_flan_xl_test_aware_210_51_processed.jsonl" for agent in agents
    ],
    "train": [
        f"../Results/train/IndividualAgents/{agent}/{agent}_qa_flan_xl_train_aware_210_51_processed.jsonl" for agent in agents
    ]
}

# Paths of the raw PREDICTION FILES of the same agents, in `agents` order.
path_to_predictions = {
    "test": [
        f"../Results/test/IndividualAgents/{agent}/{agent}_qa_flan_xl_test_aware_210_51.json" for agent in agents
    ],
    "train": [
        f"../Results/train/IndividualAgents/{agent}/{agent}_qa_flan_xl_train_aware_210_51.json" for agent in agents
    ]
}


# Where the merged data for CMAB is saved. This is the single definition of
# `output_folder`: an earlier assignment to "../Preprocessed_Data_for_CMAB"
# was always overwritten by this value and has been removed.
output_folder = "../Results/processed_data_for_CMAB"

In [6]:
def read_data(file_path):
    """Load the questions of an original JSONL data file.

    Each non-blank line must be a JSON object with the keys "question_id",
    "question_text", "complexity_label" and "answers_objects"; the gold
    answers are read from ``answers_objects[0]["spans"]``.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        list of dicts with the keys "question_id", "question_text",
        "gold_answers" (a list in which string spans are kept as they are
        and non-string spans are expanded one level) and "complexity_label".
    """
    data = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            if not line.strip():
                continue
            data_point = json.loads(line)

            # Flatten the gold answers by one level.
            gold_answers = []
            for answer in data_point["answers_objects"][0]["spans"]:
                if isinstance(answer, str):
                    gold_answers.append(answer)
                else:
                    gold_answers.extend(answer)

            data.append({
                "question_id": data_point["question_id"],
                "question_text": data_point["question_text"],
                "gold_answers": gold_answers,
                "complexity_label": data_point["complexity_label"]
            })
    return data

# Read the train and test data

# Load both original splits in one go (train first, then test).
train_data, test_data = (
    read_data(path_to_original_files[split]) for split in ("train", "test")
)

print(f"Number of questions in the original train file: {len(train_data)}")
print(f"Number of questions in the original test file: {len(test_data)}")

# Show the first record of each split as a quick sanity check.
(train_data[0], test_data[0])


Number of questions in the original train file: 210
Number of questions in the original test file: 51


({'question_id': '5a7ca33f5542990527d554ee',
  'question_text': "Which restaurant chain is based further south, Pizza Fusion or Imo's Pizza?",
  'gold_answers': ['Pizza Fusion'],
  'complexity_label': 'B'},
 {'question_id': 'single_nq_dev_2922',
  'question_text': 'which is the eighth planet from the sun ( in order of increasing mean distance or semimajor axis )',
  'gold_answers': ['Neptune'],
  'complexity_label': 'B'})

In [7]:
from AQA_final_eval import evaluate_single

# now read the processed predictions for each agent for the train and test data and merge them with the original data and save them in the output folder

def merger(path_to_original_file, processed_logs_file_paths, predictions_file_paths, destination_folder):
    """Merge the original questions with every agent's logs and predictions.

    For each question of the original file, and for each agent that has both
    a processed-log entry and a prediction for it, the merged record gains
    ``<agent>_predicted_answer``, ``<agent>_time_taken``,
    ``<agent>_steps_taken`` and ``<agent>_evaluation_results`` (the result of
    ``evaluate_single(prediction, gold_answers)``). The merged records are
    written as JSONL to ``destination_folder`` under the original file's name
    with ".jsonl" replaced by "_complete.jsonl".

    Args:
        path_to_original_file: JSONL file readable by ``read_data``.
        processed_logs_file_paths: Processed-log JSONL paths, one per agent,
            in the order of the notebook-level ``agents`` list.
        predictions_file_paths: Prediction JSON paths, in the same order.
            NOTE(review): each file is assumed to hold one JSON object keyed
            by question id -- confirm against the prediction files.
        destination_folder: Folder for the merged file; created if missing.

    Returns:
        list of merged records, in the order of the original file.
    """
    original_data = read_data(path_to_original_file)
    processed_logs = {}
    all_predictions = {}
    for agent, processed_file, prediction_file in zip(agents, processed_logs_file_paths, predictions_file_paths):
        print(f"Agent: {agent}, Processed File: {processed_file}, Prediction File: {prediction_file}")

        # Every line of a processed log is a single-key object {qid: details}.
        processed_data = {}
        with open(processed_file, "r") as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines
                data_point = json.loads(line)
                qid = next(iter(data_point))
                processed_data[qid] = data_point[qid]
        processed_logs[agent] = processed_data

        with open(prediction_file, "r") as f:
            all_predictions[agent] = json.load(f)

    # merge the original data with the processed logs
    merged_data = []
    for data_point in original_data:
        qid = data_point["question_id"]
        merged_point = {
            "question_id": qid,
            "question_text": data_point["question_text"],
            "gold_answers": data_point["gold_answers"],
            "complexity_label": data_point["complexity_label"]
        }
        # Iterate over the agents actually loaded above, so a path list
        # shorter than `agents` cannot trigger a KeyError here.
        for agent in processed_logs:
            # Need both the log summary and the prediction; a question missing
            # from either is skipped for this agent rather than raising a
            # KeyError.
            if qid not in processed_logs[agent] or qid not in all_predictions[agent]:
                continue
            log_summary = processed_logs[agent][qid]
            predicted_answer = all_predictions[agent][qid]
            merged_point[f"{agent}_predicted_answer"] = predicted_answer
            merged_point[f"{agent}_time_taken"] = log_summary["total_run_time_in_seconds"]
            merged_point[f"{agent}_steps_taken"] = log_summary["number_of_subkeys"]
            merged_point[f"{agent}_evaluation_results"] = evaluate_single(predicted_answer, merged_point["gold_answers"])
        merged_data.append(merged_point)

    # save the merged data in a jsonl file
    output_file_name = os.path.basename(path_to_original_file).replace(".jsonl", "_complete.jsonl")
    output_file_path = os.path.join(destination_folder, output_file_name)
    os.makedirs(destination_folder, exist_ok=True)
    with open(output_file_path, "w") as f:
        for data_point in merged_data:
            f.write(json.dumps(data_point) + "\n")
    return merged_data
    
        



In [8]:
# Build and save the merged test split.
merged_test_data = merger(
    path_to_original_file=path_to_original_files["test"],
    processed_logs_file_paths=path_to_processed_logs["test"],
    predictions_file_paths=path_to_predictions["test"],
    destination_folder=output_folder,
)


Agent: nor, Processed File: ../Results/test/IndividualAgents/nor/nor_qa_flan_xl_test_aware_210_51_processed.jsonl, Prediction File: ../Results/test/IndividualAgents/nor/nor_qa_flan_xl_test_aware_210_51.json
Agent: oner, Processed File: ../Results/test/IndividualAgents/oner/oner_qa_flan_xl_test_aware_210_51_processed.jsonl, Prediction File: ../Results/test/IndividualAgents/oner/oner_qa_flan_xl_test_aware_210_51.json
Agent: ircot, Processed File: ../Results/test/IndividualAgents/ircot/ircot_qa_flan_xl_test_aware_210_51_processed.jsonl, Prediction File: ../Results/test/IndividualAgents/ircot/ircot_qa_flan_xl_test_aware_210_51.json


In [9]:
# Display the first merged test record to check that all per-agent fields are present.
merged_test_data[0]

{'question_id': 'single_nq_dev_2922',
 'question_text': 'which is the eighth planet from the sun ( in order of increasing mean distance or semimajor axis )',
 'gold_answers': ['Neptune'],
 'complexity_label': 'B',
 'nor_predicted_answer': 'Uranus',
 'nor_time_taken': 2.038320779800415,
 'nor_steps_taken': 1,
 'nor_evaluation_results': {'em': 0.0, 'f1': 0.0, 'count': 1, 'accuracy': 0.0},
 'oner_predicted_answer': 'Neptune',
 'oner_time_taken': 6.091372489929199,
 'oner_steps_taken': 1,
 'oner_evaluation_results': {'em': 1.0,
  'f1': 1.0,
  'count': 1,
  'accuracy': 1.0},
 'ircot_predicted_answer': 'Neptune',
 'ircot_time_taken': 1359.9865555763245,
 'ircot_steps_taken': 11,
 'ircot_evaluation_results': {'em': 1.0,
  'f1': 1.0,
  'count': 1,
  'accuracy': 1.0}}

In [10]:
# Build and save the merged train split.
merged_train_data = merger(
    path_to_original_file=path_to_original_files["train"],
    processed_logs_file_paths=path_to_processed_logs["train"],
    predictions_file_paths=path_to_predictions["train"],
    destination_folder=output_folder,
)

Agent: nor, Processed File: ../Results/train/IndividualAgents/nor/nor_qa_flan_xl_train_aware_210_51_processed.jsonl, Prediction File: ../Results/train/IndividualAgents/nor/nor_qa_flan_xl_train_aware_210_51.json
Agent: oner, Processed File: ../Results/train/IndividualAgents/oner/oner_qa_flan_xl_train_aware_210_51_processed.jsonl, Prediction File: ../Results/train/IndividualAgents/oner/oner_qa_flan_xl_train_aware_210_51.json
Agent: ircot, Processed File: ../Results/train/IndividualAgents/ircot/ircot_qa_flan_xl_train_aware_210_51_processed.jsonl, Prediction File: ../Results/train/IndividualAgents/ircot/ircot_qa_flan_xl_train_aware_210_51.json


In [11]:
# Display the first merged train record to check that all per-agent fields are present.
merged_train_data[0]

{'question_id': '5a7ca33f5542990527d554ee',
 'question_text': "Which restaurant chain is based further south, Pizza Fusion or Imo's Pizza?",
 'gold_answers': ['Pizza Fusion'],
 'complexity_label': 'B',
 'nor_predicted_answer': "Imo's Pizza",
 'nor_time_taken': 0.8518612384796143,
 'nor_steps_taken': 1,
 'nor_evaluation_results': {'em': 0.0, 'f1': 0.5, 'count': 1, 'accuracy': 0.0},
 'oner_predicted_answer': 'Pizza Fusion',
 'oner_time_taken': 5.093897819519043,
 'oner_steps_taken': 1,
 'oner_evaluation_results': {'em': 1.0,
  'f1': 1.0,
  'count': 1,
  'accuracy': 1.0},
 'ircot_predicted_answer': 'Pizza Fusion',
 'ircot_time_taken': 84.7508134841919,
 'ircot_steps_taken': 4,
 'ircot_evaluation_results': {'em': 1.0,
  'f1': 1.0,
  'count': 1,
  'accuracy': 1.0}}