In [None]:
import os
import shutil

def process_text(content):
    """Strip chat artefacts from a raw model response and normalise its header.

    The literal markers ``Output:``, triple backticks and ``Done`` are removed,
    a newline directly in front of the header text is dropped, and the
    ``NCT Number<TAB>Adjuvant Name`` header line is prepended when the text
    does not already start with it.

    Every newline clean-up is a single ``str.replace`` pass, so a run of three
    or more consecutive newlines is shortened but not necessarily reduced to
    exactly one.
    """
    header_cells = "NCT Number\tAdjuvant Name"
    header = header_cells + "\n"

    # Applied strictly in this order: later steps rely on earlier ones.
    substitutions = (
        ("Output:", ""),
        ("```", ""),
        ("Done\tDone", ""),
        ("Done", ""),
        ("\n\n", "\n"),
        ("\n\n" + header_cells, header_cells),
        ("\n" + header_cells, header_cells),
    )
    for needle, replacement in substitutions:
        content = content.replace(needle, replacement)

    if not content.startswith(header):
        content = header + content
    return content.replace("\n\n", "\n")

def process_text_file(input_filepath, output_filepath):
    """Read a UTF-8 text file, clean it with ``process_text`` and write the result.

    Parameters
    ----------
    input_filepath : str
        Path of the raw text file to read.
    output_filepath : str
        Path to write the cleaned text to. Missing parent folders are created.
    """
    with open(input_filepath, 'r', encoding='utf-8') as source:
        content = source.read()
    content = process_text(content)

    # os.path.dirname() returns '' for a bare file name, and os.makedirs('')
    # raises; only create the folder when there is one to create.
    output_folder = os.path.dirname(output_filepath)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    with open(output_filepath, 'w', encoding='utf-8') as target:
        target.write(content)
          
def process_directory(input_dir, output_dir, run_tag="AdjuvareDB104_T0.0_24June"):
    """Clean every ``.txt`` file below ``input_dir`` into a mirrored tree in ``output_dir``.

    Parameters
    ----------
    input_dir : str
        Root folder that is walked recursively.
    output_dir : str
        Root folder for the cleaned copies; each file keeps its path relative
        to ``input_dir``.
    run_tag : str, optional
        Only folders whose path contains this text are processed. The default
        is the value that used to be hard-coded here.
    """
    for subdir, _dirs, files in os.walk(input_dir):
        if run_tag not in subdir:
            continue
        for file in files:
            if not file.endswith('.txt'):
                continue
            input_filepath = os.path.join(subdir, file)
            relative_path = os.path.relpath(input_filepath, input_dir)
            output_filepath = os.path.join(output_dir, relative_path)
            process_text_file(input_filepath, output_filepath)


In [None]:
# Post-process the raw outputs of the 0- to 4-shot "interventions" prompts.
for i in range(5):
    input_dir = f'Output/Prompt2_merged_interventions_{i}shot'
    output_dir = f'{input_dir}_postprocessed'

    # Cleaned copies of the .txt files are written below output_dir
    process_directory(input_dir, output_dir)

In [None]:
# Post-process the raw outputs of the 0- to 4-shot prompts without interventions.
# NOTE(review): process_directory only touches folders whose path contains
# "AdjuvareDB104_T0.0_24June", while the metrics cell further down reads these
# prompts from an "AdjuvareDB104_T0.0_18June" folder - confirm that the filter
# matches the data that is expected to be processed here.
for i in range(5):
    input_dir = f'Output/Prompt2_merged_{i}shot'
    output_dir = f'{input_dir}_postprocessed'

    # Cleaned copies of the .txt files are written below output_dir
    process_directory(input_dir, output_dir)


In [None]:
####### Merge Outputs  ############

In [None]:
import os
import pandas as pd

def merge_csv_files(folder_path, output_file):
    """Merge every tab-separated ``.txt`` file in ``folder_path`` into one CSV.

    Rows in which any cell contains the text ``Done`` are dropped. Files are
    read in sorted name order so the merged row order is reproducible.

    Parameters
    ----------
    folder_path : str
        Folder holding the tab-separated ``.txt`` files.
    output_file : str
        Path of the comma-separated file to write.

    Raises
    ------
    ValueError
        If the folder contains no ``.txt`` file.
    """
    print(folder_path)

    dataframes = []
    # os.listdir() order is arbitrary; sort for a stable result.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.txt'):
            continue
        df = pd.read_csv(os.path.join(folder_path, filename), sep='\t')

        # Row mask built column by column so it is also well defined for a
        # file that contains only the header line (zero rows).
        as_text = df.astype(str)
        has_done = pd.Series(False, index=df.index, dtype=bool)
        for column in as_text.columns:
            has_done = has_done | as_text[column].str.contains('Done', regex=False)
        dataframes.append(df[~has_done])

    if not dataframes:
        raise ValueError(f"No .txt files found in {folder_path}; nothing to merge")

    merged_df = pd.concat(dataframes, ignore_index=True)
    merged_df.to_csv(output_file, index=False)

    print(f"All CSV files in {folder_path} have been successfully merged, skipping rows with 'Done'!")

import os

# Models, shot counts and run folders whose outputs are merged below
models = [
    'GPT_gpt-3.5-turbo-1106',
    'GPT_gpt-3.5-turbo-0125',
    'GPT_gpt-4-turbo-2024-04-09',
]
shots = list(range(5))
base_folder = 'Output'
run_folders = [f'AdjuvareDB104_T0.0_Run{run_index}' for run_index in (1, 2, 3)]

# For every model/shot pair, merge the per-file outputs of each run into a
# single "<run>_merged_file.csv" stored next to the run folders.
for model in models:
    for shot in shots:
        folder_path = os.path.join(
            base_folder,
            f'Prompt2_merged_interventions_{shot}shot_postprocessed',
            model,
            'AdjuvareDB104_T0.0_24June',
        )

        for run_folder in run_folders:
            full_folder_path = os.path.join(folder_path, run_folder)
            output_file = os.path.join(folder_path, run_folder + '_merged_file.csv')
            merge_csv_files(full_folder_path, output_file)



In [None]:
import pandas as pd
import os

# Models, shot counts and (run folder, run label) pairs joined with the gold standard
models = [
    'GPT_gpt-3.5-turbo-1106',
    'GPT_gpt-3.5-turbo-0125',
    'GPT_gpt-4-turbo-2024-04-09',
]
shots = list(range(5))
runs = [(f"AdjuvareDB104_T0.0_Run{run_index}", f"Run{run_index}") for run_index in (1, 2, 3)]
goldstandard_path = 'Dataset/AdjuvareDB104_Standard/10_folds_preprocessed_merged_file.csv'

def merge_goldstandard_with_predicted(model, shot, run_folder, run_number,
                                      date_folder='AdjuvareDB104_T0.0_18June'):
    """Outer-join one run's merged predictions with the gold standard on "NCT Number".

    The gold standard is read from the module-level ``goldstandard_path``. The
    joined frame is written next to the predictions as
    ``<run_folder>_merged_with_goldstandard.csv`` and also returned.

    Parameters
    ----------
    model : str
        Model folder name.
    shot : int
        Number of shots; selects the ``Prompt2_merged_interventions_<shot>shot_postprocessed`` folder.
    run_folder : str
        Run folder name; prefix of the input and output file names.
    run_number : str
        Run label, used only in the progress message.
    date_folder : str, optional
        Experiment folder below the model folder. Defaults to the value that
        used to be hard-coded, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The outer join of gold standard and predictions.
    """
    # NOTE(review): the merge cell earlier in this notebook writes
    # "*_merged_file.csv" only below "AdjuvareDB104_T0.0_24June"; confirm the
    # files expected below the default folder are produced elsewhere.
    run_dir = os.path.join('Output', f'Prompt2_merged_interventions_{shot}shot_postprocessed', model, date_folder)
    predicted_path = os.path.join(run_dir, f'{run_folder}_merged_file.csv')
    output_path = os.path.join(run_dir, f'{run_folder}_merged_with_goldstandard.csv')

    # Read the gold standard and predicted datasets
    goldstandard_df = pd.read_csv(goldstandard_path)
    predicted_df = pd.read_csv(predicted_path)

    # Outer join keeps trials that appear on only one side; their missing
    # columns are NaN in the result.
    merged_df = pd.merge(goldstandard_df, predicted_df, on="NCT Number", how="outer")

    merged_df.to_csv(output_path, index=False)

    print(f"The gold standard and predicted datasets for {model}, {shot} shots, {run_number} have been successfully merged with an outer join based on 'NCT Number'!")

    return merged_df

# Join every run with the gold standard; the joined frames are kept per
# (model, shot) key, in the order of `runs`.
all_merged_dfs = {}
for model in models:
    for shot in shots:
        merged_dfs = [
            merge_goldstandard_with_predicted(model, shot, run_folder, run_number)
            for run_folder, run_number in runs
        ]
        all_merged_dfs[(model, shot)] = merged_dfs




In [None]:
import pandas as pd
import os

# Models, shot counts and (run folder, run label) pairs joined with the gold standard
models = [
    'GPT_gpt-3.5-turbo-1106',
    'GPT_gpt-3.5-turbo-0125',
    'GPT_gpt-4-turbo-2024-04-09',
]
shots = list(range(5))
runs = [(f"AdjuvareDB104_T0.0_Run{run_index}", f"Run{run_index}") for run_index in (1, 2, 3)]
goldstandard_path = 'Dataset/AdjuvareDB104_Standard/10_folds_preprocessed_merged_file.csv'

def merge_goldstandard_with_predicted(model, shot, run_folder, run_number,
                                      date_folder='AdjuvareDB104_T0.0_24June'):
    """Outer-join one run's merged predictions with the gold standard on "NCT Number".

    The gold standard is read from the module-level ``goldstandard_path``. The
    joined frame is written next to the predictions as
    ``<run_folder>_merged_with_goldstandard.csv`` and also returned.

    Parameters
    ----------
    model : str
        Model folder name.
    shot : int
        Number of shots; selects the ``Prompt2_merged_interventions_<shot>shot_postprocessed`` folder.
    run_folder : str
        Run folder name; prefix of the input and output file names.
    run_number : str
        Run label, used only in the progress message.
    date_folder : str, optional
        Experiment folder below the model folder. Defaults to the value that
        used to be hard-coded, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The outer join of gold standard and predictions.
    """
    run_dir = os.path.join('Output', f'Prompt2_merged_interventions_{shot}shot_postprocessed', model, date_folder)
    predicted_path = os.path.join(run_dir, f'{run_folder}_merged_file.csv')
    output_path = os.path.join(run_dir, f'{run_folder}_merged_with_goldstandard.csv')

    # Read the gold standard and predicted datasets
    goldstandard_df = pd.read_csv(goldstandard_path)
    predicted_df = pd.read_csv(predicted_path)

    # Outer join keeps trials that appear on only one side; their missing
    # columns are NaN in the result.
    merged_df = pd.merge(goldstandard_df, predicted_df, on="NCT Number", how="outer")

    merged_df.to_csv(output_path, index=False)

    print(f"The gold standard and predicted datasets for {model}, {shot} shots, {run_number} have been successfully merged with an outer join based on 'NCT Number'!")

    return merged_df

# Join every run with the gold standard; the joined frames are kept per
# (model, shot) key, in the order of `runs`.
all_merged_dfs = {}
for model in models:
    for shot in shots:
        merged_dfs = [
            merge_goldstandard_with_predicted(model, shot, run_folder, run_number)
            for run_folder, run_number in runs
        ]
        all_merged_dfs[(model, shot)] = merged_dfs



In [None]:
import pandas as pd
import os
# Load one gold-standard/prediction join to inspect it. After the merge,
# "Adjuvant Name_x" is the column of the left (gold standard) frame and
# "Adjuvant Name_y" the column of the right (predicted) frame.
file_path = 'Output/Prompt2_merged_interventions_0shot_postprocessed/GPT_gpt-3.5-turbo-1106/AdjuvareDB104_T0.0_18June/AdjuvareDB104_T0.0_Run1_merged_with_goldstandard.csv'
data = pd.read_csv(file_path)

nan_count = data['Adjuvant Name_x'].isna().sum()
print(f'Number of NaN values in Adjuvant Name_x column: {nan_count}')
# .copy() detaches the result from `data`, so the column assignment below
# modifies this frame and does not trigger a SettingWithCopyWarning.
df_cleaned = data.dropna(subset=['Adjuvant Name_x']).copy()

nan_count = df_cleaned['Adjuvant Name_y'].isna().sum()
print(f'Number of NaN values in Adjuvant Name_y column: {nan_count}')

# Replace NaN values with "No output Returned". Assign the result back:
# calling fillna(inplace=True) on the selected column is a chained assignment
# and does not update the frame when pandas Copy-on-Write is active.
df_cleaned['Adjuvant Name_y'] = df_cleaned['Adjuvant Name_y'].fillna("No output Returned")

df_cleaned_label = df_cleaned.copy()

# Initialize the 'Match' column with False
df_cleaned_label['Match'] = False


def clean_and_split(name):
    """Split a combined entry on the literal " and " and strip each piece.

    Case and hyphens are left untouched; a name without " and " comes back as
    a one-element list.
    """
    return list(map(str.strip, name.split(" and ")))

# Known alternative spellings mapped to the canonical adjuvant name.
# Keys are matched exactly (case-sensitive), which is why some names are
# listed in several spellings.
abbreviation_dict = {
    "IFA": "Incomplete Freund's Adjuvant",
    "Incomplete Freund's adjuvant (IFA)": "Incomplete Freund's Adjuvant",
    "rhGM-CSF": "GM-CSF",
    "Sargramostim (GM-CSF)": "GM-CSF",
    "recombinant fowlpox GM-CSF vaccine adjuvant": "GM-CSF",
    "granulocyte-macrophage colony-stimulating factor (GM-CSF)": "GM-CSF",
    "Montanide ISA 51 VG": "Montanide ISA 51",
    "Montanide ISA-51 VG": "Montanide ISA 51",
    "MONTANIDE ISA 51 VG": "Montanide ISA 51",
    "Montanide ISA51 VG": "Montanide ISA51",

    "Interleukin-2": "IL-2",
    "polyinosinic-polycytidylic acid - poly-L-lysine carboxymethylcellulose (poly-ICLC)": "Poly-ICLC",
    "Hiltonol (Poly-ICLC)": "Poly-ICLC",
    "gp96 heat shock protein-peptide complex": "GP96",
    "Therapeutic Vaccine GI-4000": "GI-4000",
    # Add more entries as needed
}


def resolve_abbreviation(name):
    """Return the canonical name for ``name``, or ``name`` itself if it is not listed.

    The lookup is an exact, case-sensitive dictionary match; no lowercasing or
    other normalisation is applied here.
    """
    return abbreviation_dict.get(name, name)


def _comparison_forms(text):
    """Return the four spellings of ``text`` that are compared pairwise.

    Form 0 is the lowercased text. Forms 1-3 additionally handle hyphens
    (removed / turned into spaces / removed together with all spaces) and then
    drop the qualifiers " adjuvant vaccine", " adjuvant system", " adjuvant".
    In the last form spaces are removed first, so the space-prefixed qualifiers
    can no longer match there.
    """
    lowered = text.lower()
    forms = [lowered]
    for variant in (
        lowered.replace("-", ""),
        lowered.replace("-", " "),
        lowered.replace(" ", "").replace("-", ""),
    ):
        for qualifier in (" adjuvant vaccine", " adjuvant system", " adjuvant"):
            variant = variant.replace(qualifier, "")
        forms.append(variant)
    return forms


# A row counts as a match when any gold-standard part and any predicted part
# agree in at least one comparison form (same form on both sides).
for index, row in df_cleaned_label.iterrows():
    parts_x = clean_and_split(row['Adjuvant Name_x'])
    parts_y = clean_and_split(row['Adjuvant Name_y'])

    match_found = any(
        form_x == form_y
        for part_x in parts_x
        for part_y in parts_y
        for form_x, form_y in zip(
            _comparison_forms(resolve_abbreviation(part_x)),
            _comparison_forms(resolve_abbreviation(part_y)),
        )
    )

    df_cleaned_label.loc[index, 'Match'] = match_found

#df_cleaned_label

# Rows whose prediction could not be matched to the gold standard
unmatched_mask = df_cleaned_label['Match'] == False
no_match_data = df_cleaned_label[unmatched_mask]

print("Number of Wrong labels", len(no_match_data))

# Share of matched rows, reported as a percentage
accuracy = df_cleaned_label['Match'].mean()
print("Accuracy", accuracy*100)
no_match_data


In [None]:
import pandas as pd
import os
import math
# Known alternative spellings mapped to the canonical adjuvant name.
# Keys are matched exactly (case-sensitive).
_ABBREVIATION_MAP = {
    "IFA": "Incomplete Freund's Adjuvant",
    "Incomplete Freund's adjuvant (IFA)": "Incomplete Freund's Adjuvant",
    "rhGM-CSF": "GM-CSF",
    "recombinant fowlpox GM-CSF vaccine adjuvant": "GM-CSF",
    "Sargramostim (GM-CSF)": "GM-CSF",
    "granulocyte-macrophage colony-stimulating factor (GM-CSF)": "GM-CSF",
    "Montanide ISA 51 VG": "Montanide ISA 51",
    "Montanide ISA-51 VG": "Montanide ISA 51",
    "MONTANIDE ISA 51 VG": "Montanide ISA 51",
    "Montanide ISA51 VG": "Montanide ISA51",

    "Interleukin-2": "IL-2",
    "polyinosinic-polycytidylic acid - poly-L-lysine carboxymethylcellulose (poly-ICLC)": "Poly-ICLC",
    "Hiltonol (Poly-ICLC)": "Poly-ICLC",
    "gp96 heat shock protein-peptide complex": "GP96",
    "Therapeutic Vaccine GI-4000": "GI-4000",
    # Add more entries as needed
}

# Qualifiers dropped before comparing names; removed in this order.
_ADJUVANT_QUALIFIERS = (" adjuvant vaccine", " adjuvant system", " adjuvant")


def _clean_and_split(name):
    """Split a combined entry on the literal " and " and strip each piece."""
    return [part.strip() for part in name.split(" and ")]


def _resolve_abbreviation(name):
    """Return the canonical name for an exactly matching alias, else ``name``."""
    return _ABBREVIATION_MAP.get(name, name)


def _comparison_forms(name):
    """Return the four spellings of ``name`` that are compared pairwise."""
    lowered = name.lower()
    forms = [lowered]
    for variant in (
        lowered.replace("-", ""),
        lowered.replace("-", " "),
        # NOTE(review): spaces are removed before the qualifiers are stripped,
        # so the space-prefixed qualifiers never match in this form. Kept as
        # is so reported metrics do not change - confirm this is intended.
        lowered.replace(" ", "").replace("-", ""),
    ):
        for qualifier in _ADJUVANT_QUALIFIERS:
            variant = variant.replace(qualifier, "")
        forms.append(variant)
    return forms


def _names_match(gold_name, predicted_name):
    """True when any gold part and any predicted part agree in one comparison form."""
    return any(
        gold_form == predicted_form
        for gold_part in _clean_and_split(gold_name)
        for predicted_part in _clean_and_split(predicted_name)
        for gold_form, predicted_form in zip(
            _comparison_forms(_resolve_abbreviation(gold_part)),
            _comparison_forms(_resolve_abbreviation(predicted_part)),
        )
    )


def process_file(file_path):
    """Label each gold-standard row of a merged CSV as matched or not.

    Parameters
    ----------
    file_path : str
        CSV with the columns "Adjuvant Name_x" (left/gold-standard side of the
        merge) and "Adjuvant Name_y" (right/predicted side).

    Returns
    -------
    tuple
        ``(labelled, halucination_count, missing_output_count)`` where
        ``labelled`` is the frame without rows lacking "Adjuvant Name_x", with
        missing predictions replaced by "No output Returned" and a boolean
        "Match" column; ``halucination_count`` is the number of rows without
        "Adjuvant Name_x"; ``missing_output_count`` is the number of remaining
        rows without "Adjuvant Name_y".
    """
    data = pd.read_csv(file_path)

    halucination_count = data['Adjuvant Name_x'].isna().sum()

    # .copy() detaches the result from `data`, so the assignments below modify
    # this frame and do not raise a SettingWithCopyWarning.
    df_cleaned_label = data.dropna(subset=['Adjuvant Name_x']).copy()

    missing_output_count = df_cleaned_label['Adjuvant Name_y'].isna().sum()

    # Assign the filled column back: fillna(inplace=True) on a selected column
    # is a chained assignment and does not update the frame when pandas
    # Copy-on-Write is active, which would leave NaN values for .split().
    df_cleaned_label['Adjuvant Name_y'] = df_cleaned_label['Adjuvant Name_y'].fillna("No output Returned")

    matches = [
        _names_match(gold_name, predicted_name)
        for gold_name, predicted_name in zip(
            df_cleaned_label['Adjuvant Name_x'], df_cleaned_label['Adjuvant Name_y']
        )
    ]
    # Explicit dtype keeps the column boolean even for an empty frame.
    df_cleaned_label['Match'] = pd.Series(matches, index=df_cleaned_label.index, dtype=bool)

    return df_cleaned_label, halucination_count, missing_output_count

# Post-processed prompt folders: first the plain prompts, then the
# "interventions" prompts, each for 0 to 4 shots.
prompt_name = (
    [f"Prompt2_merged_{shot_count}shot_postprocessed" for shot_count in range(5)]
    + [f"Prompt2_merged_interventions_{shot_count}shot_postprocessed" for shot_count in range(5)]
)
model_name = [
    "GPT_gpt-3.5-turbo-1106",
    "GPT_gpt-3.5-turbo-0125",
    "GPT_gpt-4-turbo-2024-04-09",
]
run_no = [str(run_index) for run_index in range(1, 4)]

base_path = "Output"

# Accumulators shared by every prompt/model/run combination
results = []
no_match_data_frames = []

# The labelled per-run frames are written into this folder; create it up
# front so to_csv does not fail on a fresh checkout.
os.makedirs("plot/output", exist_ok=True)

# Process each file and store the results
for prompt in prompt_name:
    for model in model_name:
        accuracies = []
        precisions = []
        recalls = []
        f1_scores = []
        row_counts = []
        halucination_counts = []
        missing_output_counts = []

        for run in run_no:
            # Reset per run, so the "missed" count below looks at this run only.
            # NOTE(review): as a consequence `and_df` (built after the loops)
            # only contains the last run that was processed - confirm whether
            # it was meant to collect every run.
            and_data_frames = []

            # The two prompt families are stored under different experiment folders.
            if "interventions" in prompt:
                file_path = f"{base_path}/{prompt}/{model}/AdjuvareDB104_T0.0_24June/AdjuvareDB104_T0.0_Run{run}_merged_with_goldstandard.csv"
            else:
                file_path = f"{base_path}/{prompt}/{model}/AdjuvareDB104_T0.0_18June/AdjuvareDB104_T0.0_Run{run}_merged_with_goldstandard.csv"

            df_cleaned, halucination_count, missing_output_count = process_file(file_path)
            df_cleaned.to_csv(f"plot/output/df_cleaned_{prompt}_{model}_{run}.csv", index=False)

            no_match_data = df_cleaned[df_cleaned['Match'] == False]
            no_match_data_frames.append(no_match_data)

            # Gold-standard entries that name two adjuvants ("A and B")
            and_data = df_cleaned[df_cleaned["Adjuvant Name_x"].str.contains(" and ", na=False)]
            and_data_frames.append(and_data)

            accuracy = df_cleaned['Match'].mean() * 100
            row_count = len(df_cleaned)

            #### Count Missed
            # An "A and B" trial that appears in fewer than two rows counts as
            # one missed adjuvant.
            combined_df = pd.concat(and_data_frames, ignore_index=True)
            group_column = 'NCT Number'
            grouped = combined_df.groupby(group_column).size()
            missed_groups = grouped[grouped < 2]
            total_missed = len(missed_groups)

            ### Count nonspecific
            # Predictions that are only a generic term are subtracted from the
            # true positives when computing precision.
            nonspecific_terms = ["adjuvant therapy", "immunologic adjuvant", "immunotherapy"]
            total_nonspecific_count = df_cleaned["Adjuvant Name_y"].isin(nonspecific_terms).sum()

            Total = row_count + total_missed
            TP = df_cleaned['Match'].sum()  # True positives
            # Guarded like recall and F1 below: without it a run with no match
            # divides by zero and propagates NaN/inf into the averages.
            precision = (TP - total_nonspecific_count) / TP if TP != 0 else 0
            recall = TP / Total if Total != 0 else 0
            f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0
            results.append({
                "Prompt": prompt,
                "Model": model,
                "Run": run,
                "halucination_count": int(halucination_count),
                "missing_output_count": int(missing_output_count),
                # Emitted because the plotting cell charts this column.
                "Considered Rows": int(row_count),
                "Accuracy": round(accuracy, 2),
                "Precision": round(precision*100, 2),
                "Recall": round(recall*100, 2),
                "F1 Score": round(f1_score*100, 2),
            })
            accuracies.append(accuracy)
            precisions.append(precision)
            recalls.append(recall)
            f1_scores.append(f1_score)
            row_counts.append(row_count)
            halucination_counts.append(halucination_count)
            missing_output_counts.append(missing_output_count)

        if accuracies:
            average_accuracy = sum(accuracies) / len(accuracies)

            average_precision = sum(precisions) / len(precisions)
            average_recall = sum(recalls) / len(recalls)
            average_f1_score = sum(f1_scores) / len(f1_scores)

            average_halucination_count = sum(halucination_counts) / len(halucination_counts)
            average_missing_output_count = sum(missing_output_counts) / len(missing_output_counts)
            average_row_count = sum(row_counts) / len(row_counts)

            results.append({
                "Prompt": prompt,
                "Model": model,
                "Run": "Average",
                "halucination_count": round(average_halucination_count,2),
                "missing_output_count": round(average_missing_output_count,2),
                "Considered Rows": round(average_row_count, 2),
                "Accuracy": round(average_accuracy, 2),
                "Precision": round(average_precision*100, 2),
                "Recall": round(average_recall*100, 2),
                "F1 Score": round(average_f1_score*100, 2),
            })

# Convert results to a DataFrame
results_df = pd.DataFrame(results)
no_match_df = pd.concat(no_match_data_frames, ignore_index=True)
and_df = pd.concat(and_data_frames, ignore_index=True)

and_df

In [None]:
# Display the per-run and "Average" metric rows collected in the metrics cell
results_df

In [None]:
# Save the "A and B" gold-standard rows.
# NOTE(review): and_df is built from a list that is reset for every run, so it
# appears to hold only the last processed run - confirm this is intended.
and_df.to_csv("plot/and_df.csv", index=False)

In [None]:
# Save the metrics table (one row per run plus one "Average" row per
# prompt/model pair) as a CSV file
results_df.to_csv("plot/accuracy_and_wrong_labels_results.csv", index=False)

In [None]:
# Collapse identical unmatched rows (the same row can recur across runs,
# prompts and models), save them for manual review and display them
unique_no_match_df = no_match_df.drop_duplicates()
unique_no_match_df.to_csv("plot/unique_mismatched cases.csv", index=False)
unique_no_match_df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Keep only the per-prompt/model "Average" rows for plotting
is_average_row = results_df["Run"] == "Average"
average_df = results_df[is_average_row]

def plot_bar_chart(df, value_col, y_label, title):
    """Draw a grouped bar chart of ``value_col`` per prompt, one bar per model.

    The figure is saved as ``plot/<value_col>.png`` and shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "Prompt", "Model" and ``value_col``, with at
        most one row per prompt/model pair.
    value_col : str
        Column to plot; also used as the output file name.
    y_label : str
        Label of the y axis.
    title : str
        Figure title.
    """
    folder_name = "plot"
    os.makedirs(folder_name, exist_ok=True)  # savefig does not create folders
    full_path = os.path.join(folder_name, value_col + ".png")

    prompts = df["Prompt"].unique()
    models = df["Model"].unique()
    x = np.arange(len(prompts))  # the label locations
    width = 0.2  # the width of the bars

    # Bars are centred on their x position, so the group is centred on the
    # tick when the offsets are symmetric around zero.
    offsets = (np.arange(len(models)) - (len(models) - 1) / 2) * width

    fig, ax = plt.subplots(figsize=(14, 7))
    for offset, model in zip(offsets, models):
        # Align values to the tick order by prompt name instead of relying on
        # row order; a prompt missing for this model becomes NaN (no bar).
        heights = (
            df.loc[df["Model"] == model]
            .set_index("Prompt")[value_col]
            .reindex(prompts)
        )
        ax.bar(x + offset, heights.to_numpy(), width, label=model)

    ax.set_xlabel("Prompt")
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(prompts, rotation=45, ha='right')
    ax.legend(title="Model")
    fig.tight_layout()
    fig.savefig(full_path)
    plt.show()

# One chart per averaged metric: (column, y-axis label, title)
plot_specs = [
    ("Accuracy", "Average Accuracy (%)", "Average Accuracy for Each Prompt and Model Combination"),
    ("Precision", "Average Precision (%)", "Average Precision for Each Prompt and Model Combination"),
    ("Recall", "Average Recall (%)", "Average Recall for Each Prompt and Model Combination"),
    ("F1 Score", "Average F1 Score (%)", "Average F1 Score for Each Prompt and Model Combination"),
    ("halucination_count", "Halucination Count", "Average Halucination Count for Each Prompt and Model Combination"),
    ("missing_output_count", "Missing Output Count", "Average Missing Output Count Count for Each Prompt and Model Combination"),
    ("Considered Rows", "Considered Rows", "Average Considered Rows Count for Each Prompt and Model Combination"),
]

for value_col, y_label, title in plot_specs:
    # Skip metrics the results table does not provide instead of stopping
    # the cell with a KeyError.
    if value_col not in average_df.columns:
        print(f"Skipping plot for '{value_col}': column not present in the results")
        continue
    plot_bar_chart(average_df, value_col, y_label, title)


In [None]:
import pandas as pd

# Load the saved metrics table.
# NOTE(review): the save cell above writes
# "plot/accuracy_and_wrong_labels_results.csv" (no "_1" suffix) - confirm this
# file name refers to the intended export.
file_path = 'plot/accuracy_and_wrong_labels_results_1.csv'
df = pd.read_csv(file_path)

# Keep only the rows where Run is "Average"
average_rows = df[df['Run'] == 'Average']

# Group the data by the 'Model' column
grouped_by_model = average_rows.groupby('Model')

# For each model, the complete row that achieves the best value of each metric
max_stats_per_model = {}

for model, group in grouped_by_model:
    # idxmax returns the index label of the first maximum in the group
    max_precision_row = group.loc[group['Precision'].idxmax()]
    max_recall_row = group.loc[group['Recall'].idxmax()]
    max_f1_score_row = group.loc[group['F1 Score'].idxmax()]

    max_stats_per_model[model] = {
        'Highest Precision': max_precision_row,
        'Highest Recall': max_recall_row,
        'Highest F1 Score': max_f1_score_row
    }

# Display the results for each model
for model, stats in max_stats_per_model.items():
    print(f"\nModel: {model}")
    print("\nHighest Precision:")
    print(stats['Highest Precision'])
    print("\nHighest Recall:")
    print(stats['Highest Recall'])
    print("\nHighest F1 Score:")
    print(stats['Highest F1 Score'])

In [None]:
# Flatten the best rows into one table: three rows per model, one for each
# metric that was maximised, each carrying all three scores of that row.
# Note: this rebinds `results` and `results_df`, which the metrics cell above
# also defines.
results = []

for model, stats in max_stats_per_model.items():
    for metric in ('Highest Precision', 'Highest Recall', 'Highest F1 Score'):
        best_row = stats[metric]
        results.append({
            'Model': model,
            'Prompt': best_row['Prompt'],
            'Metric': metric,
            'Precision': best_row['Precision'],
            'Recall': best_row['Recall'],
            'F1 Score': best_row['F1 Score'],
        })

# Convert the results into a dataframe
results_df = pd.DataFrame(results)

results_df