In [2]:
import pandas as pd

# Read the workbook holding the raw (unparsed) evaluation text, one row per question
file_path = "Evaluated_Questions_few_shot.xlsx"
evaluations_df = pd.read_excel(io=file_path)

# Define the parsing function for the raw evaluation text
def _strip_label(text, label):
    """Return ``text`` without a leading ``label`` and without outer whitespace."""
    text = text.strip()
    if text.startswith(label):
        text = text[len(label):]
    return text.strip()


def parse_criteria_output(raw_output):
    """Parse one raw evaluation string into a flat dict of column values.

    Recognised lines (leading/trailing whitespace on a line is ignored):

    * ``Criterion: <name> | Grade: <grade> | Reason: <reason>`` produces the
      keys ``"<name> Grade"`` and ``"<name> Reason"``.
    * ``Total Percentage: <value>%`` produces the key ``"Total Percentage"``
      with the ``%`` sign removed (the value stays a string).

    Every other line is ignored.

    Args:
        raw_output: Raw evaluation text. Any non-string value (for example
            ``None`` or a float NaN) is treated as an empty evaluation.

    Returns:
        dict: Column name -> string value. Empty when nothing was recognised.
        A criterion line that lacks a grade or reason field yields an empty
        string for the missing field instead of raising.
    """
    parsed_data = {}

    # Non-string input has no text to parse; return an empty result rather
    # than failing on ``.strip()``.
    if not isinstance(raw_output, str):
        return parsed_data

    # splitlines() copes with both "\n" and "\r\n" line endings.
    for line in raw_output.splitlines():
        line = line.strip()

        if line.startswith("Criterion:"):
            # Split into at most three fields so that a "|" inside the reason
            # text stays part of the reason.
            parts = line.split("|", 2)
            # Pad so that a line with missing fields does not raise IndexError.
            parts += [""] * (3 - len(parts))

            criterion = _strip_label(parts[0], "Criterion:")
            grade = _strip_label(parts[1], "Grade:")
            reason = _strip_label(parts[2], "Reason:")

            parsed_data[f"{criterion} Grade"] = grade
            parsed_data[f"{criterion} Reason"] = reason
        elif line.startswith("Total Percentage:"):
            # Split on the first colon only, then drop the percent sign.
            value = line.split(":", 1)[1]
            parsed_data["Total Percentage"] = value.replace("%", "").strip()

    return parsed_data

# Parse every row's raw evaluation text into one flat record (dict) per question
parsed_evaluations = []
for index, row in evaluations_df.iterrows():
    raw_output = row.get("Raw Evaluation", "")
    # The "" default above only applies when the column is absent. An empty
    # Excel cell is read as NaN (a float), so normalise any non-string value
    # to "" and keep the row with just its Question/Choices/Answer values.
    if not isinstance(raw_output, str):
        raw_output = ""
    parsed_data = parse_criteria_output(raw_output)

    # Carry the identifying columns over from the source row
    parsed_data["Question"] = row["Question"]
    parsed_data["Choices"] = row["Choices"]
    parsed_data["Answer"] = row["Answer"]

    parsed_evaluations.append(parsed_data)

# Build the DataFrame in one step from the list of records
parsed_df = pd.DataFrame(parsed_evaluations)

# Put the identifying columns first; parsed columns follow in their existing order
leading_columns = ["Question", "Choices", "Answer"]
columns_order = leading_columns + [col for col in parsed_df.columns if col not in leading_columns]
parsed_df = parsed_df[columns_order]

# Save the parsed data to a new Excel file (without the index column)
output_file_path = 'Parsed_Evaluated_Questions_few_shot.xlsx'
parsed_df.to_excel(output_file_path, index=False)

print(f"Parsing complete. Parsed data saved to {output_file_path}.")


Parsing complete. Parsed data saved to Parsed_Evaluated_Questions_few_shot.xlsx.
