In [1]:
!pip install pandas



In [2]:
# Colab-only setup: mount Google Drive at /content/drive. The CSV paths used by
# the evaluation cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
## SST-2

import pandas as pd

# Read the SST-2 evaluation results into a DataFrame
file_path = '/content/drive/MyDrive/Algoverse/New Results/Evaluation Results/Gemini-1.5-Pro Eval Results/SST-2/SST-2_Evaluation_Gemini_F.csv'
data = pd.read_csv(file_path)

# Fail fast if any column used by the accuracy computation is absent
required_columns = ['SAE Answer', 'AAVE Answer', 'Actual Label']
if any(column not in data.columns for column in required_columns):
    raise ValueError("The CSV file must contain 'SAE Answer', 'AAVE Answer', and 'Actual Label' columns.")

# Number of rows where each answer column equals 'Actual Label'
sae_correct = data['SAE Answer'].eq(data['Actual Label']).sum()
aave_correct = data['AAVE Answer'].eq(data['Actual Label']).sum()

# Express the match counts as a percentage of all rows in the file
sae_accuracy = sae_correct / len(data) * 100
aave_accuracy = aave_correct / len(data) * 100

# Report both accuracies with two decimal places
print(f"SAE Answer Accuracy: {sae_accuracy:.2f}%")
print(f"AAVE Answer Accuracy: {aave_accuracy:.2f}%")


SAE Answer Accuracy: 92.00%
AAVE Answer Accuracy: 91.40%


In [11]:
## CoLa

import pandas as pd

# Parse the CoLA evaluation results stored on the mounted drive
file_path = '/content/drive/MyDrive/Algoverse/New Results/Evaluation Results/Gemini-1.5-Pro Eval Results/CoLa/CoLA_Evaluation_Gemini_F.csv'
data = pd.read_csv(file_path)

# The computation below indexes these three columns, so verify them up front
required_columns = ['Actual Label', 'SAE Answer', 'AAVE Answer']
columns_present = set(required_columns).issubset(data.columns)
if not columns_present:
    raise ValueError("The CSV file must contain 'Actual Label', 'SAE Answer', and 'AAVE Answer' columns.")

# Count the rows in which each answer column agrees with 'Actual Label'
sae_correct = (data['Actual Label'] == data['SAE Answer']).sum()
aave_correct = (data['Actual Label'] == data['AAVE Answer']).sum()

# Turn the counts into percentages of the total row count
sae_accuracy = sae_correct / len(data) * 100
aave_accuracy = aave_correct / len(data) * 100

# Print the two percentages, rounded to two decimals
print(f"SAE Answer Accuracy: {sae_accuracy:.2f}%")
print(f"AAVE Answer Accuracy: {aave_accuracy:.2f}%")

SAE Answer Accuracy: 80.20%
AAVE Answer Accuracy: 41.20%


In [3]:
# BoolQ

import pandas as pd
import re

# Read the BoolQ evaluation results into a DataFrame
file_path = '/content/drive/MyDrive/Algoverse/New Results/Evaluation Results/Gemini-1.5-Flash Eval results/BoolQ/BoolQ_Evaluation_GeminiF.csv'
data = pd.read_csv(file_path)

# Stop immediately when a column needed further down is not in the file
required_columns = ['Actual Label', 'SAE Answer', 'AAVE Answer']
if any(column not in data.columns for column in required_columns):
    raise ValueError("The CSV file must contain 'Actual Label', 'SAE Answer', and 'AAVE Answer' columns.")

# Map a raw answer cell onto the canonical strings 'true' / 'false'
def clean_answer(answer):
    """Return 'true' or 'false' for a boolean or boolean-like value.

    Booleans are translated directly. Any other value is converted with
    ``str``, stripped of surrounding whitespace and lowercased; the result
    must then read exactly ``true`` or ``false`` (matched case-insensitively).

    Raises:
        ValueError: if the normalized text is neither 'true' nor 'false'.
            The message contains the normalized text, not the raw input.
    """
    # Real booleans need no text parsing
    if isinstance(answer, bool):
        return 'true' if answer else 'false'

    normalized = str(answer).strip().lower()

    # Try each canonical spelling in turn; the whole string has to match
    for canonical in ('true', 'false'):
        if re.fullmatch(canonical, normalized, re.IGNORECASE):
            return canonical

    # Anything else is treated as malformed input
    raise ValueError(f"Unexpected answer format: {normalized}")

# Normalize the label column and both answer columns with clean_answer,
# in the same order as before: label first, then SAE, then AAVE
for column_name in ('Actual Label', 'SAE Answer', 'AAVE Answer'):
    data[column_name] = data[column_name].apply(clean_answer)

# Rows where each normalized answer equals the normalized label
sae_correct = data['SAE Answer'].eq(data['Actual Label']).sum()
aave_correct = data['AAVE Answer'].eq(data['Actual Label']).sum()

# Convert match counts to percentages of all rows
sae_accuracy = sae_correct / len(data) * 100
aave_accuracy = aave_correct / len(data) * 100

# Show the results with two decimal places
print(f"SAE Answer Accuracy: {sae_accuracy:.2f}%")
print(f"AAVE Answer Accuracy: {aave_accuracy:.2f}%")


SAE Answer Accuracy: 89.69%
AAVE Answer Accuracy: 87.29%


In [None]:
# MultiRC

import pandas as pd
import re

# Read the MultiRC evaluation results into a DataFrame
file_path = '/content/drive/MyDrive/Algoverse/New Results/Evaluation Results/GPT-4-turbo Eval Results/MultiRC/MultiRC_Evaluation_GPT4Turbo.csv'
data = pd.read_csv(file_path)

# Header cells may carry stray leading/trailing spaces; remove them before the lookup
data.columns = data.columns.str.strip()

# Abort when any of the columns used below is missing
required_columns = ['Actual Answer', 'SAE Answer', 'AAVE Answer']
if any(column not in data.columns for column in required_columns):
    raise ValueError("The CSV file must contain 'Actual Answer', 'SAE Answer', and 'AAVE Answer' columns.")

# Function to clean and standardize answer values
def clean_answer(answer):
    """Normalize a raw answer cell to the integer label 0 or 1.

    Accepts an int/bool that is already 0 or 1, or any value whose string
    form contains a standalone ``0`` or ``1`` (optionally written as ``0.0`` /
    ``1.0``, e.g. when pandas read the column as float). The first such
    standalone token wins.

    A digit only counts when it is not part of a longer number, so inputs
    such as "10", "2021" or "0.5" are rejected instead of being silently
    read as a label.

    Args:
        answer: Raw cell value (int, float, str, ...).

    Returns:
        int: 0 or 1.

    Raises:
        ValueError: if no standalone 0/1 can be found in the value.
    """
    # Already a valid label. bool is a subclass of int, so coerce with int()
    # to guarantee a plain integer comes back.
    if isinstance(answer, int) and answer in (0, 1):
        return int(answer)

    answer_str = str(answer)
    # (?<![\d.])  : the digit must not continue a number ("10", "2.1")
    # (?:\.0+)?   : tolerate a float rendering such as "1.0"
    # (?!\.?\d)   : the digit must not start a longer number ("12", "0.5")
    match = re.search(r'(?<![\d.])([01])(?:\.0+)?(?!\.?\d)', answer_str)
    if match:
        return int(match.group(1))

    # If no valid label is found, raise an error
    raise ValueError(f"Unexpected answer format: {answer}")

# Run clean_answer over the reference column first, then over both answer columns
for column_name in ('Actual Answer', 'SAE Answer', 'AAVE Answer'):
    data[column_name] = data[column_name].apply(clean_answer)

# Rows where each cleaned answer equals the cleaned 'Actual Answer'
sae_correct = data['SAE Answer'].eq(data['Actual Answer']).sum()
aave_correct = data['AAVE Answer'].eq(data['Actual Answer']).sum()

# Percentages relative to the total number of rows
sae_accuracy = sae_correct / len(data) * 100
aave_accuracy = aave_correct / len(data) * 100

# Print both accuracies with two decimals
print(f"SAE Answer Accuracy: {sae_accuracy:.2f}%")
print(f"AAVE Answer Accuracy: {aave_accuracy:.2f}%")

SAE Answer Accuracy: 86.20%
AAVE Answer Accuracy: 73.70%


In [10]:
# COPA

import pandas as pd
import re

# Parse the COPA evaluation results from the mounted drive
file_path = '/content/drive/MyDrive/Algoverse/New Results/Evaluation Results/Gemini-1.5-Pro Eval Results/COPA/COPA_Evaluation_Gemini.csv'
data = pd.read_csv(file_path)

# Trim surrounding whitespace from the header names so the check below can match them
data.columns = data.columns.str.strip()

# All three columns are needed by the accuracy computation
required_columns = ['Actual Answer', 'SAE Answer', 'AAVE Answer']
columns_present = set(required_columns).issubset(data.columns)
if not columns_present:
    raise ValueError("The CSV file must contain 'Actual Answer', 'SAE Answer', and 'AAVE Answer' columns.")

# Function to clean and standardize answer values
def clean_answer(answer):
    """Normalize a raw answer cell to the integer label 0 or 1.

    Accepts an int/bool that is already 0 or 1, or any value whose string
    form contains a standalone ``0`` or ``1`` (optionally written as ``0.0`` /
    ``1.0``, e.g. when pandas read the column as float). The first such
    standalone token wins.

    A digit only counts when it is not part of a longer number, so inputs
    such as "10", "2021" or "0.5" are rejected instead of being silently
    read as a label.

    Args:
        answer: Raw cell value (int, float, str, ...).

    Returns:
        int: 0 or 1.

    Raises:
        ValueError: if no standalone 0/1 can be found in the value.
    """
    # Already a valid label. bool is a subclass of int, so coerce with int()
    # to guarantee a plain integer comes back.
    if isinstance(answer, int) and answer in (0, 1):
        return int(answer)

    answer_str = str(answer)
    # (?<![\d.])  : the digit must not continue a number ("10", "2.1")
    # (?:\.0+)?   : tolerate a float rendering such as "1.0"
    # (?!\.?\d)   : the digit must not start a longer number ("12", "0.5")
    match = re.search(r'(?<![\d.])([01])(?:\.0+)?(?!\.?\d)', answer_str)
    if match:
        return int(match.group(1))

    # If no valid label is found, raise an error
    raise ValueError(f"Unexpected answer format: {answer}")

# Apply clean_answer column by column: reference answers first, then SAE, then AAVE
for column_name in ('Actual Answer', 'SAE Answer', 'AAVE Answer'):
    data[column_name] = data[column_name].apply(clean_answer)

# Count the rows in which each cleaned answer agrees with 'Actual Answer'
sae_correct = (data['Actual Answer'] == data['SAE Answer']).sum()
aave_correct = (data['Actual Answer'] == data['AAVE Answer']).sum()

# Scale the counts to percentages of the full row count
sae_accuracy = sae_correct / len(data) * 100
aave_accuracy = aave_correct / len(data) * 100

# Output the two percentages, two decimals each
print(f"SAE Answer Accuracy: {sae_accuracy:.2f}%")
print(f"AAVE Answer Accuracy: {aave_accuracy:.2f}%")

SAE Answer Accuracy: 97.40%
AAVE Answer Accuracy: 95.80%


In [8]:
## WSC

import pandas as pd

# Read the WSC evaluation results into a DataFrame
file_path = '/content/drive/MyDrive/Algoverse/New Results/Evaluation Results/Gemini-1.5-Pro Eval Results/WSC/WSC_Evaluation_GeminiF.csv'
data = pd.read_csv(file_path)

# Collect any required column that is absent and refuse to continue if there is one
required_columns = ['Actual Label', 'SAE Answer', 'AAVE Answer']
missing_columns = [name for name in required_columns if name not in data.columns]
if missing_columns:
    raise ValueError("The CSV file must contain 'Actual Label', 'SAE Answer', and 'AAVE Answer' columns.")

# Rows where each answer column equals 'Actual Label'
sae_correct = data['SAE Answer'].eq(data['Actual Label']).sum()
aave_correct = data['AAVE Answer'].eq(data['Actual Label']).sum()

# Percent of all rows that matched, per answer column
sae_accuracy = sae_correct / len(data) * 100
aave_accuracy = aave_correct / len(data) * 100

# Print the two accuracies rounded to two decimals
print(f"SAE Answer Accuracy: {sae_accuracy:.2f}%")
print(f"AAVE Answer Accuracy: {aave_accuracy:.2f}%")

SAE Answer Accuracy: 51.37%
AAVE Answer Accuracy: 51.22%
