In [5]:
import os
import pandas as pd
import glob

def is_correct_prediction(target, predicted):
    """Return True when a predicted model label agrees with the gold target.

    The gold ``target`` is two-way ('entailment' / 'non-entailment') while
    ``predicted`` is one of the model labels 'LABEL_0', 'LABEL_1', 'LABEL_2'.
    'LABEL_0' is treated as entailment; 'LABEL_1' and 'LABEL_2' are both
    collapsed into non-entailment.

    Args:
        target: Gold label string.
        predicted: Model output label string.

    Returns:
        bool: True if the prediction matches the target under the mapping
        above; False otherwise, including for any unrecognised target.
    """
    if target == 'entailment':
        return bool(predicted == 'LABEL_0')
    if target == 'non-entailment':
        # Both remaining model labels count as non-entailment.
        return predicted in ('LABEL_1', 'LABEL_2')
    return False


def calculate_accuracy_by_heuristic(tsv_file):
    """Compute per-heuristic correct/incorrect percentages for one results file.

    The file is read as tab-separated text and must provide the columns
    'target', 'predicted' and 'condition'. Each row is scored with
    ``is_correct_prediction`` and tallied under its 'condition' value; rows
    whose condition is not one of the three tracked heuristics are ignored.

    Args:
        tsv_file: Path to the tab-separated results file.

    Returns:
        dict: Six entries keyed 'correct_<heuristic>' and
        'incorrect_<heuristic>' for lexical_overlap, subsequence and
        constituent, each a percentage of that heuristic's rows. A heuristic
        with no rows reports 0 for both entries.
    """
    tracked = ('lexical_overlap', 'subsequence', 'constituent')
    # tallies[heuristic][outcome] -> number of rows seen
    tallies = {name: {'correct': 0, 'incorrect': 0} for name in tracked}

    frame = pd.read_csv(tsv_file, sep='\t')
    for _, record in frame.iterrows():
        was_right = is_correct_prediction(record['target'], record['predicted'])
        bucket = tallies.get(record['condition'])
        if bucket is None:
            # Row belongs to a condition that is not being reported.
            continue
        bucket['correct' if was_right else 'incorrect'] += 1

    # (output key, heuristic, outcome) in the order the report expects.
    layout = (
        ('correct_lexical_overlap', 'lexical_overlap', 'correct'),
        ('correct_subsequence', 'subsequence', 'correct'),
        ('correct_constituent', 'constituent', 'correct'),
        ('incorrect_lexical_overlap', 'lexical_overlap', 'incorrect'),
        ('incorrect_subsequence', 'subsequence', 'incorrect'),
        ('incorrect_constituent', 'constituent', 'incorrect'),
    )

    report = {}
    for key, name, outcome in layout:
        seen = tallies[name]['correct'] + tallies[name]['incorrect']
        # Guard against dividing by zero when a heuristic never appears.
        fraction = tallies[name][outcome] / seen if seen > 0 else 0
        report[key] = fraction * 100
    return report

# Folder searched (non-recursively) for '*.tsv' prediction files.
results_folder = 'midterm_files/results'

# Column order of the summary table; also used to give an empty run a
# well-formed (header-only) CSV instead of failing on column selection.
summary_columns = ['file', 'correct_lexical_overlap', 'correct_subsequence', 'correct_constituent',
                   'incorrect_lexical_overlap', 'incorrect_subsequence', 'incorrect_constituent']

output_csv = 'hans_accuracy_by_heuristic.csv'

# glob returns matches in arbitrary order; sort so the table rows are
# reproducible from run to run.
tsv_files = sorted(glob.glob(os.path.join(results_folder, '*.tsv')))

accuracy_results = []

# Calculate and store accuracy for each file, broken down by heuristics
for tsv_file in tsv_files:
    heuristic_accuracy = calculate_accuracy_by_heuristic(tsv_file)
    heuristic_accuracy['file'] = tsv_file  # Add the filename to the results
    accuracy_results.append(heuristic_accuracy)

# Convert the results into a DataFrame with the columns in report order.
# Passing columns= keeps this valid even when no TSV files were found.
results_df = pd.DataFrame(accuracy_results, columns=summary_columns)

# Save the final table
results_df.to_csv(output_csv, index=False)

print(f"Heuristic-based accuracy results saved to '{output_csv}'.")


Heuristic-based accuracy results saved to accuracy_results.csv'.
