In [9]:
import os
import json

# Baseline prediction folders to compare, one per dataset variant
# (anti-stereotype, pro-stereotype, non-stereotype), in processing order.
folders = [
    "bias-gender-professions-dataset/antistereo-baseline",
    "bias-gender-professions-dataset/prostereo-baseline",
    "bias-gender-professions-dataset/nonstereo-baseline",
]
antistereotype_folder, stereotype_folder, nonstereo_folder = folders

# Display names for the integer label ids 0, 1 and 2
label_mapping = dict(enumerate(("entailment", "neutral", "contradiction")))

def count_labels(file_path):
    """Counts the labels in a JSONL file.

    Every non-blank line is parsed as a JSON object and its "predicted_label"
    value is tallied when it is one of the known label ids (0, 1, 2). Records
    whose label is missing or not one of those ids are ignored.

    Args:
        file_path: Path to the JSONL predictions file.

    Returns:
        A dict mapping each label id (0, 1, 2) to the number of records
        carrying that label.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    label_counts = {0: 0, 1: 0, 2: 0}
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default text encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. an extra newline at the end of the file) are
            # not records; json.loads would raise on them.
            if not line.strip():
                continue
            prediction = json.loads(line)
            predicted_label = prediction.get("predicted_label")
            if predicted_label in label_counts:
                label_counts[predicted_label] += 1
    return label_counts

# Tally predictions for every folder whose predictions file is present;
# folders without one are reported and left out of the results.
results = {}
for folder in folders:
    eval_file_path = os.path.join(folder, "eval_predictions.jsonl")
    if not os.path.exists(eval_file_path):
        print(f"File not found: {eval_file_path}")
        continue
    results[folder] = count_labels(eval_file_path)

# Report each label's count and its share of that folder's tallied predictions
for folder, label_counts in results.items():
    total = sum(label_counts.values())
    print(f"Results for {folder}:")
    for label_id, occurrences in label_counts.items():
        if total > 0:
            share = (occurrences / total) * 100
        else:
            share = 0  # nothing tallied; avoids dividing by zero
        print(f"  {label_mapping[label_id]}: {occurrences} ({share:.2f}%)")
    print()
    

Results for bias-gender-professions-dataset/antistereo-baseline:
  entailment: 9 (0.90%)
  neutral: 232 (23.20%)
  contradiction: 759 (75.90%)

Results for bias-gender-professions-dataset/prostereo-baseline:
  entailment: 680 (68.00%)
  neutral: 301 (30.10%)
  contradiction: 19 (1.90%)

Results for bias-gender-professions-dataset/nonstereo-baseline:
  entailment: 795 (23.25%)
  neutral: 1621 (47.40%)
  contradiction: 1004 (29.36%)



In [None]:

# The two result folders whose predictions are compared record by record
antistereotype_folder = "results-stereoset-gender-antistereotype-baseline"
stereotype_folder = "results-stereoset-gender-stereotype-baseline"

# Each folder is expected to hold its predictions in eval_predictions.jsonl
antistereotype_file, stereotype_file = (
    os.path.join(results_folder, "eval_predictions.jsonl")
    for results_folder in (antistereotype_folder, stereotype_folder)
)

def load_predictions(file_path):
    """Loads predictions from a JSONL file into a list.

    Every non-blank line is parsed as one JSON value; blank or
    whitespace-only lines are skipped.

    Args:
        file_path: Path to the JSONL predictions file.

    Returns:
        A list with one parsed record per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default text encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        # Blank lines are not records and would make json.loads raise.
        return [json.loads(line) for line in f if line.strip()]

# Ensure both files exist
if not os.path.exists(antistereotype_file):
    raise FileNotFoundError(f"File not found: {antistereotype_file}")
if not os.path.exists(stereotype_file):
    raise FileNotFoundError(f"File not found: {stereotype_file}")

# Load predictions
antistereotype_predictions = load_predictions(antistereotype_file)
stereotype_predictions = load_predictions(stereotype_file)

# Records are paired by position and zip() stops at the shorter list, so a
# length mismatch would silently drop the unpaired tail. Make that visible.
if len(antistereotype_predictions) != len(stereotype_predictions):
    compared_pairs = min(len(antistereotype_predictions), len(stereotype_predictions))
    print(
        "Warning: prediction files differ in length "
        f"(antistereotype: {len(antistereotype_predictions)}, "
        f"stereotype: {len(stereotype_predictions)}); "
        f"only the first {compared_pairs} pairs are compared."
    )

# Compare predictions and collect differences
differences = []
for anti_pred, stereo_pred in zip(antistereotype_predictions, stereotype_predictions):
    # "predicted_label" is required on every record (a missing key raises
    # KeyError); "premise" and "hypothesis" are optional and default to "N/A".
    if anti_pred["predicted_label"] != stereo_pred["predicted_label"]:
        differences.append({
            # Premise is taken from the antistereotype record only; assumes
            # paired records share the same premise -- TODO confirm.
            "premise": anti_pred.get("premise", "N/A"),
            "antistereotype_hypothesis": anti_pred.get("hypothesis", "N/A"),
            "antistereotype_prediction": anti_pred["predicted_label"],
            "stereotype_hypothesis": stereo_pred.get("hypothesis", "N/A"),
            "stereotype_prediction": stereo_pred["predicted_label"]
        })

# Display differences
print("Examples where predictions differ between antistereotype and stereotype:")
if differences:
    for diff in differences:
        print(json.dumps(diff, indent=2))
else:
    print("No differences found.")