<a href="https://colab.research.google.com/github/jamesalv/HateDeRC/blob/master/HateDeRC_Full.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import shutil
import os

# Delete HateDeRC directory if it exists
if os.path.exists('HateDeRC'):
  shutil.rmtree('HateDeRC')
!git clone https://github.com/jamesalv/HateDeRC
%cd HateDeRC

Cloning into 'HateDeRC'...
remote: Enumerating objects: 79, done.[K
remote: Counting objects: 100% (79/79), done.[K
remote: Compressing objects: 100% (59/59), done.[K
remote: Total 79 (delta 35), reused 53 (delta 18), pack-reused 0 (from 0)[K
Receiving objects: 100% (79/79), 5.26 MiB | 5.31 MiB/s, done.
Resolving deltas: 100% (35/35), done.
/content/HateDeRC


In [2]:
from TrainingConfig import TrainingConfig
from typing import Dict, Any, Tuple, List
import numpy as np
import torch
from transformers import AutoTokenizer
import json

In [3]:
data_path = 'Data/dataset.json'

In [4]:
config = TrainingConfig()

In [5]:
# Seed all randomness for reproducibility
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(config.seed)
if device.type == 'cuda':
    torch.cuda.manual_seed_all(config.seed)
np.random.seed(config.seed)

# Preprocessing

In [6]:
import re
import string

def deobfuscate_text(text):
    """
    Normalize common text obfuscation patterns to reveal original words.
    Useful for hate speech detection and content analysis.

    Args:
        text (str): Input text with potential obfuscations

    Returns:
        str: Text with obfuscations normalized
    """
    if not isinstance(text, str):
        return text

    # Make a copy to work with
    result = text.lower()

    # 1. Handle asterisk/symbol replacements
    symbol_patterns = {
        # Common profanity
        r'f\*+c?k': 'fuck',
        r'f\*+': 'fuck',
        r's\*+t': 'shit',
        r'b\*+ch': 'bitch',
        r'a\*+s': 'ass',
        r'd\*+n': 'damn',
        r'h\*+l': 'hell',
        r'c\*+p': 'crap',

        # Slurs and hate speech terms (be comprehensive for detection)
        r'n\*+g+[aer]+': 'nigger',  # Various n-word obfuscations
        r'f\*+g+[ot]*': 'faggot',
        r'r\*+[dt]ard': 'retard',
        r'sp\*+c': 'spic',

        # Other symbols
        r'@ss': 'ass',
        r'b@tch': 'bitch',
        r'sh!t': 'shit',
        r'f#ck': 'fuck',
        r'd@mn': 'damn',
    }

    for pattern, replacement in symbol_patterns.items():
        result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)

    # 2. Handle character spacing (f u c k -> fuck)
    spacing_patterns = {
        r'\bf\s+u\s+c\s+k\b': 'fuck',
        r'\bs\s+h\s+i\s+t\b': 'shit',
        r'\bd\s+a\s+m\s+n\b': 'damn',
        r'\bh\s+e\s+l\s+l\b': 'hell',
        r'\ba\s+s\s+s\b': 'ass',
        r'\bc\s+r\s+a\s+p\b': 'crap',
    }

    for pattern, replacement in spacing_patterns.items():
        result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)

    # 3. Handle number/letter substitutions
    leet_patterns = {
        # Basic leet speak
        r'\b3\s*1\s*1\s*3\b': 'elle',  # 3113 -> elle
        r'\bf4g\b': 'fag',
        r'\bf4gg0t\b': 'faggot',
        r'\bn00b\b': 'noob',
        r'\bl33t\b': 'leet',
        r'\bh4t3\b': 'hate',
        r'\b5h1t\b': 'shit',
        r'\bf0ck\b': 'fock',
    }

    for pattern, replacement in leet_patterns.items():
        result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)

    # 4. Handle repeated characters and separators
    # Remove excessive punctuation between letters
    result = re.sub(r'([a-z])[^\w\s]+([a-z])', r'\1\2', result)

    # Handle underscore separation
    result = re.sub(r'([a-z])_+([a-z])', r'\1\2', result)

    # Handle dot separation
    result = re.sub(r'([a-z])\.+([a-z])', r'\1\2', result)

    # 5. Handle common misspellings/variations used for evasion
    evasion_patterns = {
        r'\bfuk\b': 'fuck',
        r'\bfuq\b': 'fuck',
        r'\bfck\b': 'fuck',
        r'\bshyt\b': 'shit',
        r'\bshit\b': 'shit',
        r'\bbiatch\b': 'bitch',
        r'\bbeatch\b': 'bitch',
        r'\basshole\b': 'asshole',
        r'\ba55hole\b': 'asshole',
        r'\btard\b': 'retard',
        r'\bfagg\b': 'fag',
    }

    for pattern, replacement in evasion_patterns.items():
        result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)

    # 6. Clean up multiple spaces
    result = re.sub(r'\s+', ' ', result).strip()

    return result

In [7]:
def aggregate_rationales(rationales, labels, post_length, drop_abnormal=False):
    """
    If all 3 annotators are normal → 3 zero spans → average (all zeros).
    If k annotators are non-normal and k spans exist → average the k spans (no added zeros).
    If k non-normal but fewer than k spans:
        If the missing annotators are non-normal → do not fill with zeros; average only existing spans and record rationale_support = #spans.
        If the missing annotators are normal (e.g., 2 hate + 1 normal + 2 spans) → append one zero span for the normal.
    """
    count_normal = labels.count(0)
    count_hate = labels.count(1)
    count_rationales = len(rationales)
    pad = np.zeros(post_length, dtype="int").tolist()

    # If there are hate labels but no rationales, something is wrong
    if count_hate > 0 and count_rationales == 0:
        if drop_abnormal:
            return None

        # Else just fill with 0
        return np.zeros(post_length).tolist()

    # If all annotators are normal, return all zeros
    if count_normal == 3:
        return np.zeros(post_length).tolist()

    # If we have hate annotators
    if count_hate > 0:
        # Case 1: Number of rationales matches number of hate annotators
        if count_rationales == count_hate:
            return np.average(rationales, axis=0).tolist()

        # Case 2: Fewer rationales than hate annotators
        elif count_rationales < count_hate:
            # Add zero padding for normal annotators only
            rationales_copy = rationales.copy()
            zeros_to_add = count_normal
            for _ in range(zeros_to_add):
                rationales_copy.append(pad)
            return np.average(rationales_copy, axis=0).tolist()

        # Case 3: More rationales than hate annotators (shouldn't happen normally)
        else:
            # Just average what we have
            return np.average(rationales, axis=0).tolist()

    # Fallback: return zeros if no clear case matches
    return np.zeros(post_length).tolist()

In [8]:
from typing import List, Tuple

def preprocess_text(raw_text):
    preprocessed_text = raw_text
    # # Remove HTML tags <>
    preprocessed_text = preprocessed_text.replace("<", "").replace(">", "")
    # # De-Obsfucate Patterns
    preprocessed_text = deobfuscate_text(preprocessed_text)

    return preprocessed_text


def create_text_segment(
    text_tokens: List[str], rationale_mask: List[int]
) -> List[Tuple[List[str], int]]:
    """
    Process a rationale mask to identify contiguous segments of highlighted text.
    Then create a segmented representation of the tokens

    Args:
        text_tokens: Original text tokens
        mask: Binary mask where 1 indicates a highlighted token (this consists of mask from 3 annotators)

    Returns:
        A list of tuples (text segment, mask value)
    """
    # Handle case where mask is empty (no rationale provided), usually this is normal classification
    mask = rationale_mask

    # for mask in all_rationale_mask:
    # Find breakpoints (transitions between highlighted/1 and non-highlighted/0)
    breakpoints = []
    mask_values = []

    # Always start with position 0
    breakpoints.append(0)
    mask_values.append(mask[0])

    # Find transitions in the mask
    for i in range(1, len(mask)):
        if mask[i] != mask[i - 1]:
            breakpoints.append(i)
            mask_values.append(mask[i])

    # Always end with the length of the text
    if breakpoints[-1] != len(mask):
        breakpoints.append(len(mask))

    # Create segments based on breakpoints
    segments = []
    for i in range(len(breakpoints) - 1):
        start = breakpoints[i]
        end = breakpoints[i + 1]
        segments.append((text_tokens[start:end], mask_values[i]))

    return segments


def align_rationales(tokens, rationales, tokenizer, max_length=128):
    """
    Align rationales with tokenized text while handling different tokenizer formats.

    Args:
        tokens: Original text tokens
        rationales: Original rationale masks
        tokenizer: The tokenizer to use
        max_length: Maximum sequence length

    Returns:
        Dictionary with tokenized inputs and aligned rationale masks
    """
    segments = create_text_segment(tokens, rationales)
    all_human_rationales = []
    all_input_ids = []
    all_attention_mask = []
    all_token_type_ids = []
    all_rationales = []
    for text_segment, rationale_value in segments:
        inputs = {}
        concatenated_text = " ".join(text_segment)
        processed_segment = preprocess_text(concatenated_text)
        tokenized = tokenizer(
            processed_segment, add_special_tokens=False, return_tensors="pt"
        )

        # Extract the relevant data
        segment_input_ids = tokenized["input_ids"][0]
        segment_attention_mask = tokenized["attention_mask"][0]
        # Handle token_type_ids if present
        if "token_type_ids" in tokenized:
            segment_token_type_ids = tokenized["token_type_ids"][0]
            all_token_type_ids.extend(segment_token_type_ids)

        # Add input IDs and attention mask
        all_input_ids.extend(segment_input_ids)
        all_attention_mask.extend(segment_attention_mask)

        # Add rationales (excluding special tokens)
        segment_rationales = [rationale_value] * len(segment_input_ids)
        all_rationales.extend(segment_rationales)
    # Get special token IDs
    cls_token_id = tokenizer.cls_token_id
    sep_token_id = tokenizer.sep_token_id

    # Add special tokens at the beginning and end
    all_input_ids = [cls_token_id] + all_input_ids + [sep_token_id]
    all_attention_mask = [1] + all_attention_mask + [1]

    # Handle token_type_ids if the model requires it
    if hasattr(tokenizer, "create_token_type_ids_from_sequences"):
        all_token_type_ids = tokenizer.create_token_type_ids_from_sequences(
            all_input_ids[1:-1]
        )
    elif all_token_type_ids:
        all_token_type_ids = [0] + all_token_type_ids + [0]
    else:
        all_token_type_ids = [0] * len(all_input_ids)

    # Check tokenized vs rationales length
    if len(all_input_ids) != len(all_attention_mask):
        print("Warning: length of tokens and rationales do not match")

    # Add zero rationale values for special tokens
    all_rationales = [0] + all_rationales + [0]

    # Truncate to max length if needed
    if len(all_input_ids) > max_length:
        print("WARNING: NEED TO TRUNCATE")
        all_input_ids = all_input_ids[:max_length]
        all_attention_mask = all_attention_mask[:max_length]
        all_token_type_ids = all_token_type_ids[:max_length]
        all_rationales = all_rationales[:max_length]

    # Pad to max_length if needed
    pad_token_id = tokenizer.pad_token_id
    padding_length = max_length - len(all_input_ids)

    if padding_length > 0:
        all_input_ids = all_input_ids + [pad_token_id] * padding_length
        all_attention_mask = all_attention_mask + [0] * padding_length
        all_token_type_ids = all_token_type_ids + [0] * padding_length
        all_rationales = all_rationales + [0] * padding_length

    # Convert lists to tensors
    inputs = {
        "input_ids": torch.tensor([all_input_ids], dtype=torch.long),
        "attention_mask": torch.tensor([all_attention_mask], dtype=torch.long),
        "token_type_ids": (
            torch.tensor([all_token_type_ids], dtype=torch.long)
            if "token_type_ids" in tokenizer.model_input_names
            else None
        ),
        "rationales": torch.tensor([all_rationales], dtype=torch.float32),
    }

    # Remove None values
    inputs = {k: v for k, v in inputs.items() if v is not None}
    return inputs

In [9]:
import re
import json
import os
import string
from collections import Counter
from tqdm import tqdm
import more_itertools as mit

def find_ranges(iterable):
    """Yield range of consecutive numbers."""
    for group in mit.consecutive_groups(iterable):
        group = list(group)
        if len(group) == 1:
            yield group[0]
        else:
            yield group[0], group[-1]

def process_and_convert_data(data, tokenizer, post_id_divisions, save_path='Data/explanations/', drop_abnormal=False):
    """
    Combined function that processes raw entries and converts to ERASER format in one pass.
    Also splits data into train/val/test sets.
    """
    print("Processing and converting data...")

    # Initialize outputs
    train_data = []
    val_data = []
    test_data = []
    dropped = 0

    # Create directories if saving splits
    if save_path:
        os.makedirs(save_path, exist_ok=True)
        os.makedirs(os.path.join(save_path, 'docs'), exist_ok=True)
        train_fp = open(os.path.join(save_path, 'train.jsonl'), 'w')
        val_fp = open(os.path.join(save_path, 'val.jsonl'), 'w')
        test_fp = open(os.path.join(save_path, 'test.jsonl'), 'w')

    for key, value in tqdm(data.items()):
        try:
            # Extract labels
            labels = [1 if annot["label"] in ['hatespeech', 'offensive'] else 0
                     for annot in value["annotators"]]

            # Process rationales
            rationales = value.get("rationales", [])
            aggregated_rationale = aggregate_rationales(
                rationales, labels, len(value["post_tokens"]), drop_abnormal=drop_abnormal
            )

            if aggregated_rationale is None:
                dropped += 1
                continue

            inputs = align_rationales(value['post_tokens'], aggregated_rationale, tokenizer)

            # Calculate labels
            hard_label = Counter(labels).most_common(1)[0][0]
            soft_label = sum(labels) / len(labels)

            # Determine target groups (mentioned at least 3 times)
            target_groups = [t for annot in value['annotators'] for t in annot['target']]
            filtered_targets = [k for k, v in Counter(target_groups).items() if v > 2]

            # Create processed entry
            processed_entry = {
                'post_id': key,
                'input_ids': inputs['input_ids'],
                'attention_mask': inputs['attention_mask'],
                'rationales': inputs['rationales'],
                'raw_text': " ".join(value['post_tokens']),
                'hard_label': hard_label,
                'soft_label': soft_label,
                'target_groups': filtered_targets
            }

            # Convert to ERASER format if it's hateful/offensive content
            if hard_label == 1 and save_path:
                input_ids_list = inputs['input_ids'].squeeze().tolist()
                rationales_list = inputs['rationales'].squeeze().ceil().int().tolist()

                # Build evidences
                evidences = []
                indexes = sorted([i for i, each in enumerate(rationales_list) if each == 1])
                for span in find_ranges(indexes):
                    if isinstance(span, int):
                        start, end = span, span + 1
                    else:
                        start, end = span[0], span[1] + 1

                    evidences.append({
                        "docid": key,
                        "end_sentence": -1,
                        "end_token": end,
                        "start_sentence": -1,
                        "start_token": start,
                        "text": ' '.join([str(x) for x in input_ids_list[start:end]])
                    })

                eraser_entry = {
                    'annotation_id': key,
                    'classification': str(hard_label),
                    'evidences': [evidences],
                    'query': "What is the class?",
                    'query_type': None
                }

                # Save document
                with open(os.path.join(save_path, 'docs', key), 'w') as fp:
                    fp.write(' '.join([str(x) for x in input_ids_list if x > 0]))

                # Write to appropriate split
                if key in post_id_divisions['train']:
                    train_fp.write(json.dumps(eraser_entry) + '\n')
                elif key in post_id_divisions['val']:
                    val_fp.write(json.dumps(eraser_entry) + '\n')
                elif key in post_id_divisions['test']:
                    test_fp.write(json.dumps(eraser_entry) + '\n')

            # Add to appropriate split list
            if key in post_id_divisions['train']:
                train_data.append(processed_entry)
            elif key in post_id_divisions['val']:
                val_data.append(processed_entry)
            elif key in post_id_divisions['test']:
                test_data.append(processed_entry)

        except Exception as e:
            dropped += 1
            print(f"Error processing {key}: {e}")

    if save_path:
        train_fp.close()
        val_fp.close()
        test_fp.close()

    print(f"Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}, Dropped: {dropped}")

    return {
        'train': train_data,
        'val': val_data,
        'test': test_data
    }

In [10]:
with open(data_path, 'r') as file:
    data = json.load(file)

with open('Data/post_id_divisions.json') as file:
    post_id_divisions = json.load(file)

# Process everything in one pass
tokenizer = AutoTokenizer.from_pretrained(config.model_name)
splits = process_and_convert_data(
    data=data,
    tokenizer=tokenizer,
    post_id_divisions=post_id_divisions,
    save_path='Data/explanations/',
    drop_abnormal=False
)

# Access splits directly
train_data = splits['train']
val_data = splits['val']
test_data = splits['test']

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Processing and converting data...


 36%|███▋      | 7338/20148 [00:14<00:22, 575.10it/s]



 89%|████████▉ | 17966/20148 [00:36<00:04, 486.56it/s]

Error processing 24439295_gab: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.


100%|██████████| 20148/20148 [00:41<00:00, 491.21it/s]

Train: 15382, Val: 1922, Test: 1924, Dropped: 1





In [13]:
train_data[3]

{'post_id': '24198545_gab',
 'input_ids': tensor([[  101,  1998,  2023,  2003,  2339,  1045,  2203,  2039,  2007,  9152,
          13327, 26758,  7435,  2040,  2064,  2025,  3713,  7919,  3768,  3937,
           3716,  1997,  7366,  2009,  5621, 12459,  2065,  1996,  2270,  2069,
           2354,   102,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,

# Training

In [11]:
from HateDataset import HateDataset

# Create datasets with pre-tokenized data
train_dataset = HateDataset(data=train_data)
val_dataset = HateDataset(data=val_data)
test_dataset = HateDataset(data=test_data)

In [12]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False) # Use shuffle=False for validation
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False) # Use shuffle=False for testing

In [13]:
# from HateClassifier import HateClassifier
# model = HateClassifier(config)

In [14]:
# history = model.train(train_dataloader=train_loader, val_dataloader=val_loader)

# Evaluation

In [15]:
# import pickle

# result = model.predict(test_dataloader=test_loader, return_attentions=True)

# # Save the result to a file
# with open('prediction_results.pkl', 'wb') as f:
#   pickle.dump(result, f)

# print("Results saved to prediction_results.pkl")

## Bias

In [16]:
def get_bias_evaluation_samples(data, method, group):
    """
    Get positive and negative sample IDs for bias evaluation based on method and group

    Args:
        data: list of data entries
        method: Bias evaluation method ('subgroup', 'bpsn', or 'bnsp')
        group: Target group to evaluate

    Returns:
        Tuple of (positive_ids, negative_ids)
    """
    positive_ids = []
    negative_ids = []

    for idx, row in enumerate(data):
        target_groups = row['target_groups']
        if target_groups is None:
            continue

        is_in_group = group in target_groups

        # Convert various label formats to binary toxic/non-toxic
        if 'hard_label' in row:
            is_toxic = row['hard_label'] == 1
        else:
            continue

        if method == 'subgroup':
            # Only consider samples mentioning the group
            if is_in_group:
                if is_toxic:
                    positive_ids.append(idx)
                else:
                    negative_ids.append(idx)

        elif method == 'bpsn':
            # Compare non-toxic posts mentioning the group with toxic posts NOT mentioning the group
            if is_in_group and not is_toxic:
                negative_ids.append(idx)
            elif not is_in_group and is_toxic:
                positive_ids.append(idx)

        elif method == 'bnsp':
            # Compare toxic posts mentioning the group with non-toxic posts NOT mentioning the group
            if is_in_group and is_toxic:
                positive_ids.append(idx)
            elif not is_in_group and not is_toxic:
                negative_ids.append(idx)

    return positive_ids, negative_ids

In [17]:
from collections import defaultdict
from sklearn.metrics import roc_auc_score

def calculate_gmb_metrics(
    test_data: List[Dict[str, Any]],
    probabilities: np.ndarray,
    target_groups: List[str]
):
    """
    Calculate GMB (Generalized Mean of Bias) AUC metrics from model predictions

    Args:
        probabilities: Model's probability outputs
        test_data: List of test data entries
        target_groups: List of target groups to evaluate

    Returns:
        Dictionary with GMB metrics
    """
    # Create mappings from post_id to predictions and ground truth
    prediction_scores = defaultdict(lambda: defaultdict(dict))
    ground_truth = {}

    for idx, row in enumerate(test_data):
        prediction_scores[idx] = probabilities[idx, 1]
        ground_truth[idx] = row['hard_label']

    # Calculate metrics for each target group and method
    bias_metrics = {}
    methods = ['subgroup', 'bpsn', 'bnsp']

    for method in methods:
        bias_metrics[method] = {}  # Initialize nested dictionary for each method
        for group in target_groups:
            # Get positive and negative samples based on the method
            positive_ids, negative_ids = get_bias_evaluation_samples(test_data, method, group)

            if len(positive_ids) == 0 or len(negative_ids) == 0:
                print(f"Skipping {method} for group {group}: no samples found")
                continue  # Skip if no samples for this group/method

            # Collect ground truth and predictions
            y_true = []
            y_score = []

            for post_id in positive_ids:
                if post_id in ground_truth and post_id in prediction_scores:
                    y_true.append(ground_truth[post_id])
                    y_score.append(prediction_scores[post_id])

            for post_id in negative_ids:
                if post_id in ground_truth and post_id in prediction_scores:
                    y_true.append(ground_truth[post_id])
                    y_score.append(prediction_scores[post_id])

            # Calculate AUC if we have enough samples with both classes
            if len(y_true) > 10 and len(set(y_true)) > 1:
                try:
                    auc = roc_auc_score(y_true, y_score)
                    bias_metrics[method][group] = auc
                except ValueError:
                    print(f"Could not compute AUC for {method} and group {group} due to ValueError")
                    pass

    # Calculate GMB for each method
    gmb_metrics = {}
    power = -5  # Power parameter for generalized mean

    for method in methods:
        if not bias_metrics[method]:
            continue

        scores = list(bias_metrics[method].values())
        if not scores:
            continue

        # Calculate generalized mean with p=-5
        power_mean = np.mean([score ** power for score in scores]) ** (1/power)
        gmb_metrics[f'GMB-{method.upper()}-AUC'] = power_mean

    # Calculate a combined GMB score that includes all methods
    all_scores = []
    for method in methods:
        all_scores.extend(list(bias_metrics[method].values()))

    if all_scores:
        gmb_metrics['GMB-COMBINED-AUC'] = np.mean([score ** power for score in all_scores]) ** (1/power)

    return gmb_metrics, bias_metrics

In [18]:
from itertools import chain

all_target_groups = list(chain.from_iterable(d['target_groups'] for d in train_data + val_data + test_data))

In [19]:
from collections import Counter
# Get top 10 most common target groups in the full dataset

# Remove None
all_target_groups = [group for group in all_target_groups if group != 'None' and group != 'Other']
counter = Counter(all_target_groups)

n_common = 10
bias_target_groups = [tg[0] for tg in counter.most_common(n_common)]

In [20]:
# gmb_metrics, bias_details = calculate_gmb_metrics(
#   test_data=test_data,
#   probabilities=result['probabilities'],
#   target_groups=bias_target_groups
# )

In [21]:
# print('GMB-Metrics')
# for key, value in gmb_metrics.items():
#   print(f'{key}: {value}')

In [22]:
# print('Bias Details')
# print()
# for key, entry in bias_details.items():
#   print(f"Metrics: {key}")
#   for subgroup, value in entry.items():
#     print(f'{subgroup}: {value}')
#   print()

## XAI

In [23]:
import numpy as np
import torch
from torch.utils.data import DataLoader
from typing import List, Dict, Tuple
import json
from sklearn.metrics import (
    precision_recall_curve,
    auc,
    f1_score,
    precision_score,
    recall_score,
)


class FaithfulnessMetrics:
    """
    Compute faithfulness metrics using the model's existing predict() method.
    Creates modified datasets and uses DataLoader for efficient batched processing.
    """

    def __init__(self, model, tokenizer, dataset_class, batch_size=32):
        self.model = model
        self.tokenizer = tokenizer
        self.dataset_class = dataset_class
        self.batch_size = batch_size

        # Get special token IDs
        self.special_token_ids = {
            tokenizer.cls_token_id,
            tokenizer.sep_token_id,
        }
        self.special_token_ids = {x for x in self.special_token_ids if x is not None}

    def compute_all_metrics(
        self,
        test_data: List[Dict],  # Your original test data
        test_results: Dict,  # Results from prediction
        k: int = 5,  # Number of top tokens to consider
        eraser_save_path: str = "Data/eraser_formatted_results.jsonl",
    ) -> Dict[str, float]:
        """
        Compute all ERASER metrics efficiently using DataLoader approach

        Args:
            test_data: List of test instances (each with input_ids, attention_mask, rationales, labels)
            test_results: List of dictionaries containing attention scores for each instance

        Returns:
            Dictionary with all metrics
        """
        print("Computing ERASER metrics using DataLoader approach...")

        # Extract lists for easier processing
        input_ids_list = [item["input_ids"] for item in test_data]
        attention_masks_list = [item["attention_mask"] for item in test_data]
        human_rationales = [item["rationales"] for item in test_data]
        attention_scores = [item for item in test_results["attentions"]]

        # 1. Extract top-k as hard predictions
        hard_predictions = self._extract_top_k_tokens(
            attention_scores, attention_masks_list, input_ids_list, k
        )

        hard_rationale_predictions, soft_rationale_predictions = self._convert_attention_to_evidence_format(input_ids_list, attention_scores, hard_predictions)

        # 2. PLAUSIBILITY METRICS
        print("\n[1/3] Computing plausibility metrics...")
        auprc = self._compute_auprc(
            attention_scores, human_rationales, attention_masks_list, input_ids_list
        )
        token_f1, token_prec, token_rec = self._compute_token_f1(
            hard_predictions, human_rationales, attention_masks_list
        )

        # 3. FAITHFULNESS METRICS
        print("[2/3] Computing comprehensiveness scores...")
        raw_comprehensiveness, comprehensiveness_scores = (
            self._compute_comprehensiveness(test_data, test_results, hard_predictions)
        )

        print("[3/3] Computing sufficiency scores...")
        raw_sufficiency, sufficiency_scores = self._compute_sufficiency(
            test_data, test_results, hard_predictions
        )

        # 4. Convert to eraser format
        results_eraser = self._convert_result_to_eraser_format(test_results, hard_rationale_predictions, soft_rationale_predictions, raw_sufficiency, raw_comprehensiveness)
        # Convert to JSONL format
        jsonl_output = '\n'.join([json.dumps(entry) for entry in results_eraser])
        with open(eraser_save_path, 'w') as f:
            f.write(jsonl_output)

        return {
            # Plausibility
            "auprc": auprc,
            "token_f1": token_f1,
            "token_precision": token_prec,
            "token_recall": token_rec,
            # Faithfulness
            "comprehensiveness": float(np.mean(comprehensiveness_scores)),
            "sufficiency": float(np.mean(sufficiency_scores)),
            # Additional
            "avg_rationale_length": k,
        }

    def _convert_attention_to_evidence_format(self, input_ids_list, attention_scores, hard_predictions):
        # 2. Collect evidence
        hard_rationale_predictions = []
        for idx, hp in enumerate(hard_predictions):
            evidences = []
            indexes = sorted([i for i, each in enumerate(hp.tolist()) if each == 1])
            for span in find_ranges(indexes):
                if isinstance(span, int):
                    start, end = span, span + 1
                else:
                    start, end = span[0], span[1] + 1

                evidences.append({
                    "start_token": start,
                    "end_token": end,
                })
            hard_rationale_predictions.append(evidences)

        soft_rationale_predictions = []
        for att in attention_scores:
            pred = [x for x in att if x > 0]
            soft_rationale_predictions.append(pred)

        return hard_rationale_predictions, soft_rationale_predictions

    def _convert_result_to_eraser_format(
        self,
        test_result: Dict,
        hard_rationale_predictions,
        soft_rationale_predictions,
        sufficiency_scores: np.ndarray,
        comprehensiveness_scores: np.ndarray,
    ):
        all_entries = []
        for idx, data in enumerate(test_result["post_id"]):
            entry = {
            'annotation_id': data,
            'classification': str(int(test_result["predictions"][idx])),
            'classification_scores': {
                0: float(test_result["probabilities"][idx][0]),
                1: float(test_result["probabilities"][idx][1]),
            },
            'rationales': [
                {
                    "docid": data,
                    "hard_rationale_predictions": hard_rationale_predictions[idx],
                    "soft_rationale_predictions": [float(x) for x in soft_rationale_predictions[idx]],
                }
            ],
            'sufficiency_classification_scores': {
                0: float(sufficiency_scores[idx][0]),
                1: float(sufficiency_scores[idx][1])
            },
            'comprehensiveness_classification_scores': {
                0: float(comprehensiveness_scores[idx][0]),
                1: float(comprehensiveness_scores[idx][1])
            }
            }
            all_entries.append(entry)

        return all_entries

    def _calculate_average_rationale_length(
        self,
        human_rationales: List[torch.Tensor],
        attention_masks_list: List[torch.Tensor],
        input_ids_list: List[torch.Tensor],
    ) -> int:
        """Calculate average number of content rationale tokens"""
        lengths = []
        for idx, (rat, mask) in enumerate(zip(human_rationales, attention_masks_list)):
            valid_positions = mask.bool().cpu().numpy().flatten()

            # Exclude special tokens
            input_ids = input_ids_list[idx].cpu().numpy().flatten()
            is_special = np.isin(input_ids, list(self.special_token_ids))
            content_positions = valid_positions & ~is_special

            rat_count = (rat.cpu().numpy().flatten()[content_positions] == 1).sum()
            lengths.append(rat_count)

        return max(1, int(np.mean(lengths)))

    def _extract_top_k_tokens(
        self,
        attention_scores: List[np.ndarray],
        attention_masks_list: List[torch.Tensor],
        input_ids_list: List[torch.Tensor],
        k: int,
    ) -> List[np.ndarray]:
        """Extract top-k content tokens as hard predictions"""
        hard_predictions = []

        for idx, (attn, mask) in enumerate(zip(attention_scores, attention_masks_list)):
            pred_mask = np.zeros_like(attn, dtype=int)
            valid_positions = mask.bool().cpu().numpy().flatten()

            # Exclude special tokens
            input_ids = input_ids_list[idx].cpu().numpy().flatten()
            is_special = np.isin(input_ids, list(self.special_token_ids))
            content_positions = valid_positions & ~is_special

            content_attn = attn[content_positions]

            if k > 0 and len(content_attn) > 0:
                k_actual = min(k, len(content_attn))
                top_k_within_content = np.argsort(content_attn)[-k_actual:]
                content_indices = np.where(content_positions)[0]
                top_k_indices = content_indices[top_k_within_content]
                pred_mask[top_k_indices] = 1

            hard_predictions.append(pred_mask)

        return hard_predictions

    def _compute_auprc(
        self,
        attention_scores: List[np.ndarray],
        human_rationales: List[torch.Tensor],
        attention_masks_list: List[torch.Tensor],
        input_ids_list: List[torch.Tensor],
    ) -> float:
        """Compute AUPRC for soft attention scores"""
        all_scores = []
        all_labels = []

        for idx, (attn, rat, mask) in enumerate(
            zip(attention_scores, human_rationales, attention_masks_list)
        ):
            valid_positions = mask.bool().cpu().numpy().flatten()

            # Exclude special tokens
            input_ids = input_ids_list[idx].cpu().numpy().flatten()
            is_special = np.isin(input_ids, list(self.special_token_ids))
            content_positions = valid_positions & ~is_special

            all_scores.extend(attn[content_positions].tolist())
            all_labels.extend(
                rat.cpu().numpy().flatten()[content_positions].astype(int).tolist()
            )

        all_scores = np.array(all_scores, dtype=float)
        all_labels = np.array(all_labels, dtype=int)

        if len(np.unique(all_labels)) < 2:
            print(f"Warning: Only one class in labels: {np.unique(all_labels)}")
            return 0.0

        precision, recall, _ = precision_recall_curve(all_labels, all_scores)
        return auc(recall, precision)

    def _compute_token_f1(
        self,
        hard_predictions: List[np.ndarray],
        human_rationales: List[torch.Tensor],
        attention_masks_list: List[torch.Tensor],
    ) -> Tuple[float, float, float]:
        """Compute token-level F1, Precision, Recall"""
        all_preds = []
        all_labels = []

        for pred, rat, mask in zip(
            hard_predictions, human_rationales, attention_masks_list
        ):
            valid_positions = mask.bool().cpu().numpy().flatten()
            all_preds.extend(pred[valid_positions].astype(int).tolist())
            all_labels.extend(
                rat.cpu().numpy().flatten()[valid_positions].astype(int).tolist()
            )

        all_preds = np.array(all_preds, dtype=int)
        all_labels = np.array(all_labels, dtype=int)

        f1 = f1_score(all_labels, all_preds, zero_division=0)
        precision = precision_score(all_labels, all_preds, zero_division=0)
        recall = recall_score(all_labels, all_preds, zero_division=0)

        return f1, precision, recall

    def _compute_comprehensiveness(
        self,
        test_data: List[Dict],
        test_results: Dict,
        hard_predictions: List[np.ndarray],
    ) -> Tuple[float, List[float]]:
        """
        Compute comprehensiveness: how much does REMOVING rationales hurt?
        Uses DataLoader approach for efficiency
        """
        # Create modified dataset (remove rationales from attention mask)
        modified_data = []
        for item, rationale_mask in zip(test_data, hard_predictions):
            modified_item = self._create_comprehensiveness_instance(
                item, rationale_mask
            )
            modified_data.append(modified_item)

        # Create DataLoader
        modified_dataset = self.dataset_class(modified_data)
        modified_loader = DataLoader(
            modified_dataset, batch_size=self.batch_size, shuffle=False
        )

        # Get predictions using model's predict method
        results = self.model.predict(modified_loader, return_attentions=False)
        modified_probs = results["probabilities"]

        # Calculate comprehensiveness scores
        comprehensiveness_scores = []
        for idx, (prob, label) in enumerate(zip(test_results["probabilities"], test_results['labels'])):
            original_prob = prob[
                label
            ]  # Probability from normal prediction process for the label
            modified_prob = modified_probs[idx][label]

            # Comprehensiveness = original - modified (higher is better)
            comp_score = original_prob - modified_prob
            comprehensiveness_scores.append(comp_score)

        return modified_probs, comprehensiveness_scores

    def _compute_sufficiency(
        self,
        test_data: List[Dict],
        test_results: Dict,
        hard_predictions: List[np.ndarray],
    ) -> Tuple[List[float], List[float]]:
        """
        Compute sufficiency: how well do ONLY rationales predict?
        Uses DataLoader approach for efficiency
        """
        # Create modified dataset (keep only rationales in attention mask)
        modified_data = []
        for item, rationale_mask in zip(test_data, hard_predictions):
            modified_item = self._create_sufficiency_instance(item, rationale_mask)
            modified_data.append(modified_item)

        # Create DataLoader
        modified_dataset = self.dataset_class(modified_data)
        modified_loader = DataLoader(
            modified_dataset, batch_size=self.batch_size, shuffle=False
        )

        # Get predictions using model's predict method
        results = self.model.predict(modified_loader, return_attentions=False)
        modified_probs = results["probabilities"]

        # Calculate sufficiency scores
        sufficiency_scores = []
        for idx, (prob, label) in enumerate(zip(test_results["probabilities"], test_results['labels'])):
            original_prob = prob[
                label
            ]  # Probability from normal prediction process for the label
            modified_prob = modified_probs[idx][label]

            # Sufficiency = original - modified (lower/negative is better)
            suff_score = original_prob - modified_prob
            sufficiency_scores.append(suff_score)

        return modified_probs, sufficiency_scores

    def _create_comprehensiveness_instance(
        self, item: Dict, rationale_mask: np.ndarray
    ) -> Dict:
        """
        Create instance for comprehensiveness: REMOVE rationales from attention
        Keep: CLS + non-rationale content tokens + SEP
        """
        input_ids = item["input_ids"].cpu().numpy().flatten()
        orig_mask = item["attention_mask"].cpu().numpy().flatten()

        # Start with original mask
        new_mask = orig_mask.copy()

        # Zero out rationale positions (except CLS and SEP)
        for i in range(len(new_mask)):
            if rationale_mask[i] == 1:  # This is a rationale
                # Don't mask if it's CLS or SEP
                if input_ids[i] not in self.special_token_ids:
                    new_mask[i] = 0

        return {
            "post_id": item["post_id"],
            "input_ids": torch.tensor(input_ids).unsqueeze(0),
            "attention_mask": torch.tensor(new_mask).unsqueeze(0),
            "rationales": item["rationales"],
            "hard_label": item["hard_label"],
        }

    def _create_sufficiency_instance(
        self, item: Dict, rationale_mask: np.ndarray
    ) -> Dict:
        """
        Create instance for sufficiency: Keep ONLY rationales in attention
        Keep: CLS + rationale tokens + SEP
        """
        input_ids = item["input_ids"].cpu().numpy().flatten()
        orig_mask = item["attention_mask"].cpu().numpy().flatten()

        # Start with zeros
        new_mask = np.zeros_like(orig_mask)

        # Always keep CLS and SEP
        for i in range(len(new_mask)):
            if input_ids[i] in self.special_token_ids:
                new_mask[i] = 1

        # Keep rationale positions
        for i in range(len(new_mask)):
            if rationale_mask[i] == 1 and orig_mask[i] == 1:
                new_mask[i] = 1

        return {
            "post_id": item["post_id"],
            "input_ids": torch.tensor(input_ids).unsqueeze(0),
            "attention_mask": torch.tensor(new_mask).unsqueeze(0),
            "rationales": item["rationales"],
            "hard_label": item["hard_label"],
        }

# Experiment Management System

This notebook now uses a systematic experiment tracking system that organizes all outputs by experiment.

## Example: Modified Training & Evaluation Pipeline with Experiment Tracking

Below is how you would integrate the experiment manager into your existing pipeline:

In [24]:
# ============================================================================
# FULL EXPERIMENT PIPELINE WITH TRACKING
# ============================================================================
from ExperimentManager import ExperimentManager
from HateClassifier import HateClassifier
from HateDataset import HateDataset
# 1. CREATE EXPERIMENT
experiment_manager = ExperimentManager(base_dir="./experiments")
experiment_dir = experiment_manager.create_experiment(
    config=config,
    custom_name="baseline_distilbert",  # Change this for each experiment
    description="Baseline model with distilbert-base-uncased, standard hyperparameters"
)

Created new experiment: 20251217_081342_baseline_distilbert_54083f69
Directory: experiments/20251217_081342_baseline_distilbert_54083f69
Description: Baseline model with distilbert-base-uncased, standard hyperparameters


In [25]:
# 2. TRAIN MODEL (config.save_dir is automatically updated)
model = HateClassifier(config)
history = model.train(train_dataloader=train_loader, val_dataloader=val_loader)

# Save training history
experiment_manager.save_training_history(history)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Training on device: cuda
Model: distilbert-base-uncased
Epochs: 2
Batch size: 32
Gradient accumulation steps: 1
Effective batch size: 32
Learning rate: 1e-05
Mixed precision (AMP): True
Gradient clipping: 1.0

Epoch 1/2


Training: 100%|██████████| 481/481 [00:54<00:00,  8.77batch/s, loss=0.514]
Evaluating: 100%|██████████| 61/61 [00:01<00:00, 31.18batch/s]



Epoch 1 Summary:
  Train Loss: 0.5140
  Val Loss:   0.4677
  Val Acc:    0.7711
  Val F1:     0.7647
  ✓ New best model saved! (F1: 0.7647)

Epoch 2/2


Training: 100%|██████████| 481/481 [00:54<00:00,  8.89batch/s, loss=0.418]
Evaluating: 100%|██████████| 61/61 [00:01<00:00, 32.58batch/s]



Epoch 2 Summary:
  Train Loss: 0.4179
  Val Loss:   0.4680
  Val Acc:    0.7695
  Val F1:     0.7633

Training completed!
Best F1 Score: 0.7647
Training history saved to: experiments/20251217_081342_baseline_distilbert_54083f69/checkpoints/training_history.json
Training history saved to: experiments/20251217_081342_baseline_distilbert_54083f69/metrics/training_history.json


In [26]:
# 3. EVALUATE MODEL
result = model.predict(test_dataloader=test_loader, return_attentions=True)

# Save predictions
experiment_manager.save_predictions(result, filename="test_predictions.pkl")

Running inference on 61 batches...


Testing: 100%|██████████| 61/61 [00:01<00:00, 31.85batch/s]


Test Results:
  Test Loss:     0.4493
  Test Accuracy: 0.7968
  Test F1:       0.7902
Predictions saved to: experiments/20251217_081342_baseline_distilbert_54083f69/results/test_predictions.pkl





In [27]:

# 4. BIAS EVALUATION
gmb_metrics, bias_details = calculate_gmb_metrics(
    test_data=test_data,
    probabilities=result['probabilities'],
    target_groups=bias_target_groups
)

# Save bias metrics
experiment_manager.save_bias_metrics(gmb_metrics, bias_details)


Bias metrics saved to: experiments/20251217_081342_baseline_distilbert_54083f69/metrics/bias_metrics.json


In [28]:

# 5. XAI EVALUATION (only on hate samples)
test_data_hate_only = []
test_results_hate_only = {'attentions': [], 'probabilities': [], 'predictions': [], 'post_id': [], 'labels': []}
for idx, td in enumerate(test_data):
    if td['hard_label'] == 1:
        test_data_hate_only.append(td)
        test_results_hate_only['attentions'].append(result['attentions'][idx])
        test_results_hate_only['probabilities'].append(result['probabilities'][idx])
        test_results_hate_only['predictions'].append(result['predictions'][idx])
        test_results_hate_only['post_id'].append(result['post_ids'][idx])
        test_results_hate_only['labels'].append(result['labels'][idx])

calculator = FaithfulnessMetrics(
    model=model,
    tokenizer=tokenizer,
    dataset_class=HateDataset,
    batch_size=32
)

k = 5
eraser_save_path = f"{experiment_dir}/results/test_explain_output.jsonl"
xai_results = calculator.compute_all_metrics(test_data_hate_only, test_results_hate_only, k, eraser_save_path)

# Save XAI metrics
experiment_manager.save_xai_metrics(xai_results)


Computing ERASER metrics using DataLoader approach...

[1/3] Computing plausibility metrics...
[2/3] Computing comprehensiveness scores...
Running inference on 36 batches...


Testing: 100%|██████████| 36/36 [00:01<00:00, 23.98batch/s]



Test Results:
  Test Loss:     1.1826
  Test Accuracy: 0.3126
  Test F1:       0.2382
[3/3] Computing sufficiency scores...
Running inference on 36 batches...


Testing: 100%|██████████| 36/36 [00:01<00:00, 28.36batch/s]



Test Results:
  Test Loss:     0.5711
  Test Accuracy: 0.6874
  Test F1:       0.4074
XAI metrics saved to: experiments/20251217_081342_baseline_distilbert_54083f69/metrics/xai_metrics.json


In [29]:
!git clone https://github.com/jayded/eraserbenchmark.git
!sed -i "285s/.*/    labels=['0', '1']/" eraserbenchmark/rationale_benchmark/metrics.py
!sed -i "286s/.*/    label_to_int = {'0':0, '1': 1}/" eraserbenchmark/rationale_benchmark/metrics.py

Cloning into 'eraserbenchmark'...
remote: Enumerating objects: 90, done.[K
remote: Counting objects: 100% (90/90), done.[K
remote: Compressing objects: 100% (46/46), done.[K
remote: Total 90 (delta 51), reused 79 (delta 43), pack-reused 0 (from 0)[K
Receiving objects: 100% (90/90), 69.80 KiB | 3.17 MiB/s, done.
Resolving deltas: 100% (51/51), done.


In [30]:
score_file = f"{experiment_dir}/results/eraser_result.json"
!PYTHONPATH=./eraserbenchmark:%PYTHONPATH% && python eraserbenchmark/rationale_benchmark/metrics.py --split test --strict --data_dir Data/explanations --results {eraser_save_path} --score_file {score_file}

  6919 MainThread Error in instances: 0 instances fail validation: set()
 12982 MainThread No sentence level predictions detected, skipping sentence-level diagnostic
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
{'classification_scores': {'accuracy': 0.8204903677758318,
                           'aopc_thresholds': None,
                           'comprehensiveness': np.float64(0.36346780362289444),
                           'comprehensiveness_aopc': None,
                           'comprehensiveness_aopc_points': None,
                           'comprehensiveness_entropy': np.float64(-0.1625215074979172),
                           'comprehensiveness_kl': np.float64(1.0234625085906859),
                           'prf': {'0': {'f1-score': 0.0,
                                         'precision': 0.0,
         

In [31]:
# 6. CREATE FINAL SUMMARY
final_summary = {
    "test_accuracy": float(result['accuracy']),
    "test_f1": float(result['f1']),
    "test_loss": float(result['loss']),
    "gmb_metrics": gmb_metrics,
    "xai_metrics": xai_results,
    "total_params": sum(p.numel() for p in model.base_model.parameters()),
}

experiment_manager.save_final_metrics(final_summary)


Final metrics summary saved to: experiments/20251217_081342_baseline_distilbert_54083f69/metrics/final_summary.json


In [32]:
# 7. MARK EXPERIMENT AS COMPLETE
experiment_manager.mark_complete(
    status="completed",
    notes="Baseline experiment completed successfully"
)

print("\n" + "="*80)
print("EXPERIMENT COMPLETED!")
print(f"All results saved to: {experiment_dir}")
print("="*80)

Experiment 20251217_081342_baseline_distilbert_54083f69 marked as: completed

EXPERIMENT COMPLETED!
All results saved to: experiments/20251217_081342_baseline_distilbert_54083f69


## Experiment Management Utilities

Useful commands for managing and comparing experiments:

In [38]:
# # View all experiments
# experiment_manager = ExperimentManager()
# experiment_manager.print_experiment_summary()

In [39]:
# # List only completed experiments
# completed_experiments = experiment_manager.list_experiments(status="completed")
# print(f"Found {len(completed_experiments)} completed experiments")
# for exp in completed_experiments:
#     print(f"  - {exp['experiment_id']}: {exp.get('description', 'No description')}")

In [40]:
# # Compare multiple experiments
# experiment_ids = [
#     "20251217_071414_baseline_distilbert_54083f69",  # Replace with actual experiment IDs
# ]
# comparison = experiment_manager.compare_experiments(experiment_ids)

# # Display comparison
# for exp in comparison["experiments"]:
#     print(f"\nExperiment: {exp['experiment_id']}")
#     print(f"  Model: {exp['config'].get('model_name', 'N/A')}")
#     print(f"  Learning Rate: {exp['config'].get('learning_rate', 'N/A')}")
#     print(f"  Test F1: {exp['metrics'].get('test_f1', 'N/A')}")
#     print(f"  Test Accuracy: {exp['metrics'].get('test_accuracy', 'N/A')}")

In [41]:
# # Load results from a specific experiment
# experiment_id = "20251217_071414_baseline_distilbert_54083f69"  # Replace with actual ID
# exp_dir = experiment_manager.get_experiment_path(experiment_id)

# if exp_dir:
#     # Load config
#     with open(exp_dir / "config.json", 'r') as f:
#         loaded_config = json.load(f)

#     # Load final metrics
#     with open(exp_dir / "metrics" / "final_summary.json", 'r') as f:
#         loaded_metrics = json.load(f)

#     print(f"Loaded experiment: {experiment_id}")
#     print(f"Test F1: {loaded_metrics['test_f1']}")
#     print(f"Test Accuracy: {loaded_metrics['test_accuracy']}")
# else:
#     print(f"Experiment {experiment_id} not found")

In [42]:
# # Export an experiment for sharing or backup
# experiment_id = "20251217_071414_baseline_distilbert_54083f69"  # Replace with actual ID
# experiment_manager.export_experiment(experiment_id, export_path="./exported_experiments")

## Visualization Tools

Visualize and compare experiment results:

In [46]:
# from experiment_visualization import (
#     plot_training_curves,
#     plot_metrics_comparison,
#     plot_bias_metrics,
#     plot_xai_metrics,
#     create_experiment_report
# )

# # Example: Compare training curves across experiments
# experiment_ids = [
#     "20251217_071414_baseline_distilbert_54083f69",  # Replace with your actual experiment IDs
# ]

In [47]:
# # Uncomment to use:
# plot_training_curves(experiment_ids, save_path="training_curves.png")

In [48]:
# # Compare final metrics across experiments
# plot_metrics_comparison(experiment_ids, save_path="metrics_comparison.png")

In [49]:
# # Visualize bias metrics
# plot_bias_metrics(experiment_ids, save_path="bias_comparison.png")

In [50]:
# # Visualize XAI metrics
# plot_xai_metrics(experiment_ids, save_path="xai_comparison.png")

In [51]:
# # Generate HTML report for a specific experiment
# create_experiment_report("20241216_120000_baseline_a1b2c3d4")