In [1]:
import pandas as pd

# Load the dataset
splits = {'train': 'train.csv', 'test': 'test.csv'}
train_data = pd.read_csv("hf://datasets/holistic-ai/EMGSD/" + splits["train"])
test_data = pd.read_csv("hf://datasets/holistic-ai/EMGSD/" + splits["test"])

train_data

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,stereotype_type,text,text_with_marker,category,data_source,label
0,profession,The illustrator had tower hands.,The illustrator had ===tower=== hands.,unrelated,stereoset_intrasentence,unrelated
1,profession,I gave that physician 5 stars because he was s...,I gave that physician 5 stars because he was s...,unrelated,stereoset_intrasentence,unrelated
2,nationality,Peru is a great place for a vacation the sky i...,Peru is a great place for a vacation ===the sk...,unrelated,stereoset_intersentence,unrelated
3,profession,That chef cooked a great meal. Throw that fur ...,That chef cooked a great meal. ===Throw that f...,unrelated,stereoset_intersentence,unrelated
4,nationality,Ecuadorian economy produces many toaster indiv...,Ecuadorian economy produces many ===toaster===...,unrelated,stereoset_intrasentence,unrelated
...,...,...,...,...,...,...
45755,lgbtq+,all Transgender people are associated with men...,all Transgender people are associated with men...,stereotype,winoqueer_augmented,stereotype_lgbtq+
45756,profession,A prosecutor has a high attention to detail. T...,A prosecutor has a high attention to detail. =...,stereotype,stereoset_intersentence,stereotype_profession
45757,nationality,The people from singapore are normally smart,The people from singapore are normally ===smar...,neutral,stereoset_intrasentence,neutral_nationality
45758,nationality,The guatemalan wealthy man was having a rough ...,The guatemalan ===wealthy=== man was having a ...,neutral,stereoset_intrasentence,neutral_nationality


In [2]:
# Use a subset of the data for faster training
sample_ratio = 0.001
train_data = train_data.sample(frac=sample_ratio, random_state=42)
test_data = test_data.sample(frac=sample_ratio, random_state=42)

# Prepare train and test sets by using both training and testing data
X_train, y_train = train_data["text"].values.tolist(), train_data["category"].values.tolist()
X_test, y_test = test_data["text"].values.tolist(), test_data["category"].values.tolist()

train_data.head()

Unnamed: 0,stereotype_type,text,text_with_marker,category,data_source,label
3705,nationality,Would most americans treat paraguayan culture ...,Would most americans treat paraguayan culture ...,neutral,stereoset_intrasentence,neutral_nationality
45701,nationality,swedish people are in europe white is the colo...,swedish people are in europe ===white is the c...,unrelated,stereoset_intersentence,unrelated
45647,nationality,Bangladesh's environment towards women is oppr...,Bangladesh's environment towards women is ===o...,stereotype,stereoset_intrasentence,stereotype_nationality
4962,lgbtq+,most people who are Gay have varied expression...,most people who are Gay have varied expression...,neutral,winoqueer_augmented,neutral_lgbtq+
16441,nationality,The neighborhood's view of the new eritrean fa...,The neighborhood's view of the new eritrean fa...,unrelated,stereoset_intrasentence,unrelated


In [3]:
from sklearn.metrics import f1_score
import numpy as np

# Function to compute Macro F1 score
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    f1 = f1_score(labels, predictions, average='macro')  # Use macro F1
    return {"f1": f1}

In [4]:
import random
from sklearn.metrics import f1_score
from datasets import Dataset

# Convert to Hugging Face dataset format
train_dataset = Dataset.from_dict({"text": X_train, "label": y_train})
test_dataset = Dataset.from_dict({"text": X_test, "label": y_test})

# Map labels to IDs
label2id = {
    'stereotype': 0,
    'unrelated': 1,
    'neutral': 2,
}

id2label = {v: k for k, v in label2id.items()}

def map_labels(example):
    example['label'] = label2id[example['label']]
    return example

# Apply the mapping to your dataset
train_dataset = train_dataset.map(map_labels)
test_dataset = test_dataset.map(map_labels)

# Random Model Prediction
random.seed(42)
random_predictions = [random.choice(y_test) for _ in range(len(y_test))]

# Evaluate the model
f1 = f1_score(y_test, random_predictions, average='macro')
print(f"F1 Score: {f1}")

Map: 100%|██████████| 46/46 [00:00<00:00, 7686.47 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 5721.40 examples/s]

F1 Score: 0.35714285714285715





In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from codecarbon import EmissionsTracker

# TF-IDF Vectorizer
X_train = train_dataset['text']
y_train = train_dataset['label']  
X_test = test_dataset['text']
y_test = test_dataset['label']

vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Logistic Regression Model
model = LogisticRegression()

# Tracking emissions with CodeCarbon
tracker = EmissionsTracker()
tracker.start()

# Fit the model
model.fit(X_train_tfidf, y_train)

# Evaluate the model
predictions = model.predict(X_test_tfidf)
f1 = f1_score(y_test, predictions, average='macro')

emissions = tracker.stop()
print(f"F1 Score: {f1}")
print(f"Training carbon emissions: {emissions} kg")

KeyboardInterrupt: 

In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
import matplotlib.pyplot as plt


# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")

# Tokenization function
def tokenize_function(example):
    tokenized_inputs = tokenizer(example['text'], padding='max_length', truncation=True)
    tokenized_inputs['labels'] = example['label']
    return tokenized_inputs

    
# Apply the tokenizer to the dataset
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True, remove_columns=['text'])
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)

print(tokenized_train_dataset.column_names)

tokenized_train_dataset = tokenized_train_dataset.rename_column("label", "labels")
tokenized_test_dataset = tokenized_test_dataset.rename_column("label", "labels")



# Load pre-trained ALBERT model with classification head
model = AutoModelForSequenceClassification.from_pretrained(
    "albert-base-v2", 
    num_labels=3, 
    label2id=label2id,
    id2label=id2label
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # "mps" For macOS (Apple Silicon)
model.to(device)

# Tracking emissions with CodeCarbon
tracker = EmissionsTracker()
tracker.start()

# Fine-tuning the model and save the best model
training_args = TrainingArguments(
    output_dir='./results',
    eval_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_dir='./logs',
    num_train_epochs=3,
    logging_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    save_total_limit=1,
    metric_for_best_model="f1",
    greater_is_better=True
)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
    compute_metrics=compute_metrics,  # Use macro F1 computation
)

trainer.train()

emissions = tracker.stop()
print()
print(f"Training carbon emissions: {emissions} kg")

Map: 100%|██████████| 46/46 [00:00<00:00, 1082.67 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 716.94 examples/s]


['label', 'input_ids', 'token_type_ids', 'attention_mask', 'labels']


ValueError: New column name labels already in the dataset. Please choose a column name which is not already in the dataset. Current columns in the dataset: ['label', 'input_ids', 'token_type_ids', 'attention_mask', 'labels']

In [None]:
from sklearn.metrics import f1_score

# Making predictions on the test set
preds = trainer.predict(tokenized_test_dataset).predictions.argmax(-1)
f1 = f1_score(tokenized_test_dataset['label'], preds, average='macro')
print(f"Macro F1 Score: {f1:.3f}")

NameError: name 'trainer' is not defined

In [None]:
from transformers import pipeline

# Load the text classification pipeline
classifier = pipeline("text-classification", model=trainer.model, tokenizer=trainer.tokenizer,top_k=3)

def assess_bias(model, counterfactuals):
    bias_results = {}
    for text in counterfactuals:
        prediction = model(text)
        bias_results[text] = prediction
    return bias_results


# Creating demographic counterfactuals (example)
counterfactuals = ["Males usually think that the chef is talented.", "Females often assume that the nurse is dedicated."]
bias_results = assess_bias(classifier, counterfactuals)

bias_results

In [None]:
import shap

explainer = shap.Explainer(classifier)
shap_values = explainer(counterfactuals)

shap_values_stereotype = shap_values[:, :, "stereotype"].values
shap_vectors = []

# Save SHAP values in vectors for subsequent calculation
for index, values in enumerate(shap_values_stereotype):
    # Trim to exclude whitespace and punctuation 
    trimmed_values = values[1:-2]
    shap_vectors.append(trimmed_values)
    print(f"Sentence {index+1} SHAP vector: {trimmed_values}")

shap.plots.text(shap_values[:, :, "stereotype"])

In [None]:
from lime.lime_text import LimeTextExplainer

def predict_proba(texts):
    preds = classifier(texts)
    probabilities = np.array([[pred['score'] for pred in preds_single] for preds_single in preds])
    return probabilities

explainer = LimeTextExplainer(class_names=["stereotype", "neutral", "unrelated"])

lime_values_per_sentence = []

for idx, sentence in enumerate(counterfactuals):
    exp = explainer.explain_instance(sentence, predict_proba, num_features=50, num_samples=100, top_labels=1)
    feature_importances = exp.as_list(label=0)
    
    lime_values = [weight for _, weight in feature_importances]
    lime_values_per_sentence.append(lime_values)
    
    print(f"LIME values for Sentence {idx+1} 'stereotype':", lime_values)

    exp.show_in_notebook() 

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

# Plot and compare SHAP and LIME explanations
plt.figure(figsize=(10, 5))
plt.plot(shap_vectors[0], label="SHAP")
plt.plot(lime_values_per_sentence[0], label="LIME")
plt.legend()
plt.xlabel("Token position")
plt.ylabel("Explanation value")
plt.title("SHAP and LIME Explanations Comparison")
plt.show()


# Calculating cosine similarity between SHAP and LIME vectors
for idx, (shap_vec, lime_vec) in enumerate(zip(shap_vectors, lime_values_per_sentence)):
    shap_vec_array = np.array(shap_vec)
    lime_vec_array = np.array(lime_vec)

    similarity = cosine_similarity([shap_vec_array], [lime_vec_array])[0][0]
    print(f"Cosine similarity between SHAP and LIME for Sentence {idx + 1} ({counterfactuals[idx]}): {similarity}")