# SEAT: Sentence Encoder Association Test (Contextual Bias in Embeddings)

- **[May et al., 2019]**: *"On Measuring Social Biases in Sentence Encoders"*
- **[Dev and Phillips, 2019]**: *"Attenuating Bias in Word Vectors"*

In [5]:
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
from scipy.spatial.distance import cosine
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings('ignore')

In [6]:
# Checkpoint identifier shared by the tokenizer and the encoder so both are
# loaded from the same pretrained weights/vocabulary.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Put the module in evaluation mode. Because this call is the last expression
# of the cell, the notebook also echoes the module's repr as the cell output.
model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [7]:
def get_sentence_embedding(sentence):
    """
    Encode `sentence` with the notebook-level `tokenizer`/`model` and return the
    last-layer hidden state at token position 0 (the [CLS] slot) as a NumPy vector.

    The input is truncated to at most 128 tokens. Only the first item of the
    batch is returned, so passing a single string is the intended use.
    """
    encoded = tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        last_hidden = model(**encoded).last_hidden_state
    # Batch item 0, token position 0, all hidden dimensions.
    return last_hidden[0, 0, :].numpy()

In [18]:
# =============================================================================
# SEAT Test Implementation (May et al., 2019)
# =============================================================================

# Each frame is (male subject, female subject, predicate). Building both template
# lists from the same frames keeps A_TEMPLATES[i] and B_TEMPLATES[i] identical
# except for the gendered subject. "{}" is the slot later filled with a profession.
_SEAT_TEMPLATE_FRAMES = [
    ("He", "She", "is an excellent"),
    ("This man", "This woman", "is a skilled"),
    ("He", "She", "works as a professional"),
    ("This guy", "This girl", "is a talented"),
    ("He", "She", "has expertise as a"),
    ("This gentleman", "This lady", "is a qualified"),
]

# Template set A: male-gendered contexts
A_TEMPLATES = [f"{male} {predicate} {{}}." for male, _, predicate in _SEAT_TEMPLATE_FRAMES]

# Template set B: female-gendered contexts
B_TEMPLATES = [f"{female} {predicate} {{}}." for _, female, predicate in _SEAT_TEMPLATE_FRAMES]

# Target set X: professions used as the "STEM" group
X_WORDS = [
    "engineer",
    "programmer",
    "physicist",
    "mathematician",
    "scientist",
    "architect",
    "developer",
    "researcher",
    "technician",
    "analyst",
    "inventor",
    "mechanic",
]

# Target set Y: professions used as the "Care" group (some entries are multi-word)
Y_WORDS = [
    "nurse",
    "teacher",
    "counselor",
    "therapist",
    "caregiver",
    "social worker",
    "kindergarten teacher",
    "babysitter",
    "nanny",
    "childcare worker",
    "aide",
    "assistant",
]

In [21]:
def weat_association(target_word, attribute_templates_1, attribute_templates_2):
    """
    Score `target_word` by how tightly its sentence embeddings cluster under two
    template sets.

    The word is substituted into every template of each set, each sentence is
    embedded with `get_sentence_embedding`, and the result is

        mean pairwise cosine similarity within set 1
        - mean pairwise cosine similarity within set 2

    NOTE(review): despite the function name, this is NOT the WEAT/SEAT
    differential association s(w, A, B) = mean_a cos(w, a) - mean_b cos(w, b);
    there is no separate target vector `w` here. The computed quantity is kept
    unchanged so downstream numbers stay comparable; confirm whether the true
    WEAT statistic was intended.

    Parameters
    ----------
    target_word : str
        Word inserted into the "{}" slot of every template.
    attribute_templates_1, attribute_templates_2 : sequence of str
        Format strings with one "{}" placeholder. Each set needs at least two
        templates, otherwise there are no pairs to compare.

    Returns
    -------
    float
        Within-set-1 cohesion minus within-set-2 cohesion.

    Raises
    ------
    ValueError
        If either template set has fewer than two entries (previously this
        produced a silent NaN).
    """
    # Validate before running the encoder so bad input fails fast.
    if len(attribute_templates_1) < 2 or len(attribute_templates_2) < 2:
        raise ValueError(
            "weat_association needs at least two templates in each set to form pairs"
        )

    embs_1 = [get_sentence_embedding(t.format(target_word)) for t in attribute_templates_1]
    embs_2 = [get_sentence_embedding(t.format(target_word)) for t in attribute_templates_2]

    return _mean_pairwise_similarity(embs_1) - _mean_pairwise_similarity(embs_2)


def _mean_pairwise_similarity(embeddings):
    """Mean cosine similarity over all unordered pairs of distinct positions."""
    count = len(embeddings)
    # Pair by index, not by value: two different sentences that happen to map to
    # the same vector are still a legitimate pair (similarity 1.0). Cosine
    # similarity is symmetric, so visiting each unordered pair once gives the
    # same mean as visiting both orderings.
    similarities = [
        1 - cosine(embeddings[i], embeddings[j])
        for i in range(count)
        for j in range(i + 1, count)
    ]
    return np.mean(similarities)

def improved_seat_test(X_words, Y_words, male_templates, female_templates):
    """
    Score every word in both target sets with `weat_association` and summarise
    the group difference as Cohen's d (pooled sample standard deviation).

    Returns (effect_size, X_associations, Y_associations).
    """

    def score(word):
        return weat_association(word, male_templates, female_templates)

    X_associations = list(map(score, X_words))
    Y_associations = list(map(score, Y_words))

    n_x = len(X_associations)
    n_y = len(Y_associations)
    var_x = np.var(X_associations, ddof=1)
    var_y = np.var(Y_associations, ddof=1)

    # Pooled standard deviation: sample variances weighted by degrees of freedom.
    pooled_std = np.sqrt(((n_x - 1) * var_x + (n_y - 1) * var_y) / (n_x + n_y - 2))

    mean_gap = np.mean(X_associations) - np.mean(Y_associations)
    effect_size = mean_gap / pooled_std
    return effect_size, X_associations, Y_associations

In [22]:
# Run the template-cohesion variant of the test and report its summary numbers.
print("1. IMPROVED TEMPLATES METHOD:")
effect1, X_assoc1, Y_assoc1 = improved_seat_test(X_WORDS, Y_WORDS, A_TEMPLATES, B_TEMPLATES)
print(
    f"   Effect Size: {effect1:.4f}",
    f"   STEM mean: {np.mean(X_assoc1):.4f}, Care mean: {np.mean(Y_assoc1):.4f}",
    sep="\n",
)

1. IMPROVED TEMPLATES METHOD:
   Effect Size: -1.9954
   STEM mean: -0.0241, Care mean: -0.0108


In [None]:
def compute_sentence_set_embedding(words, templates):
    """
    Embed every (word, template) sentence and stack the vectors into one array.

    Rows are ordered word-major: all templates for words[0], then all templates
    for words[1], and so on.
    """
    embeddings = []
    for word in words:
        for template in templates:
            sentence = template.format(word)
            embeddings.append(get_sentence_embedding(sentence))
    return np.array(embeddings)


def cosine_similarity(a, b):
    """Compute cosine similarity between two vectors."""
    return 1 - cosine(a, b)


def _cross_context_similarity(word, A_templates, B_templates):
    """Mean cosine similarity between a word's A-template and B-template sentence embeddings."""
    word_A_embs = compute_sentence_set_embedding([word], A_templates)
    word_B_embs = compute_sentence_set_embedding([word], B_templates)
    return np.mean([cosine_similarity(emb_a, emb_b)
                    for emb_a in word_A_embs for emb_b in word_B_embs])


def seat_effect_size(X_words, Y_words, A_templates, B_templates):
    """
    Compare two word groups by how similar each word's sentence embeddings are
    across the two template sets, summarised as Cohen's d.

    Per-word score: the mean cosine similarity over every pairing of an
    A-template sentence with a B-template sentence for that word. A score near 1
    means swapping the template set barely moves the embedding.

    NOTE(review): this is not the differential association s(w, A, B) of
    WEAT/SEAT (similarity to A minus similarity to B); the per-word score is a
    cross-context similarity. The computation is kept as-is so previously
    reported numbers stay comparable; confirm the intended statistic.

    Parameters
    ----------
    X_words, Y_words : sequence of str
        Target word groups (e.g. STEM vs care professions); at least two each,
        because the pooled sample variance is undefined otherwise.
    A_templates, B_templates : sequence of str
        Format strings with one "{}" slot (e.g. male vs female contexts);
        must be non-empty.

    Returns
    -------
    (effect_size, X_associations, Y_associations)
        Cohen's d of X scores vs Y scores (pooled sample standard deviation),
        followed by the per-word score lists in input order.

    Raises
    ------
    ValueError
        If a word group has fewer than two entries or a template set is empty
        (these cases previously produced a silent NaN).
    """
    # Validate before running the encoder so bad input fails fast.
    if len(X_words) < 2 or len(Y_words) < 2:
        raise ValueError("seat_effect_size needs at least two words in each target group")
    if len(A_templates) == 0 or len(B_templates) == 0:
        raise ValueError("seat_effect_size needs at least one template in each set")

    # One score per word; the score depends only on the word and the templates,
    # never on which group the word belongs to.
    X_associations = [_cross_context_similarity(word, A_templates, B_templates)
                      for word in X_words]
    Y_associations = [_cross_context_similarity(word, A_templates, B_templates)
                      for word in Y_words]

    # Compute effect size (Cohen's d)
    X_mean = np.mean(X_associations)
    Y_mean = np.mean(Y_associations)
    pooled_std = np.sqrt(((len(X_associations)-1)*np.var(X_associations, ddof=1) +
                          (len(Y_associations)-1)*np.var(Y_associations, ddof=1)) /
                         (len(X_associations) + len(Y_associations) - 2))

    effect_size = (X_mean - Y_mean) / pooled_std
    return effect_size, X_associations, Y_associations


In [24]:
def permutation_test(X_words, Y_words, A_templates, B_templates, n_permutations=10, seed=None):
    """
    Permutation test for the effect size returned by `seat_effect_size`.

    The per-word association scores are computed once. Each permutation then
    reshuffles those scores between the two groups and recomputes Cohen's d.
    Because a word's score does not depend on which group it is placed in, this
    is equivalent to re-running `seat_effect_size` on shuffled word lists, but
    it avoids re-embedding every sentence on every permutation.

    Parameters
    ----------
    X_words, Y_words, A_templates, B_templates
        Passed through to `seat_effect_size`.
    n_permutations : int
        Number of random regroupings. The p-value resolution is 1/n_permutations,
        so the default of 10 is only a smoke test; use a few thousand for a
        meaningful p-value.
    seed : int or None
        If given, permutations are drawn from a private RandomState seeded with
        this value; if None, NumPy's global random state is used.

    Returns
    -------
    (original_effect, p_value, permuted_effects)
        Observed effect size, the two-tailed p-value (fraction of permutations
        with |d| >= |observed d|), and the list of permuted effect sizes.

    Raises
    ------
    ValueError
        If n_permutations is less than 1.
    """
    if n_permutations < 1:
        raise ValueError("n_permutations must be at least 1")

    # Compute original effect size and the per-word scores exactly once.
    original_effect, X_assoc, Y_assoc = seat_effect_size(X_words, Y_words, A_templates, B_templates)

    all_scores = np.asarray(list(X_assoc) + list(Y_assoc), dtype=float)
    n_X = len(X_assoc)

    rng = np.random if seed is None else np.random.RandomState(seed)

    permuted_effects = []
    for _ in range(n_permutations):
        # Randomly shuffle the cached scores and split at the original group size.
        shuffled = rng.permutation(all_scores)
        permuted_effects.append(_cohens_d(shuffled[:n_X], shuffled[n_X:]))

    # Calculate p-value (two-tailed)
    p_value = np.mean(np.abs(permuted_effects) >= np.abs(original_effect))

    return original_effect, p_value, permuted_effects


def _cohens_d(x_scores, y_scores):
    """Cohen's d with pooled sample standard deviation (same formula as seat_effect_size)."""
    pooled_std = np.sqrt(((len(x_scores)-1)*np.var(x_scores, ddof=1) +
                          (len(y_scores)-1)*np.var(y_scores, ddof=1)) /
                         (len(x_scores) + len(y_scores) - 2))
    return (np.mean(x_scores) - np.mean(y_scores)) / pooled_std

In [25]:
# =============================================================================
# Run SEAT Analysis
# =============================================================================

print("Computing SEAT effect size...")
effect_size, X_assoc, Y_assoc = seat_effect_size(X_WORDS, Y_WORDS, A_TEMPLATES, B_TEMPLATES)

# Single report block: effect size plus the mean per-word score of each group.
print(
    "\nSEAT Results:",
    f"Effect Size (Cohen's d): {effect_size:.4f}",
    f"X (STEM) mean association: {np.mean(X_assoc):.4f}",
    f"Y (Care) mean association: {np.mean(Y_assoc):.4f}",
    sep="\n",
)

Computing SEAT effect size...

SEAT Results:
Effect Size (Cohen's d): -0.3767
X (STEM) mean association: 0.8982
Y (Care) mean association: 0.9031


In [26]:


# Statistical significance test
print("\nRunning permutation test...")
original_effect, p_value, permuted_effects = permutation_test(
    X_WORDS, Y_WORDS, A_TEMPLATES, B_TEMPLATES
)

# Report the observed effect, its p-value, and the verdict at the 0.05 level.
print(
    f"Original effect size: {original_effect:.4f}",
    f"P-value: {p_value:.4f}",
    f"Statistically significant: {'Yes' if p_value < 0.05 else 'No'}",
    sep="\n",
)



Running permutation test...
Original effect size: -0.3767
P-value: 0.5000
Statistically significant: No


In [None]:
# =============================================================================
# Visualization
# =============================================================================

# Two side-by-side panels on one figure, addressed through explicit Axes handles.
fig, (ax_perm, ax_words) = plt.subplots(1, 2, figsize=(12, 5))

# 1. Effect size distribution from permutation test, with the observed value marked
ax_perm.hist(permuted_effects, bins=50, alpha=0.7, color='lightblue', edgecolor='black')
ax_perm.axvline(original_effect, color='red', linestyle='--', linewidth=2,
                label=f'Observed: {original_effect:.3f}')
ax_perm.set_xlabel('Effect Size')
ax_perm.set_ylabel('Frequency')
ax_perm.set_title('Permutation Test Distribution')
ax_perm.legend()
ax_perm.grid(True, alpha=0.3)

# 2. Association scores by word category
# Y bars are offset past the X bars with a one-row gap between the groups.
x_pos = np.arange(len(X_WORDS))
y_pos = np.arange(len(Y_WORDS)) + len(X_WORDS) + 1

ax_words.barh(x_pos, X_assoc, color='skyblue', label='STEM professions', alpha=0.8)
ax_words.barh(y_pos, Y_assoc, color='lightcoral', label='Care professions', alpha=0.8)

ax_words.set_yticks(list(x_pos) + list(y_pos))
ax_words.set_yticklabels(X_WORDS + Y_WORDS)
ax_words.set_xlabel('Association Score')
ax_words.set_title('Individual Word Associations')
ax_words.legend()
ax_words.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

In [None]:
# =============================================================================
# PCA Visualization of Sentence Embeddings
# =============================================================================

print("\nGenerating PCA visualization...")

# Collect all sentence embeddings with labels
all_sentences = []
all_labels = []
all_embeddings = []

# For every word: all male-template sentences first, then all female-template ones.
gendered_template_sets = (("Male", A_TEMPLATES), ("Female", B_TEMPLATES))

for word in X_WORDS + Y_WORDS:
    word_type = "STEM" if word in X_WORDS else "Care"

    for gender, templates in gendered_template_sets:
        for template in templates:
            sentence = template.format(word)
            all_sentences.append(sentence)
            all_labels.append(f"{word} ({word_type}, {gender})")
            all_embeddings.append(get_sentence_embedding(sentence))

# PCA reduction to two components for plotting
embeddings_array = np.array(all_embeddings)
pca = PCA(n_components=2)
embeddings_2d = pca.fit_transform(embeddings_array)

# Plot: one colour per (word group, template gender) combination
plt.figure(figsize=(14, 8))
colors = {'STEM, Male': 'blue', 'STEM, Female': 'lightblue',
          'Care, Male': 'red', 'Care, Female': 'lightcoral'}

for i, (label, point) in enumerate(zip(all_labels, embeddings_2d)):
    # Labels look like "word (Group, Gender)"; split off the word, keep "Group, Gender".
    word, _, context = label.partition(' (')
    context = context.rstrip(')')

    x, y = point
    plt.scatter(x, y, c=colors[context], alpha=0.7, s=60)

    # Annotate only every 8th point to reduce clutter
    if i % 8 == 0:
        plt.annotate(f"{word}", (x, y), xytext=(5, 5), textcoords='offset points',
                     fontsize=8, alpha=0.8)

# Create legend from empty scatter handles, one per colour
for context, color in colors.items():
    plt.scatter([], [], c=color, label=context, s=60)

plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%} variance)')
plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%} variance)')
plt.title('PCA of Sentence Embeddings: SEAT Analysis')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# Interpretation and Summary
# =============================================================================

print("\n" + "="*60)
print("INTERPRETATION")
print("="*60)

# The headline verdict must agree with the significance line printed above it:
# a non-trivial |d| alone is not evidence of bias when the permutation test
# does not reject the null.
is_significant = p_value < 0.05
has_effect = abs(effect_size) > 0.2
if has_effect and is_significant:
    bias_verdict = 'shows'
elif has_effect:
    bias_verdict = 'shows no statistically significant'
else:
    bias_verdict = 'shows minimal'

print(f"""
Based on the SEAT analysis following May et al. (2019):

1. EFFECT SIZE: {effect_size:.4f}
   - Each profession's score is the mean cosine similarity between its
     male-template and female-template sentence embeddings
   - Positive values indicate STEM sentences change less under the gender swap than Care sentences
   - Negative values indicate the opposite
   - |d| > 0.2 is considered a small effect, |d| > 0.5 medium, |d| > 0.8 large

2. STATISTICAL SIGNIFICANCE: p = {p_value:.4f}
   - {'Significant' if is_significant else 'Not significant'} at α = 0.05 level
   - Based on {len(permuted_effects)} random permutations

3. PRACTICAL IMPLICATIONS:
   - The model {bias_verdict} gender bias in profession associations
   - Any bias found reflects patterns in the training data
   - Larger |d| means the two profession groups react more differently to the gender swap

METHODOLOGY NOTES:
- Uses contextual sentence embeddings (BERT [CLS] tokens)
- Follows SEAT framework from May et al. (2019)
- Statistical significance via permutation testing
- Effect size computed as Cohen's d
""")

print("Analysis complete!")

In [31]:
from transformers import BertTokenizer, BertModel
import torch
import numpy as np
from scipy.spatial.distance import cosine
from itertools import combinations
import random

# Load BERT
# NOTE: this rebinds the notebook-level `tokenizer` and `model` names that an
# earlier cell assigned via AutoTokenizer/AutoModel for the same checkpoint, so
# every function defined from here on uses these objects.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

def get_sentence_embedding(sentence):
    """
    Return the last-layer hidden state at token position 0 (the CLS slot) for
    `sentence`, as a NumPy vector, using the notebook-level tokenizer and model.
    """
    encoded = tokenizer(sentence, truncation=True, return_tensors="pt")
    # Inference only: no gradients needed.
    with torch.no_grad():
        last_hidden = model(**encoded).last_hidden_state
    # Batch item 0, token position 0.
    return last_hidden[0, 0].numpy()

# Word groups
# Every entry is a single whitespace-free token, so each group is written as one
# space-separated string and split into a list.

# Target sets: extended male and female first names
male_names = (
    "John Paul Mike Kevin Steve Greg Jeff Bill "
    "Frank George Ron Chris Mark Dan Tom Josh"
).split()
female_names = (
    "Amy Joan Lisa Sarah Diana Kate Ann Donna "
    "Emily Julia Rachel Karen Tina Laura Jessica Natalie"
).split()

# Attribute sets: extended career and family words
career_words = (
    "executive management professional corporation salary office "
    "business career entrepreneur employee supervisor director "
    "consultant analyst finance marketing"
).split()
family_words = (
    "home parents children family cousins marriage wedding "
    "relatives mom dad babysitter nursery housewife grandparents aunt uncle"
).split()


# Apply sentence template
def apply_template(word):
    """Wrap `word` in the fixed carrier sentence used for every embedding in this test."""
    return "This is about {}.".format(word)

def get_group_embeddings(words):
    """Embed the templated sentence for each word and stack the vectors, one row per word."""
    sentences = map(apply_template, words)
    vectors = [get_sentence_embedding(sentence) for sentence in sentences]
    return np.array(vectors)

# Get embeddings
# X/Y are the target sets (male/female names); A/B are the attribute sets
# (career/family words). Each is an array with one embedding row per word.
X, Y, A, B = (
    get_group_embeddings(group)
    for group in (male_names, female_names, career_words, family_words)
)

# Differential association function
def s(w, A, B):
    """
    Differential association of vector `w`: its mean cosine similarity to the
    vectors in `A` minus its mean cosine similarity to the vectors in `B`.
    """

    def mean_similarity(attribute_vectors):
        return np.mean([1 - cosine(w, vec) for vec in attribute_vectors])

    return mean_similarity(A) - mean_similarity(B)

def test_statistic(X, Y, A, B):
    """
    Group-level statistic: the summed differential association `s` of every
    vector in `X` minus the summed differential association of every vector in `Y`.
    """
    x_total = np.sum([s(x, A, B) for x in X])
    y_total = np.sum([s(y, A, B) for y in Y])
    return x_total - y_total

# Observed test statistic
# Evaluated on the original (unpermuted) X/Y grouping; this is the value that
# the final cell prints as the bias score.
observed_stat = test_statistic(X, Y, A, B)

# Permutation test
def permutation_test(X, Y, A, B, n_samples=10000, seed=42):
    """
    Two-sided permutation p-value for the statistic
    sum_x s(x, A, B) - sum_y s(y, A, B).

    The differential association of every target vector is computed once, in
    vectorised form: mean cosine similarity to `A` minus mean cosine similarity
    to `B`. Each permutation only reshuffles those scalar scores between the two
    groups. A vector's score does not depend on its group, so the statistic is
    the same as recomputing it from reshuffled embeddings, without repeating
    every cosine computation on every permutation.

    The observed statistic is derived from the `X`, `Y`, `A`, `B` passed in, not
    from any module-level variable, so the function is correct for any inputs.

    Parameters
    ----------
    X, Y : array-like, shape (n_x, dim) and (n_y, dim)
        Target embeddings (one row per item).
    A, B : array-like, shape (n_a, dim) and (n_b, dim)
        Attribute embeddings (one row per item).
    n_samples : int
        Number of random permutations.
    seed : int or None
        Seed for a private RandomState; global RNG state is left untouched.

    Returns
    -------
    float
        Fraction of permutations whose |statistic| is at least the observed
        |statistic|.

    Raises
    ------
    ValueError
        If n_samples is less than 1.
    """
    if n_samples < 1:
        raise ValueError("n_samples must be a positive integer")

    def unit_rows(vectors):
        # Normalise each row to unit length so a dot product is a cosine similarity.
        matrix = np.atleast_2d(np.asarray(vectors, dtype=float))
        return matrix / np.linalg.norm(matrix, axis=1, keepdims=True)

    targets = unit_rows(np.concatenate([X, Y]))
    # One score per target vector, in X-then-Y order.
    scores = (targets @ unit_rows(A).T).mean(axis=1) - (targets @ unit_rows(B).T).mean(axis=1)

    n = len(X)
    observed = scores[:n].sum() - scores[n:].sum()
    # Summation order differs between permutations; a tiny tolerance keeps
    # regroupings that are mathematically tied with the observed value counted.
    tolerance = 1e-12 * max(1.0, abs(observed))

    rng = np.random.RandomState(seed)
    greater_count = 0
    for _ in range(n_samples):
        rng.shuffle(scores)
        stat = scores[:n].sum() - scores[n:].sum()
        if abs(stat) >= abs(observed) - tolerance:
            greater_count += 1

    p_value = greater_count / n_samples
    return p_value

# Run test
p_value = permutation_test(X, Y, A, B, n_samples=10000)
# Report the raw observed statistic together with its permutation p-value.
print(
    f"SEAT-6 Gender Bias Score: {observed_stat:.4f}",
    f"p-value (permutation test): {p_value:.4f}",
    sep="\n",
)


KeyboardInterrupt: 