In [125]:
#model (debiased)
#finetuning + debiasing integrated
#it debias all tokens
import torch
from transformers import BertTokenizer, BertForMaskedLM, AdamW
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from tqdm import tqdm

#load and norm. warmth/competence vectors
warmth_vector = pd.read_csv('/Users/aleksandragarbat/Desktop/Thesis/warmth_direction.csv').values.flatten()
competence_vector = pd.read_csv('/Users/aleksandragarbat/Desktop/Thesis/competence_direction.csv').values.flatten()

warmth_vector = torch.tensor(warmth_vector, dtype=torch.float32)
competence_vector = torch.tensor(competence_vector, dtype=torch.float32)

warmth_vector = warmth_vector / warmth_vector.norm()
competence_vector = competence_vector / competence_vector.norm()

#bias subspace setup
def gram_schmidt(vectors):
    ortho = []
    for v in vectors:
        for u in ortho:
            v = v - torch.dot(v, u) * u
        v = v / torch.norm(v)
        ortho.append(v)
    return ortho

g0, g1 = gram_schmidt([warmth_vector, competence_vector])
components = [g0, g1]
weights = [0.6, 0.4] #there weights of either w or c can be adjusted and accounted for in debiasing

#debiasing function
def debias_custom(h, components, weights, n_mask=None):
    debiased = h.clone()
    for i, g in enumerate(components):
        weight = weights[i]
        n = n_mask[i] if n_mask is not None else 1
        debiased -= weight * n * torch.dot(h, g) * g
    return debiased

#custom dataset for MLM (this sample was created based on descriptions from O'net job descriptins)
class MLM_Dataset(Dataset):
    def __init__(self, texts, tokenizer, max_len=128):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = self.texts[item]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        input_ids = encoding["input_ids"].flatten()
        attention_mask = encoding["attention_mask"].flatten()
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": input_ids
        }

#updating BERT model with optional full-sequence debiasing
class DebiasedBertForMaskedLM(BertForMaskedLM):
    def __init__(self, config, components, weights, tokenizer, debias_all_tokens=True):
        super().__init__(config)
        self.components = components
        self.weights = weights
        self.tokenizer = tokenizer
        self.debias_all_tokens = debias_all_tokens

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            return_dict=True
        )

        sequence_output = outputs.last_hidden_state
        debiased_sequence_output = sequence_output.clone()

        if self.debias_all_tokens:
            # Debias all tokens
            for batch_idx in range(input_ids.size(0)):
                for token_idx in range(input_ids.size(1)):
                    h = sequence_output[batch_idx, token_idx]
                    debiased_sequence_output[batch_idx, token_idx] = debias_custom(h, self.components, self.weights)
        else:
            #only debias [mask] tokens (not needed)
            mask_token_index = (input_ids == self.tokenizer.mask_token_id)
            for batch_idx in range(input_ids.size(0)):
                for token_idx in torch.where(mask_token_index[batch_idx])[0]:
                    h = sequence_output[batch_idx, token_idx]
                    debiased_sequence_output[batch_idx, token_idx] = debias_custom(h, self.components, self.weights)

        prediction_scores = self.cls(debiased_sequence_output)

        loss = None
        if labels is not None:
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))

        return {"loss": loss, "logits": prediction_scores}

#fine-tuning loop
def train_finetune(model, train_dataset, tokenizer, epochs=3, batch_size=8, learning_rate=5e-5):
    model.train()
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    optimizer = AdamW(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        epoch_loss = 0
        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch + 1}/{epochs}"):
            input_ids = batch["input_ids"].to(model.device)
            attention_mask = batch["attention_mask"].to(model.device)
            labels = batch["labels"].to(model.device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs['loss']
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(train_loader)}")

# === Setup tokenizer and model ===
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

debias_model = DebiasedBertForMaskedLM.from_pretrained(
    "bert-base-uncased", 
    components=components, 
    weights=weights,
    tokenizer=tokenizer,
    debias_all_tokens=True 
)

#sample training texts with masks 
train_texts = [
    "[MASK] conduct subsurface surveys to identify the characteristics of potential land or mining development sites.",
    "[MASK] conduct research on nuclear engineering projects or apply principles and theory of nuclear science.",
    "[MASK] devise methods to improve oil and gas extraction and production.",
    "[MASK] all engineers not listed separately.",
    "[MASK] design, develop, or evaluate energy-related projects or programs.",
    "[MASK] research, design, develop, or test automation, intelligent systems, smart devices, or industrial systems.",
    "[MASK] research, design, develop, or test microelectromechanical systems.",
    "[MASK] design technologies specializing in light information or light energy.",
    "[MASK] research, design, develop, or test robotic applications.",
    "[MASK] design, develop, or supervise the production of materials, devices, or systems of unique composition.",
    "[MASK] design underground or overhead wind farm collector systems.",
    "[MASK] perform site-specific engineering analysis or evaluation of solar projects."
]

train_dataset = MLM_Dataset(train_texts, tokenizer)

#fine-tuning model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
debias_model.to(device)
train_finetune(debias_model, train_dataset, tokenizer, epochs=3, batch_size=8, learning_rate=5e-5)

#saving fine-tuned debiased model
debias_model.save_pretrained("/Users/aleksandragarbat/Desktop/BERT_finetuned")
tokenizer.save_pretrained("/Users/aleksandragarbat/Desktop/BERT_finetuned")

#saving for reuse
torch.save({
    'components': components,
    'weights': weights
}, '/Users/aleksandragarbat/Desktop/BERT_finetuned/bias_vectors.pt')


  **scheduler_specific_kwargs,
Training Epoch 1/3: 100%|█████████████████████████| 2/2 [00:06<00:00,  3.48s/it]


Epoch 1, Loss: 14.265144348144531


Training Epoch 2/3: 100%|█████████████████████████| 2/2 [00:06<00:00,  3.16s/it]


Epoch 2, Loss: 9.34230375289917


Training Epoch 3/3: 100%|█████████████████████████| 2/2 [00:06<00:00,  3.00s/it]


Epoch 3, Loss: 6.929811954498291


In [127]:
#how to access model and get predictions
#loading bias components and weightsf (as specified above)
bias_data = torch.load('/Users/aleksandragarbat/Desktop/BERT_finetuned/bias_vectors.pt')
components = bias_data['components']
weights = bias_data['weights']

#loadingtokenizer
tokenizer = BertTokenizer.from_pretrained("/Users/aleksandragarbat/Desktop/BERT_finetuned")

#model
model = DebiasedBertForMaskedLM.from_pretrained(
    "/Users/aleksandragarbat/Desktop/BERT_finetuned",
    components=components,
    weights=weights,
    tokenizer=tokenizer,
    debias_all_tokens=True  # or False, depending on your use case
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.eval()
model.to(device)

professions = [
    'accountant', 'actor', 'architect', 'artist', 'bank teller', 'bartender', 
    'bus driver', 'cashier', 'c.e.o.', 'chef', 'childcare worker', 'computer programmer', 
    'construction worker', 'custodian', 'customer service representative', 'dentist', 
    'director', 'doctor', 'electrician', 'engineer', 'factory worker', 'farmer', 
    'financial advisor', 'firefighter', 'fisherman', 'garbage collector', 'graphic designer', 
    'lab technician', 'landscaper', 'lawyer', 'librarian', 'maid', 'manager', 'mechanic', 
    'medical assistant', 'musician', 'news anchor', 'nurse', 'paramedic', 'pilot', 'plumber', 
    'police', 'politician', 'postal worker', 'professor', 'psychiatrist', 'salesperson', 
    'scientist', 'security guard', 'secretary', 'taxi driver', 'teacher', 'technical support worker', 
    'truck driver', 'vet', 'waiter', 'welder', 'writer'
]

#input sentences related to w and c (created based on sets of warmth and competent associated words)
input_texts = ["The [MASK] was always affectionate towards others.",
    "Everyone described the [MASK] as agreeable and kind.",
    "The [MASK] performed an altruistic act without hesitation.",
    "The [MASK] was known for being amicable and helpful.",
    "The [MASK] was beneficent in all their actions.",
    "The [MASK] showed benevolence during the crisis.",
    "The [MASK] comforted the child with a caring gesture.",
    "The [MASK] was civil even under stress.",
    "The [MASK] responded with compassion.",
    "The [MASK] worked cooperatively with everyone.",
    "The [MASK] was fair to all team members.",
    "The [MASK] was faithful to their responsibilities.",
    "The [MASK] forgave the mistake without a second thought.",
    "The [MASK] always greeted us with friendliness.",
    "The [MASK] was generous with their time.",
    "The [MASK] was genuinely concerned about others.",
    "The [MASK] was always helpful when needed.",
    "The [MASK] displayed honesty in every situation.",
    "The [MASK] welcomed everyone into the group.",
    "The [MASK] was polite during the entire conversation.",
    "The [MASK] was responsible and reliable.",
    "The [MASK] acted with unselfish intentions.",
    "The [MASK] was known for being tender and thoughtful.",
    "The [MASK] was understanding toward their coworkers.",
    "The [MASK] treated everyone with justice and fairness.",
    "The [MASK] solved the problem efficiently.",
    "The [MASK] tackled the technical issue with intelligence.",
    "The [MASK] confidently presented the final report.",
    "The [MASK] is known for being highly competent and reliable.",
    "Everyone admired how the [MASK] handled the pressure.",
    "The [MASK] used logical reasoning to find the solution.",
    "Only a skilled [MASK] could have debugged the system so quickly.",
    "The [MASK] designed the system with remarkable precision.",
    "The [MASK] made an effective plan under tight deadlines.",
    "Our team needed a knowledgeable [MASK] to lead the project.",
    "The [MASK] demonstrated excellent problem-solving abilities.",
    "She’s a meticulous [MASK] with great attention to detail.",
    "A creative [MASK] can always find new approaches.",
    "The [MASK] worked efficiently and completed the task early.",
    "Thanks to the [MASK], the system ran without errors.",
    "The [MASK] applied smart strategies to optimize performance.",
    "The [MASK] had the practical skills needed for the task.",
    "He proved to be a resilient and determined [MASK].",
    "The [MASK] worked independently and stayed motivated.",
    "With a shrewd mind, the [MASK] negotiated the deal."
]

#evaluating model on each sentence
for input_text in input_texts:
    print(f"\nInput: {input_text}")
    
    #tokenize
    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    #forward pass
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs['logits']


    #finding masked token index
    masked_index = (inputs.input_ids == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]

    #scoring professions
    predictions = []
    for profession in professions:
        profession_ids = tokenizer.encode(profession, add_special_tokens=False)
        #logits for multi-token professions= approximation
        profession_logit = sum(logits[0, masked_index, token_id].item() for token_id in profession_ids)
        predictions.append((profession, profession_logit))

    #sorting and display top professions
    predictions.sort(key=lambda x: x[1], reverse=True)
    top_k = 5
    print(f"Top {top_k} predicted professions:")
    for profession, logit in predictions[:top_k]:
        print(f"{profession}: {logit:.2f}")

#the logits are not the best approch for this due to problems with multi-token professions so this has to be adjusted (and interpterability as weel)


Input: The [MASK] was always affectionate towards others.
Top 5 predicted professions:
childcare worker: 11.62
customer service representative: 8.16
security guard: 7.81
doctor: 6.06
artist: 5.64

Input: Everyone described the [MASK] as agreeable and kind.
Top 5 predicted professions:
customer service representative: 9.80
childcare worker: 9.45
security guard: 7.08
news anchor: 7.02
doctor: 6.18

Input: The [MASK] performed an altruistic act without hesitation.
Top 5 predicted professions:
childcare worker: 12.85
security guard: 10.24
customer service representative: 9.21
doctor: 7.49
taxi driver: 7.21

Input: The [MASK] was known for being amicable and helpful.
Top 5 predicted professions:
customer service representative: 11.73
security guard: 7.78
childcare worker: 7.03
director: 6.39
doctor: 6.31

Input: The [MASK] was beneficent in all their actions.
Top 5 predicted professions:
security guard: 9.29
customer service representative: 6.51
childcare worker: 6.21
police: 5.04
factory 