<a href="https://colab.research.google.com/github/amelierueeck/ULM-25-authorship-profiling/blob/main/src/BERT_probing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Code from Polina

In [None]:
# !pip install spacy ftfy==4.4.3
# !python -m spacy download en

In [None]:
import torch
from transformers import BertTokenizer, BertModel
from transformers import AutoTokenizer, AutoModel
import numpy as np
import pandas as pd
import sys
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Pick the compute device by priority: CUDA GPU first, then Apple MPS, otherwise CPU.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

[Omitted here — see Polina's notebook for the remaining steps: loading the model and the tokenizer, computing sentence representations, and loading the data.]

In [None]:
# Load the train / validation / test splits.
# The three CSV files are read via relative paths, so they must first be put into the Colab folder.
train_df, val_df, test_df = [
    pd.read_csv(csv_name)
    for csv_name in ("data_train.csv", "data_val.csv", "data_test.csv")
]

# Show the first rows of the training split as a quick sanity check.
print(train_df.head())

                                                text  age  gender
0  urlLink Greenbelt   is a large Christian festi...    4    male
1  urlLink  Take the Purrsonality Quiz!    ohmygo...    2  female
2       Aine's Attempt at Blogging...prepare to hide    0  female
3  I was wondering how someone gets in the circus...    4  female
4  Though I love you so much,  I want you to know...    2  female


In [None]:
# Load the pretrained BERT checkpoint together with its matching tokenizer.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# output_hidden_states=True makes the forward pass return the hidden states of every layer.
model = AutoModel.from_pretrained(model_name, output_hidden_states=True)
model = model.to(device)

# Put the model in evaluation mode; as the cell's last expression this also displays the model.
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [None]:
#get the [CLS] activations for each layer (embedding output + every encoder layer)
def get_activations(texts, batch_size=16, max_len=256):
    """Return a list of numpy arrays: one array per layer, shape [N, hidden_size].

    The texts are tokenized and passed through the module-level ``model`` in
    batches. For every entry of ``outputs.hidden_states`` the vector at token
    position 0 (the [CLS] token) is kept.

    Args:
        texts: sequence of N input strings (e.g. a list).
        batch_size: number of texts per forward pass.
        max_len: maximum number of tokens per text; longer inputs are truncated.

    Returns:
        A list with one array per hidden state returned by the model (13 for
        bert-base: index 0 = embeddings, 12 = last layer), each of shape
        [N, hidden_size].

    Raises:
        ValueError: if ``texts`` is empty.
    """
    if len(texts) == 0:
        # Fail early with a clear message instead of an opaque np.concatenate error.
        raise ValueError("get_activations() needs at least one text")

    # One bucket of batch results per layer; created lazily from the first
    # forward pass so the layer count follows the model instead of being fixed at 13.
    per_layer_batches = None

    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors="pt", truncation=True,
                           padding=True, max_length=max_len).to(device)
        with torch.no_grad():
            # tuple with one [batch size, sequence length, hidden size] tensor per layer
            hidden_states = model(**inputs).hidden_states

        if per_layer_batches is None:
            per_layer_batches = [[] for _ in hidden_states]

        #take [CLS] token (index 0) from each layer
        for layer_idx, layer_hid in enumerate(hidden_states):
            per_layer_batches[layer_idx].append(layer_hid[:, 0, :].cpu().numpy())

    #concatenate batches -> one [N, hidden_size] array per layer
    return [np.concatenate(layer_parts, axis=0) for layer_parts in per_layer_batches]

In [None]:
# Extract the per-layer [CLS] features for each split (one list of arrays per split).
X_train_layers = get_activations(train_df["text"].to_list())
X_val_layers = get_activations(val_df["text"].to_list())
X_test_layers = get_activations(test_df["text"].to_list())

In [None]:
# Gender labels for the train / validation / test splits.
y_train_gender, y_val_gender, y_test_gender = [
    split_df["gender"].values for split_df in (train_df, val_df, test_df)
]

In [None]:
# Age labels for the train / validation / test splits.
y_train_age, y_val_age, y_test_age = [
    split_df["age"].values for split_df in (train_df, val_df, test_df)
]

In [None]:
#define probing function

def train_probe(X_train, y_train, X_val, y_val):
    """Fit a logistic-regression probe and score it on held-out data.

    Args:
        X_train: training features, one row per example.
        y_train: training labels.
        X_val: features of the evaluation examples.
        y_val: labels of the evaluation examples.

    Returns:
        A tuple ``(accuracy, clf)``: the accuracy of the fitted probe on
        ``(X_val, y_val)`` and the fitted classifier itself.
    """
    # multi_class is deliberately not passed: "auto" was already the default and the
    # argument is deprecated since scikit-learn 1.5 (it emits a FutureWarning).
    clf = LogisticRegression(max_iter=5000, class_weight="balanced")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_val)
    return accuracy_score(y_val, y_pred), clf

In [None]:
#run probes for gender:

# One probe per layer. The layer count comes from the extracted features rather than
# a hard-coded 13, so the loop keeps working for models of a different depth.
# Note: each probe is fitted on the train split and scored on the test split.
results_gender = []
for layer_idx in range(len(X_train_layers)):
    acc, _ = train_probe(
        X_train_layers[layer_idx], y_train_gender,
        X_test_layers[layer_idx], y_test_gender
    )
    results_gender.append(acc)
    print(f"[Gender] Layer {layer_idx}: probe accuracy = {acc:.3f}")

In [None]:
#run probes for age:

# One probe per layer. The layer count comes from the extracted features rather than
# a hard-coded 13, so the loop keeps working for models of a different depth.
# Note: each probe is fitted on the train split and scored on the test split.
results_age = []
for layer_idx in range(len(X_train_layers)):
    acc, _ = train_probe(
        X_train_layers[layer_idx], y_train_age,
        X_test_layers[layer_idx], y_test_age
    )
    results_age.append(acc)
    print(f"[Age] Layer {layer_idx}: probe accuracy = {acc:.3f}")

In [None]:
import matplotlib.pyplot as plt

# Probe accuracy per layer for both tasks. The x-axis is derived from the result
# lists themselves, so it always matches the number of probed layers.
plt.plot(range(len(results_gender)), results_gender, marker="o", label="Gender")
plt.plot(range(len(results_age)), results_age, marker="s", label="Age")
plt.xlabel("Layer")
plt.ylabel("Probe Accuracy")
plt.title("Probing Pretrained BERT for Gender vs Age")
plt.legend()
plt.show()

**Simple linear classifier**

In [None]:
class Classifier(torch.nn.Module):
  """Single linear layer that maps input_dim features to output_dim scores."""

  def __init__(self, input_dim, output_dim):
    super().__init__()
    # The only trainable component: an affine map input_dim -> output_dim.
    self.linear = torch.nn.Linear(in_features=input_dim, out_features=output_dim)

  def forward(self, input):
    """Apply the linear layer to `input` and return the result."""
    scores = self.linear(input)
    return scores

def build_classifier(emb_dim, num_labels, device):
  """Create a linear classifier together with its loss and optimizer.

  Args:
    emb_dim: size of the input feature vectors.
    num_labels: number of output classes.
    device: torch device the classifier and the loss are moved to.

  Returns:
    A tuple ``(classifier, criterion, optimizer)``: the ``Classifier`` instance,
    a cross-entropy loss, and an Adam optimizer over the classifier's parameters.
  """
  # Fixed: the original referenced the misspelled name `num_lables` (NameError).
  classifier = Classifier(emb_dim, num_labels).to(device)
  criterion = torch.nn.CrossEntropyLoss().to(device)
  # Fixed: the original called the misspelled method `paramters()` (AttributeError).
  optimizer = torch.optim.Adam(classifier.parameters())

  return classifier, criterion, optimizer