In [1]:
!nvidia-smi

Tue May 13 08:10:50 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   71C    P8             14W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# Install dependencies
!pip install transformers datasets scikit-learn
!pip install sympy --upgrade
!pip install datasets --upgrade

In [91]:
# Download anonymized_text.csv from GitHub
!curl -L -o anonymized_text.csv https://raw.githubusercontent.com/e-strauss/AnonymizationXAI/master/DB-bio/anonymized_text_train.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 2052k  100 2052k    0     0  4046k      0 --:--:-- --:--:-- --:--:-- 4040k


In [92]:
import pandas as pd
from datasets import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split
import torch

In [93]:
# Load dataset
df = pd.read_csv("/content/anonymized_text.csv")
df = df.rename(columns={"anonymized": "label"})
df = df[["text", "label"]]

test_size = 0.3

# train/test split
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df["text"].tolist(), df["label"].tolist(), test_size=test_size, random_state=42
)

train_dataset = Dataset.from_dict({"text": train_texts, "label": train_labels})
val_dataset = Dataset.from_dict({"text": val_texts, "label": val_labels})

In [94]:
df.shape, df.shape[0] * (1 - test_size)

((1452, 2), 1016.4)

In [95]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
def tokenize(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=128)


In [96]:
train_dataset = train_dataset.map(tokenize, batched=True)
val_dataset = val_dataset.map(tokenize, batched=True)

# Remove text column after tokenization
train_dataset = train_dataset.remove_columns(["text"])
val_dataset = val_dataset.remove_columns(["text"])

# Set format for PyTorch
train_dataset.set_format("torch")
val_dataset.set_format("torch")

Map:   0%|          | 0/1016 [00:00<?, ? examples/s]

Map:   0%|          | 0/436 [00:00<?, ? examples/s]

In [97]:
# Load model
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [98]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Metrics function
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = torch.argmax(torch.tensor(logits), axis=1)
    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    correct = (preds == torch.tensor(labels)).sum().item()
    total = len(labels)
    print(f"\nCorrect predictions: {correct} / {total}")
    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

In [99]:
# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    learning_rate=5e-6,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=8,
    report_to="none",
    fp16=torch.cuda.is_available(),
)

In [100]:
# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

In [101]:
trainer.train()

Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
8,No log,0.665162,0.545872,0.518337,0.995305,0.681672
16,0.731600,0.587168,0.816514,0.731707,0.985915,0.84
24,0.585800,0.521668,0.949541,0.961353,0.934272,0.947619
32,0.509300,0.478971,0.96789,0.990148,0.943662,0.966346
40,0.461500,0.389297,0.993119,0.995283,0.99061,0.992941
48,0.461500,0.317765,0.997706,0.995327,1.0,0.997658
56,0.360600,0.276133,0.997706,0.995327,1.0,0.997658
64,0.293000,0.248831,0.997706,0.995327,1.0,0.997658
72,0.265300,0.222048,0.997706,1.0,0.995305,0.997647
80,0.246900,0.201216,0.997706,1.0,0.995305,0.997647



Correct predictions: 238 / 436

Correct predictions: 356 / 436

Correct predictions: 414 / 436

Correct predictions: 422 / 436

Correct predictions: 433 / 436

Correct predictions: 435 / 436

Correct predictions: 435 / 436

Correct predictions: 435 / 436

Correct predictions: 435 / 436

Correct predictions: 435 / 436

Correct predictions: 435 / 436

Correct predictions: 435 / 436

Correct predictions: 436 / 436

Correct predictions: 436 / 436

Correct predictions: 436 / 436


TrainOutput(global_step=127, training_loss=0.3416334737942913, metrics={'train_runtime': 35.5646, 'train_samples_per_second': 28.568, 'train_steps_per_second': 3.571, 'total_flos': 66830208061440.0, 'train_loss': 0.3416334737942913, 'epoch': 1.0})

In [102]:
# Save model
model.save_pretrained("./bert-binary-classifier")
tokenizer.save_pretrained("./bert-binary-classifier")

('./bert-binary-classifier/tokenizer_config.json',
 './bert-binary-classifier/special_tokens_map.json',
 './bert-binary-classifier/vocab.txt',
 './bert-binary-classifier/added_tokens.json')

In [19]:
from google.colab import files
import shutil

# Zip the folder
shutil.make_archive("bert-binary-classifier", 'zip', "./bert-binary-classifier")

# Download the zip file
files.download("bert-binary-classifier.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [103]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load model and tokenizer
model = BertForSequenceClassification.from_pretrained("./bert-binary-classifier")
tokenizer = BertTokenizer.from_pretrained("./bert-binary-classifier")

# Ensure model is in eval mode
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [104]:
def predict(text):
    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Forward pass
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predicted_class = torch.argmax(logits, dim=1).item()
        confidence = torch.softmax(logits, dim=1)[0][predicted_class].item()

    return predicted_class, confidence

In [133]:
# Example query
text = 'Max Schneider, born in 2000, is a software engineer from Brooklyn, NYC. He works at Google and earns $123K per year.'
text_anonym = 'The person is still in his twenties and works for a major U.S. tech company. He already earns a six-figure salary.'
label, confidence = predict(text)

print(text)
print(f"Predicted Label: {label} (Confidence: {confidence:.2f})")

label, confidence = predict(text_anonym)

print(text_anonym)
print(f"Predicted Label for anonym: {label} (Confidence: {confidence:.2f})")


Max Schneider, born in 2000, is a software engineer from Brooklyn, NYC. He works at Google and earns $123K per year.
Predicted Label: 0 (Confidence: 0.79)
The person is still in his twenties and works for a major U.S. tech company. He already earns a six-figure salary.
Predicted Label for anonym: 1 (Confidence: 0.77)


In [117]:
# Download anonymized_text.csv from GitHub
!curl -L -o anonymized_text_val.csv https://raw.githubusercontent.com/e-strauss/AnonymizationXAI/master/DB-bio/anonymized_text_train.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  278k  100  278k    0     0  1014k      0 --:--:-- --:--:-- --:--:-- 1015k


In [106]:
df = pd.read_csv("/content/anonymized_text_val.csv")

In [125]:
total_correct = 0
for t, anonym in zip(df["text"], df["anonymized"]):
  l, c = predict(t)
  correct = l == int(anonym)
  print(l, c, correct)
  total_correct += int(correct)
total_correct, len(df), total_correct / len(df)

0 0.8862438201904297 True
0 0.8550282120704651 True
0 0.8945935368537903 True
0 0.877803385257721 True
0 0.8886864185333252 True
0 0.8802570104598999 True
0 0.8778079152107239 True
0 0.8587680459022522 True
0 0.886317789554596 True
0 0.8868659734725952 True
0 0.8838528990745544 True
0 0.870921790599823 True
0 0.8900377750396729 True
0 0.8856732249259949 True
0 0.8819363117218018 True
0 0.8822823166847229 True
0 0.8781517744064331 True
0 0.8754957318305969 True
0 0.8867427110671997 True
0 0.8884578943252563 True
0 0.8643509149551392 True
0 0.8314184546470642 True
0 0.891890823841095 True
0 0.8900482654571533 True
0 0.8911619186401367 True
0 0.8852280974388123 True
0 0.8794130682945251 True
0 0.8800781965255737 True
0 0.8703942894935608 True
0 0.8909770846366882 True
0 0.8852941989898682 True
0 0.8883830904960632 True
0 0.8649914860725403 True
0 0.8889606595039368 True
0 0.8367301225662231 True
0 0.8760253190994263 True
0 0.8839730620384216 True
0 0.8867992162704468 True
0 0.875220060348

(196, 196, 1.0)

In [114]:
df.columns

Index(['text', 'anonymized'], dtype='object')