In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import ast
from difflib import SequenceMatcher

import numpy as np
import pandas as pd
from datasets import Dataset

import torch
from transformers import (
    TrainingArguments,
    Trainer,
)

In [None]:
# Path to WNC
WNC_FILE_PATH = "/content/drive/MyDrive/Colab Notebooks/biased.full"

# Path to local RoBERTa checkpoint
ROBERTA_PATH = "/content/drive/MyDrive/Colab Notebooks/roberta-model-complete"

# Max length for token classification
MAX_LENGTH = 256

In [None]:
# Load WNC
df_wnc = pd.read_csv(
    WNC_FILE_PATH,
    sep="\t",
    names=["id", "src_tok", "tgt_tok", "src_raw", "tgt_raw", "src_POS_tags", "tgt_parse_tags"],
    on_bad_lines="warn",
)

print(df_wnc.head())
print(len(df_wnc), "rows loaded from WNC")



  df_wnc = pd.read_csv(


          id                                            src_tok  \
0  258378316  during the campaign , controversy erupted over...   
1  486527143  nic ##aea was con ##vo ##ked by the emperor co...   
2   54024499  it was rather unfortunate that he ve ##hem ##e...   
3  160186886  dennis the menace is an american animated seri...   
4    8797183  today , on large farms , motorcycles , dogs or...   

                                             tgt_tok  \
0  during the campaign , some pointed out alleged...   
1  nic ##aea was con ##vo ##ked by the emperor co...   
2  he ve ##hem ##ently opposed the bud ##ding ind...   
3  dennis the menace is an american animated seri...   
4  today , on large farms , motorcycles , dogs or...   

                                             src_raw  \
0  during the campaign, controversy erupted over ...   
1  nicaea was convoked by the emperor constantine...   
2  it was rather unfortunate that he vehemently o...   
3  dennis the menace is an american 

In [None]:
def parse_token_field(val):
    """
    Robust parser for WNC token columns.
    - If already a list, return as is.
    - If string like "['foo', 'bar']", parse with ast.literal_eval.
    - Otherwise split on whitespace.
    """
    if isinstance(val, list):
        return val
    s = str(val).strip()
    if s.startswith("[") and s.endswith("]"):
        try:
            parsed = ast.literal_eval(s)
            if isinstance(parsed, list):
                return [str(x) for x in parsed]
        except Exception:
            pass
    # fallback: whitespace split
    return s.split()


In [None]:
df_wnc["src_tokens"] = df_wnc["src_tok"].apply(parse_token_field)
df_wnc["tgt_tokens"] = df_wnc["tgt_tok"].apply(parse_token_field)

print(df_wnc[["src_tokens", "tgt_tokens"]].head())

                                          src_tokens  \
0  [during, the, campaign, ,, controversy, erupte...   
1  [nic, ##aea, was, con, ##vo, ##ked, by, the, e...   
2  [it, was, rather, unfortunate, that, he, ve, #...   
3  [dennis, the, menace, is, an, american, animat...   
4  [today, ,, on, large, farms, ,, motorcycles, ,...   

                                          tgt_tokens  
0  [during, the, campaign, ,, some, pointed, out,...  
1  [nic, ##aea, was, con, ##vo, ##ked, by, the, e...  
2  [he, ve, ##hem, ##ently, opposed, the, bud, ##...  
3  [dennis, the, menace, is, an, american, animat...  
4  [today, ,, on, large, farms, ,, motorcycles, ,...  


In [None]:
def get_bio_labels_for_pair(src_tokens, tgt_tokens):
    """
    Compute BIO labels for src_tokens based on diff vs tgt_tokens.
    - equal:    O
    - replace:  B-BIAS for first src token, I-BIAS for remaining in [i1:i2]
    - delete:   B-BIAS / I-BIAS for src tokens [i1:i2]
    - insert:   (tgt-only; ignored for src side)
    """
    src_tokens = list(src_tokens)
    tgt_tokens = list(tgt_tokens)

    labels = ["O"] * len(src_tokens)
    sm = SequenceMatcher(a=src_tokens, b=tgt_tokens)

    for tag, i1, i2, j1, j2 in sm.get_opcodes():
        if tag in ("replace", "delete"):
            if i1 < i2:
                labels[i1] = "B-BIAS"
                for k in range(i1 + 1, i2):
                    labels[k] = "I-BIAS"
        # 'equal' → do nothing (already O)
        # 'insert' → tgt-only tokens, ignore for src labels

    return labels

# Apply to all rows
df_wnc["word_tags"] = df_wnc.apply(
    lambda row: get_bio_labels_for_pair(row["src_tokens"], row["tgt_tokens"]),
    axis=1,
)

# Sanity check lengths
mismatch = (df_wnc["src_tokens"].str.len() != df_wnc["word_tags"].str.len()).sum()
print("Num rows with length mismatch:", mismatch)

df_wnc[["src_tokens", "word_tags"]].head()


Num rows with length mismatch: 0


Unnamed: 0,src_tokens,word_tags
0,"[during, the, campaign, ,, controversy, erupte...","[O, O, O, O, B-BIAS, I-BIAS, I-BIAS, O, O, O, ..."
1,"[nic, ##aea, was, con, ##vo, ##ked, by, the, e...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
2,"[it, was, rather, unfortunate, that, he, ve, #...","[B-BIAS, I-BIAS, I-BIAS, I-BIAS, I-BIAS, O, O,..."
3,"[dennis, the, menace, is, an, american, animat...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
4,"[today, ,, on, large, farms, ,, motorcycles, ,...","[O, O, O, O, O, O, O, O, O, O, B-BIAS, O, O, O..."


In [None]:
label2id = {"O": 0, "B-BIAS": 1, "I-BIAS": 2}
id2label = {v: k for k, v in label2id.items()}

df_wnc["word_label_ids"] = df_wnc["word_tags"].apply(
    lambda tags: [label2id[t] for t in tags]
)

df_wnc[["src_tokens", "word_tags", "word_label_ids"]].head()


Unnamed: 0,src_tokens,word_tags,word_label_ids
0,"[during, the, campaign, ,, controversy, erupte...","[O, O, O, O, B-BIAS, I-BIAS, I-BIAS, O, O, O, ...","[0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,"[nic, ##aea, was, con, ##vo, ##ked, by, the, e...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[it, was, rather, unfortunate, that, he, ve, #...","[B-BIAS, I-BIAS, I-BIAS, I-BIAS, I-BIAS, O, O,...","[1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[dennis, the, menace, is, an, american, animat...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,"[today, ,, on, large, farms, ,, motorcycles, ,...","[O, O, O, O, O, O, O, O, O, O, B-BIAS, O, O, O...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ..."


In [None]:
# Filter out any broken rows if needed
mask = df_wnc["src_tokens"].str.len() == df_wnc["word_label_ids"].str.len()
span_df = df_wnc.loc[mask, ["src_tokens", "word_label_ids"]].rename(
    columns={"src_tokens": "words", "word_label_ids": "word_labels"}
)

print("Using", len(span_df), "rows after length sanity check")

def has_bias(labels):
    return any(l in (label2id["B-BIAS"], label2id["I-BIAS"]) for l in labels)

span_df = span_df[span_df["word_labels"].apply(has_bias)]
print("After filtering, rows:", len(span_df))

raw_dataset = Dataset.from_pandas(span_df, preserve_index=False)

# Train/val split (e.g., 90/10)
split_dataset = raw_dataset.train_test_split(test_size=0.1, seed=42)
raw_train_ds = split_dataset["train"]
raw_val_ds = split_dataset["test"]

raw_train_ds[0]


Using 181473 rows after length sanity check
After filtering, rows: 178870


{'words': ['it',
  'is',
  'also',
  'home',
  'to',
  'the',
  'play',
  'fields',
  'of',
  'glasgow',
  "'",
  's',
  'elite',
  'school',
  'st',
  'al',
  '##oys',
  '##ius',
  'college',
  '.'],
 'word_labels': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0]}

In [None]:
# Paths
from pathlib import Path

# If your fine-tuned model dir is actually named something else, fix this:
ROBERTA_PATH = Path("./roberta-model-complete").resolve()
print("Model path:", ROBERTA_PATH)
print("Exists?", ROBERTA_PATH.exists())

# %% [markdown]
# 1) Load tokenizer from base checkpoint (or whatever base you used originally)

from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("roberta-base", add_prefix_space=True)  # or your original base
print("Tokenizer loaded from roberta-base")

# %% [markdown]
# 2) Load model weights from your local fine-tuned checkpoint (if it exists),
#    otherwise fall back to roberta-base.

if ROBERTA_PATH.exists():
    print("Loading model from local path:", ROBERTA_PATH)
    model = AutoModelForTokenClassification.from_pretrained(
        str(ROBERTA_PATH),
        num_labels=len(label2id),
        id2label=id2label,
        label2id=label2id,
        local_files_only=True,
    )
else:
    print("WARNING: Local model path not found, falling back to roberta-base")
    model = AutoModelForTokenClassification.from_pretrained(
        "roberta-base",
        num_labels=len(label2id),
        id2label=id2label,
        label2id=label2id,
    )


Model path: /content/roberta-model-complete
Exists? False


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Tokenizer loaded from roberta-base


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
def tokenize_and_align_labels(example):
    """
    example["words"]: list[str] (word tokens)
    example["word_labels"]: list[int] (same length as words)
    """
    tokenized = tokenizer(
        example["words"],
        is_split_into_words=True,
        truncation=True,
        padding="max_length",
        max_length=MAX_LENGTH,
    )

    word_ids = tokenized.word_ids()  # list of word index or None
    labels = []
    word_labels = example["word_labels"]

    for word_idx in word_ids:
        if word_idx is None:
            labels.append(-100)  # ignore special tokens
        else:
            labels.append(word_labels[word_idx])

    tokenized["labels"] = labels
    return tokenized

# Map over datasets
train_ds = raw_train_ds.map(tokenize_and_align_labels)
val_ds   = raw_val_ds.map(tokenize_and_align_labels)

train_ds.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
val_ds.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

print(train_ds[0])


Map:   0%|          | 0/160983 [00:00<?, ? examples/s]

Map:   0%|          | 0/17887 [00:00<?, ? examples/s]

{'input_ids': tensor([    0,    24,    16,    67,   184,     7,     5,   310,  5447,     9,
         5921,   281, 32770,   128,   579,  6281,   334,  1690,  1076, 47385,
        15093, 47385,  6125,  1564,   479,     2,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1, 

In [None]:
model = AutoModelForTokenClassification.from_pretrained(
    "roberta-base",
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
)

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
training_args = TrainingArguments(
    output_dir="./bias_span_classifier",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=16,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    warmup_ratio=0.1,
    logging_steps=50,
    load_best_model_at_end=True,
)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = logits.argmax(-1)

    # Flatten & filter out -100
    preds_flat = []
    labels_flat = []
    for p_row, l_row in zip(preds, labels):
        for p, l in zip(p_row, l_row):
            if l == -100:
                continue
            preds_flat.append(int(p))
            labels_flat.append(int(l))

    preds_flat = np.array(preds_flat)
    labels_flat = np.array(labels_flat)

    # Simple token-level precision/recall/F1 for "biased vs non-biased"
    from sklearn.metrics import precision_recall_fscore_support

    # Merge B-BIAS + I-BIAS into one "biased" class for binary metrics
    biased_pred  = np.isin(preds_flat, [label2id["B-BIAS"], label2id["I-BIAS"]])
    biased_label = np.isin(labels_flat, [label2id["B-BIAS"], label2id["I-BIAS"]])

    precision, recall, f1, _ = precision_recall_fscore_support(
        biased_label, biased_pred, average="binary"
    )

    return {
        "precision_bias": precision,
        "recall_bias": recall,
        "f1_bias": f1,
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


  trainer = Trainer(
  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkevcao[0m ([33mkevcao-georgia-institute-of-technology[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Precision Bias,Recall Bias,F1 Bias
1,0.3117,0.293498,0.714195,0.383014,0.498623
2,0.2923,0.283978,0.793158,0.339331,0.475312
3,0.269,0.287729,0.744131,0.407034,0.526226
4,0.2318,0.293045,0.691301,0.46106,0.553179
5,0.224,0.31528,0.666165,0.479701,0.557762
6,0.1774,0.341745,0.662754,0.474427,0.552996
7,0.154,0.353241,0.688285,0.455269,0.548037
8,0.1437,0.382407,0.663433,0.482179,0.558467
9,0.1329,0.399542,0.647011,0.496857,0.562079
10,0.1189,0.427509,0.661962,0.479258,0.555985


TrainOutput(global_step=160992, training_loss=0.16464710122156134, metrics={'train_runtime': 26960.334, 'train_samples_per_second': 95.538, 'train_steps_per_second': 5.971, 'total_flos': 3.3651772872575386e+17, 'train_loss': 0.16464710122156134, 'epoch': 16.0})

In [None]:
from safetensors.torch import save_file
import os

ROBERTA_PATH = "/content/drive/MyDrive/Colab Notebooks/roberta-model-complete"

# ensure folder exists
os.makedirs(ROBERTA_PATH, exist_ok=True)

# save tokenizer too (recommended)
tokenizer.save_pretrained(ROBERTA_PATH)

# save model config + architecture structure
model.config.save_pretrained(ROBERTA_PATH)

# save weights as safetensors
save_file(model.state_dict(), f"{ROBERTA_PATH}/model.safetensors")

print("Saved fine-tuned model to:", ROBERTA_PATH)


Saved fine-tuned model to: /content/drive/MyDrive/Colab Notebooks/roberta-model-complete


In [3]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from safetensors.torch import load_file
import torch

ROBERTA_PATH = "/content/drive/MyDrive/Colab Notebooks/nlp project/roberta-model-complete"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    ROBERTA_PATH,
    add_prefix_space=True  # IMPORTANT for word alignment
)

# Load model + safetensor weights
model = AutoModelForTokenClassification.from_pretrained(
    ROBERTA_PATH,
    local_files_only=True
)
state_dict = load_file(f"{ROBERTA_PATH}/model.safetensors")
model.load_state_dict(state_dict)
model.eval()


RobertaForTokenClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
            

In [4]:
def predict_token_labels(text):
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
    )

    with torch.no_grad():
        outputs = model(**encoded)

    logits = outputs.logits  # (1, seq_len, num_labels)
    pred_ids = logits.argmax(-1)[0].tolist()  # -> list of integers

    tokens = tokenizer.convert_ids_to_tokens(encoded["input_ids"][0])

    # Convert numeric labels to strings:
    id2label = model.config.id2label
    labels = [id2label.get(i, "IGN") for i in pred_ids]

    # Clean up special tokens
    token_label_pairs = []
    for tok, lab in zip(tokens, labels):
        if tok in tokenizer.all_special_tokens:
            continue
        token_label_pairs.append((tok.replace("Ġ", " "), lab))

    return token_label_pairs


In [5]:
example = "The corrupt regime brutally silences any opposition."
result = predict_token_labels(example)

for tok, lab in result:
    print(f"{tok:12}  {lab}")


 The          O
 corrupt      B-BIAS
 regime       I-BIAS
 brutally     B-BIAS
 sil          O
ences         O
 any          O
 opposition   O
.             O
