### ENV Setup

In [None]:
!pip install transformers

In [None]:
!pip install optuna

In [None]:
!pip install scikit-learn

In [None]:
!pip install 'accelerate>=0.26.0'

In [None]:
!pip install --upgrade pyarrow

In [None]:
!pip install datasets

In [None]:
!pip install torch

### Vandalism Detection Binary Classifier

In [2]:
import ast
import os
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from torch.nn.functional import softmax
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Dataset location. Override it with the VANDALISM_DATA_DIR environment variable;
# the fallback is the path this notebook was originally run with.
DATA_DIR = Path(os.environ.get(
    "VANDALISM_DATA_DIR",
    "/Users/vittoriadiachenko/PycharmProjects/knowledge_manipulation/datasets",
))

# Row cap per language for quick experiments; set to None to read the whole file.
N_ROWS = 10_000

uk_df = pd.read_csv(DATA_DIR / "uk_before.csv", nrows=N_ROWS)
ru_df = pd.read_csv(DATA_DIR / "ru_before.csv", nrows=N_ROWS)

In [4]:
# Tag each frame with its language code, then stack both under a fresh 0..n-1 index.
for lang_code, lang_frame in (("uk", uk_df), ("ru", ru_df)):
    lang_frame["lang"] = lang_code

combined_df = pd.concat([uk_df, ru_df], ignore_index=True)

# save to reuse
# combined_df.to_csv("wikipedia/combined_before.csv", index=False)

In [5]:
def _join_text_list(value):
    """Turn one cell holding a list of strings into a newline-joined string.

    Args:
        value: Either an iterable of strings, the string form of a Python
            list literal (as read back from CSV), or a missing value.

    Returns:
        The items joined with "\\n"; "" when the cell is None/NaN.

    Raises:
        ValueError, SyntaxError: if a string cell is not a valid Python literal.
    """
    if isinstance(value, str):
        # literal_eval accepts only Python literals, so a crafted CSV cell
        # cannot execute code the way eval() would.
        value = ast.literal_eval(value)
    if value is None or (isinstance(value, float) and pd.isna(value)):
        # Empty CSV cells are read as NaN; treat them as "no text".
        return ""
    return "\n".join(value)


# The revert flag is the classification target.
combined_df['label'] = combined_df['is_reverted']
combined_df["texts_insert"] = combined_df["texts_insert"].apply(_join_text_list)
combined_df["texts_removed"] = combined_df["texts_removed"].apply(_join_text_list)

print(len(combined_df))

20000


In [6]:
def process_texts_change(x):
    """Flatten a list of (old, new) text pairs into a single string.

    Each pair becomes "old [SEP] new"; pairs are joined with newlines.

    Args:
        x: A list of 2-item pairs, or the string form of such a list
            (as read back from CSV).

    Returns:
        The flattened string, or "" when ``x`` is not a list, cannot be
        parsed as a Python literal, or contains items that are not pairs.
    """
    try:
        # literal_eval accepts only Python literals, so a crafted CSV cell
        # cannot execute code the way eval() would.
        pairs = ast.literal_eval(x) if isinstance(x, str) else x
        if isinstance(pairs, list):
            return "\n".join(f"{old} [SEP] {new}" for old, new in pairs)
        return ""
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Parse errors and malformed pairs only; KeyboardInterrupt and
        # SystemExit are deliberately no longer swallowed.
        return ""
    
# Replace every texts_change cell with its flattened "old [SEP] new" form.
combined_df["texts_change"] = combined_df["texts_change"].map(process_texts_change)

In [7]:
# Remove fully identical rows and renumber the index 0..n-1 in the same step.
combined_df = combined_df.drop_duplicates(ignore_index=True)

In [8]:
# Class balance of the target: absolute counts first, then percentages.
label_series = combined_df["label"]
print(label_series.value_counts())
print(label_series.value_counts(normalize=True) * 100)

label
0    17789
1     2211
Name: count, dtype: int64
label
0    88.945
1    11.055
Name: proportion, dtype: float64


In [9]:
from sklearn.utils import resample

# Partition the rows by class label.
positive_mask = combined_df['label'] == 1
negative_mask = combined_df['label'] == 0
df_pos = combined_df[positive_mask]
df_neg = combined_df[negative_mask]

# Sample the larger class (without replacement) down to the size of the smaller one.
if len(df_pos) < len(df_neg):
    df_neg_downsampled = resample(df_neg, replace=False, n_samples=len(df_pos), random_state=42)
    balanced_parts = [df_pos, df_neg_downsampled]
else:
    df_pos_downsampled = resample(df_pos, replace=False, n_samples=len(df_neg), random_state=42)
    balanced_parts = [df_pos_downsampled, df_neg]

# Stack both classes, shuffle with a fixed seed and renumber the index.
df_balanced = (
    pd.concat(balanced_parts)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Sanity check: both classes should now have the same number of rows.
print("Balanced label distribution:")
print(df_balanced['label'].value_counts())
print(df_balanced['label'].value_counts(normalize=True) * 100)


Balanced label distribution:
label
0    2211
1    2211
Name: count, dtype: int64
label
0    50.0
1    50.0
Name: proportion, dtype: float64


### Fine-tuning Step

In [13]:
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback
from helpers.bert_trainer_helper import BertTrainerHelper

def _make_training_args(model_tag, suffix="", bf16=True, **overrides):
    """Build the TrainingArguments shared by the search run and the final run.

    Args:
        model_tag: Inserted into the output and logging directory names.
        suffix: Appended to both directory names (e.g. "_best").
        bf16: Forwarded to ``TrainingArguments(bf16=...)``.
        **overrides: Any TrainingArguments field that should replace the
            baseline value below.

    Returns:
        A ``TrainingArguments`` instance.
    """
    settings = dict(
        output_dir=f"./model_{model_tag}{suffix}",
        eval_strategy="epoch",
        save_strategy="epoch",
        num_train_epochs=4,
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        learning_rate=2e-5,
        weight_decay=0.01,
        warmup_ratio=0.1,
        load_best_model_at_end=True,
        metric_for_best_model="eval_f1",
        logging_dir=f"./logs_{model_tag}{suffix}",
        save_total_limit=1,
        bf16=bf16,
    )
    settings.update(overrides)
    return TrainingArguments(**settings)


def prepare_and_train(df, text_col, timestamp_col, model_tag, n_trials=10, bf16=True):
    """Search hyperparameters, retrain with the best ones, evaluate and save.

    Steps: build train/eval datasets through ``BertTrainerHelper``, run an
    Optuna search that maximizes ``eval_f1``, train a fresh Trainer with the
    winning values, print its evaluation metrics and save the model to
    ``./model_<model_tag>_best``.

    Args:
        df: DataFrame handed to ``helper.prepare_datasets_timestamp_split``.
        text_col: Name of the text column, forwarded to the helper.
        timestamp_col: Name of the timestamp column, forwarded to the helper
            (presumably used for a chronological split; confirm in
            helpers/bert_trainer_helper.py).
        model_tag: Used in output and logging directory names.
        n_trials: Number of Optuna trials.
        bf16: Forwarded to ``TrainingArguments(bf16=...)``; set False on
            hardware without bfloat16 support.

    Returns:
        None.
    """
    print(f"\nTraining model for field: {text_col}")
    helper = BertTrainerHelper()

    train_dataset, eval_dataset = helper.prepare_datasets_timestamp_split(df, text_col, timestamp_col)

    def build_trainer(training_args):
        # NOTE(review): recent transformers releases deprecate `tokenizer=` in
        # favour of `processing_class=`; check the installed version before switching.
        return Trainer(
            model_init=helper.model_init,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            tokenizer=helper.tokenizer,
            compute_metrics=helper.compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
        )

    # Baseline arguments; the search overrides whatever hp_space_optuna samples.
    trainer = build_trainer(_make_training_args(model_tag, bf16=bf16))

    best_run = trainer.hyperparameter_search(
        direction="maximize",
        backend="optuna",
        n_trials=n_trials,
        hp_space=helper.hp_space_optuna,
        compute_objective=lambda metrics: metrics.get("eval_f1", 0),
    )

    best_params = best_run.hyperparameters
    best_args = _make_training_args(
        model_tag,
        suffix="_best",
        bf16=bf16,
        num_train_epochs=best_params["num_train_epochs"],
        per_device_train_batch_size=best_params["per_device_train_batch_size"],
        # Evaluation reuses the training batch size chosen by the search.
        per_device_eval_batch_size=best_params["per_device_train_batch_size"],
        learning_rate=best_params["learning_rate"],
        weight_decay=best_params["weight_decay"],
    )

    final_trainer = build_trainer(best_args)
    final_trainer.train()

    print(f"Best hyperparameters for {model_tag}:", best_run.hyperparameters)

    print("Final evaluation:")
    metrics = final_trainer.evaluate()
    print(metrics)

    final_trainer.save_model(f"./model_{model_tag}_best")

In [15]:
# For each text field: search hyperparameters, train, evaluate and save the final model.
# NOTE(review): the unbalanced combined_df is passed here, so the df_balanced frame
# built earlier is never used in this notebook. Confirm whether balancing is meant
# to happen inside BertTrainerHelper or df_balanced should be passed instead.
field_to_tag = (
    ('texts_insert', 'vandalism_insert'),
    ('texts_removed', 'vandalism_remove'),
    ('texts_change', 'vandalism_change'),
    ('page_title', 'vandalism_title'),
)
for field_name, field_tag in field_to_tag:
    prepare_and_train(combined_df, field_name, 'event_timestamp', field_tag)


Training model for field: texts_insert


Map: 100%|██████████| 1542/1542 [00:00<00:00, 4978.47 examples/s]
Map: 100%|██████████| 330/330 [00:00<00:00, 4845.12 examples/s]
  return Trainer(
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[I 2025-04-25 20:40:53,242] A new study created in memory with name: no-name-f9ad6cf2-1e43-4924-84f8-3175ddccff66
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[W 2025-04-25 20:42:58,437] Trial 0 failed with parameters: {'learning_rate': 2.1008384045070094e-05, 'num_train_epochs': 6, 'per_device_train_batch_size': 16, 'weight_decay': 0.11298196827241126} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/vittoriadiachenko/PycharmProjects/knowledge_manipulation/.venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/Users/vittoriadiachenko/PycharmProjects/knowledge_manipulation/.venv/lib/python3.10/site-packages/transformers/integrations/integration_utils.py", line 254, in _objective
    trainer.train(resume_from_checkpoint=checkpoint, trial=trial)
  File "/Users/vittoriadiachenko/PycharmProjects/knowledge_manipulation/.venv/lib/python3.10/site-packages/transformers/trainer.py", line 2245, in train
    return inner_training_loop(
  File "/Users/vittoriadiachenko/PycharmProjects/knowledge_manipulation/.venv/lib/python3.1

KeyboardInterrupt: 

In [None]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.nn.functional import softmax
from tqdm import tqdm

# Run inference on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def chunk_text(text, tokenizer, max_length=512, stride=256, min_tokens=10):
    """Split ``text`` into overlapping token windows, returned as decoded strings.

    The text is encoded once with special tokens, windows of up to
    ``max_length`` token ids are taken every ``stride`` ids, and each window
    is decoded back to a string with special tokens stripped.

    Args:
        text: String to split.
        tokenizer: Object exposing ``encode(text, add_special_tokens=...)``
            and ``decode(ids, skip_special_tokens=...)``.
        max_length: Maximum number of token ids per window.
        stride: Offset, in token ids, between consecutive window starts.
        min_tokens: Trailing windows shorter than this are dropped. The
            first window is always kept.

    Returns:
        List of decoded chunks; empty only when the tokenizer yields no ids.
    """
    tokens = tokenizer.encode(text, add_special_tokens=True)
    chunks = []
    for start in range(0, len(tokens), stride):
        window = tokens[start:start + max_length]
        # Only trailing windows are dropped. Dropping the first one would
        # return no chunks at all for a short text and leave the caller
        # with nothing to classify.
        if start > 0 and len(window) < min_tokens:
            break
        chunks.append(tokenizer.decode(window, skip_special_tokens=True))
    return chunks

def add_model_features(df, model_path, text_column, prefix, batch_size=32):
    """Score ``text_column`` with a saved classifier and add aggregate features.

    Every text whose stripped length exceeds 10 characters is split with
    ``chunk_text``, each chunk is classified, and the per-chunk logits and
    softmax probabilities are reduced with mean/max/min. Results are written
    to columns named ``{prefix}_{mean|max|min}_{logit|prob}_{0|1}``; rows that
    fail the length filter are not assigned.

    Args:
        df: DataFrame to score. It is modified in place and also returned.
        model_path: Path or identifier passed to ``from_pretrained`` for both
            the tokenizer and the model.
        text_column: Name of the column holding the texts.
        prefix: Prefix for the generated column names.
        batch_size: Number of chunks sent through the model at once.

    Returns:
        The same DataFrame with the feature columns added.
    """
    print(f"\nApplying model: {prefix} | column: {text_column}")

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.to(device)
    model.eval()

    valid_mask = df[text_column].fillna("").str.strip().str.len() > 10
    texts = df.loc[valid_mask, text_column].astype(str).tolist()

    # Reductions over the chunk axis; each returns one value per class.
    reducers = (
        ("mean", lambda scores: scores.mean(dim=0)),
        ("max", lambda scores: scores.max(dim=0).values),
        ("min", lambda scores: scores.min(dim=0).values),
    )
    score_kinds = ("logit", "prob")
    # aggregated[(kind, stat)] holds one per-class array for every valid text.
    aggregated = {(kind, stat): [] for kind in score_kinds for stat, _ in reducers}

    for text in tqdm(texts):
        chunks = chunk_text(text, tokenizer)
        if not chunks:
            # No usable chunk: score the raw text so torch.cat below never
            # receives an empty list.
            chunks = [text]

        chunk_logits = []
        with torch.no_grad():
            for start in range(0, len(chunks), batch_size):
                batch = chunks[start:start + batch_size]
                encoded = tokenizer(batch, padding=True, truncation=True, max_length=512, return_tensors="pt").to(device)
                chunk_logits.append(model(**encoded).logits.cpu())

        logits = torch.cat(chunk_logits, dim=0)
        probs = softmax(logits, dim=1)

        for kind, scores in (("logit", logits), ("prob", probs)):
            for stat, reducer in reducers:
                aggregated[(kind, stat)].append(reducer(scores).numpy())

    # Columns are created in the order: all logit stats, then all prob stats.
    for kind in score_kinds:
        for stat, _ in reducers:
            per_text = aggregated[(kind, stat)]
            for class_idx in (0, 1):
                df.loc[valid_mask, f"{prefix}_{stat}_{kind}_{class_idx}"] = [row[class_idx] for row in per_text]

    return df

In [None]:
import pandas as pd
from helpers.text_classifier_helper import TextClassifierHelper

# Load the hold-out CSV.
df = pd.read_csv("uk_holdout_with_all_manip_features.csv")

# One classifier per text field, each loaded from a fixed checkpoint directory.
# NOTE(review): these paths differ from the "./model_<tag>_best" folders that
# prepare_and_train saves to. Confirm these are the intended checkpoints.
insert_helper = TextClassifierHelper("./model_insert/checkpoint-15951")
remove_helper = TextClassifierHelper("./model_remove/checkpoint-6632")
change_helper = TextClassifierHelper("./model_change/checkpoint-16330")
title_helper = TextClassifierHelper("./model_title/checkpoint-37524")

# Apply the classifiers in sequence; each call returns the frame for the next one.
scoring_plan = (
    (insert_helper, "texts_insert", "manip_insert"),
    (remove_helper, "texts_removed", "manip_remove"),
    (change_helper, "texts_change", "manip_change"),
    (title_helper, "page_title", "manip_title"),
)
for field_helper, field_name, feature_prefix in scoring_plan:
    df = field_helper.classify_column(df, text_column=field_name, prefix=feature_prefix)

df.to_csv("uk_holdout_with_all_manip_features_and_logits.csv", index=False)