In [None]:
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    DataCollatorWithPadding, Trainer, TrainingArguments,
    EarlyStoppingCallback, set_seed
)
import pandas as pd, numpy as np, torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, classification_report

# Run configuration: the checkpoint to fine-tune and the CSV with the labelled rows.
MODEL_NAME = "dbmdz/bert-base-turkish-cased"
CSV_PATH = "Trial_Data.csv"

# Seed the RNGs once, up front, through transformers' set_seed so reruns are comparable.
SEED = 42
set_seed(SEED)


In [None]:
# Load the raw CSV and keep only the Turkish rows (lang == "tur", case-insensitive).
df = pd.read_csv(CSV_PATH)
df_tr = df[df["lang"].astype(str).str.lower().eq("tur")].copy()

# Drop rows with a missing text or label *before* casting: astype(str) would turn a
# NaN text into the literal string "nan" (which survives the empty-text filter below
# and ends up as a training example), and astype(int) raises on a NaN label.
df_tr = df_tr.dropna(subset=["text", "polarization"]).copy()
df_tr["text"] = df_tr["text"].astype(str).str.strip()
df_tr = df_tr[df_tr["text"].str.len() > 0].copy()
df_tr["label"] = df_tr["polarization"].astype(int)

# Stratified 80/20 split on the label, seeded for reproducibility.
train_df, val_df = train_test_split(
    df_tr[["text", "label"]],
    test_size=0.2,
    random_state=SEED,
    stratify=df_tr["label"],
)


In [None]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def tokenize(batch):
    """Tokenize the "text" field of a batch, truncating to at most 128 tokens."""
    return tokenizer(batch["text"], truncation=True, max_length=128)


# Build each split from its DataFrame (index discarded) and tokenize it in batches.
train_ds = Dataset.from_pandas(train_df.reset_index(drop=True))
train_ds = train_ds.map(tokenize, batched=True)
val_ds = Dataset.from_pandas(val_df.reset_index(drop=True))
val_ds = val_ds.map(tokenize, batched=True)

# Expose only these columns, as torch tensors, on both splits.
_TORCH_COLUMNS = ["input_ids", "attention_mask", "label"]
for _split in (train_ds, val_ds):
    _split.set_format(type="torch", columns=_TORCH_COLUMNS)




Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

In [None]:
# Load the pretrained checkpoint with a 2-label sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
def compute_metrics(eval_pred):
    """Compute validation metrics from a ``(logits, labels)`` pair.

    Args:
        eval_pred: Object unpackable as ``(logits, labels)``. ``logits`` is treated
            as a 2-D array of per-class scores (argmax / softmax over the last
            axis, column 1 being the positive class). If ``logits`` is a tuple of
            outputs, its first element is used.

    Returns:
        dict with keys "accuracy", "f1_macro", "f1_micro" and "roc_auc".
        "roc_auc" is NaN when scikit-learn reports the AUC as undefined
        (it raises ValueError, e.g. when ``labels`` contains a single class).
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        logits = logits[0]
    # Cast to float32 so the softmax below also works when the logits arrive in
    # half precision (fp16 is enabled whenever CUDA is available).
    logits = np.asarray(logits, dtype=np.float32)

    preds = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, preds)
    f1_macro = f1_score(labels, preds, average="macro", zero_division=0)
    f1_micro = f1_score(labels, preds, average="micro", zero_division=0)

    probs = torch.softmax(torch.tensor(logits), dim=-1).numpy()[:, 1]
    try:
        auc = roc_auc_score(labels, probs)
    except ValueError:
        # Only the "AUC is undefined" case is tolerated; any other failure is a
        # real bug and should surface instead of silently becoming NaN.
        auc = float("nan")
    return {"accuracy": acc, "f1_macro": f1_macro, "f1_micro": f1_micro, "roc_auc": auc}


In [None]:
# Per-device training batch size; evaluation uses twice this value.
bsz = 4
args = TrainingArguments(
    output_dir="./out_berturk_subtask1",
    learning_rate=2e-5,
    per_device_train_batch_size=bsz,
    per_device_eval_batch_size=2 * bsz,
    num_train_epochs=10,
    weight_decay=0.01,
    # Evaluate, checkpoint and log once per epoch. `logging_steps` is not passed:
    # it only applies to logging_strategy="steps" and would be ignored here.
    # NOTE(review): newer transformers releases rename `evaluation_strategy` to
    # `eval_strategy` - confirm against the installed version before upgrading.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    # Cap the number of checkpoints kept on disk; without a limit every epoch
    # leaves a full model checkpoint in output_dir.
    save_total_limit=2,
    # Model selection: reload the checkpoint with the highest validation macro-F1.
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    greater_is_better=True,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    dataloader_pin_memory=False,
    seed=SEED,
    # Mixed precision only when a CUDA device is present.
    fp16=torch.cuda.is_available(),
    report_to="none",
)



In [None]:
# Batch collator built from the tokenizer, and early stopping with a patience of 3.
collator = DataCollatorWithPadding(tokenizer)
early_stopping = EarlyStoppingCallback(early_stopping_patience=3)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    data_collator=collator,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

In [None]:
# Fine-tune, then evaluate with the model the trainer holds afterwards. Because
# `args` sets load_best_model_at_end=True with metric_for_best_model="f1_macro",
# the reported metrics are presumably those of the best checkpoint rather than
# of the last epoch trained.
train_result = trainer.train()
results = trainer.evaluate()
print("\nValidation metrics:", results)


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,F1 Micro,Roc Auc
1,0.7079,0.69085,0.5,0.333333,0.5,1.0
2,0.6422,0.679456,0.5,0.333333,0.5,1.0
3,0.639,0.668519,0.5,0.333333,0.5,1.0
4,0.5211,0.657696,0.5,0.333333,0.5,1.0



Validation metrics: {'eval_loss': 0.6908500790596008, 'eval_accuracy': 0.5, 'eval_f1_macro': 0.3333333333333333, 'eval_f1_micro': 0.5, 'eval_roc_auc': 1.0, 'eval_runtime': 0.6109, 'eval_samples_per_second': 6.547, 'eval_steps_per_second': 1.637, 'epoch': 4.0}


In [None]:
# Positive-class probabilities on the validation split. Logits are cast to
# float32 so the softmax also works if predictions come back in half precision.
preds = trainer.predict(val_ds)
logits, labels = preds.predictions, preds.label_ids
probs = torch.softmax(torch.tensor(logits, dtype=torch.float32), dim=-1).numpy()[:, 1]

# Sweep decision thresholds 0.10, 0.15, ..., 0.90 and keep the best macro-F1.
# Ties are broken in favour of the threshold closest to the neutral 0.5: with a
# strict ">" comparison alone, a flat F1 curve would select the first (most
# extreme) threshold, 0.10, and push every borderline example into class 1.
# NOTE: the threshold is tuned on the same split it is reported on, so the
# resulting F1 is optimistic.
best_t, best_f1 = 0.5, -1.0
for t in np.linspace(0.1, 0.9, 17):
    yhat = (probs >= t).astype(int)
    f1 = f1_score(labels, yhat, average="macro", zero_division=0)
    is_tie = bool(np.isclose(f1, best_f1))
    strictly_better = f1 > best_f1 and not is_tie
    tie_nearer_half = is_tie and abs(t - 0.5) < abs(best_t - 0.5)
    if strictly_better or tie_nearer_half:
        best_f1, best_t = f1, float(t)
print(f"[Threshold tuning] best_t={best_t:.2f}, F1_macro={best_f1:.3f}")

# Show classification report for best threshold
final_preds = (probs >= best_t).astype(int)
print("\nBest-threshold Validation Classification Report:")
# zero_division=0 matches the f1_score calls above and avoids the
# undefined-metric warnings when a class is never predicted.
print(classification_report(labels, final_preds, digits=4, zero_division=0))


[Threshold tuning] best_t=0.10, F1_macro=0.333

Best-threshold Validation Classification Report:
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000         2
           1     0.5000    1.0000    0.6667         2

    accuracy                         0.5000         4
   macro avg     0.2500    0.5000    0.3333         4
weighted avg     0.2500    0.5000    0.3333         4



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
@torch.no_grad()
def predict_berturk(texts, max_length=128, threshold=None):
    """Classify one or more texts with the fine-tuned model.

    Args:
        texts: A single string or an iterable of strings.
        max_length: Maximum number of tokens per text; longer inputs are truncated.
        threshold: Positive-class probability at or above which a text is
            labelled 1. ``None`` (the default) uses the current value of the
            module-level ``best_t``, looked up at call time so that re-running
            the threshold-tuning cell takes effect without redefining this
            function.

    Returns:
        A list of ``(text, predicted_label, positive_class_probability)`` tuples,
        one per input text; an empty list for empty input.

    Raises:
        NameError: if ``threshold`` is None and ``best_t`` has not been defined.
    """
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        # Nothing to score; skip the tokenizer and the model entirely.
        return []
    if threshold is None:
        threshold = best_t
    enc = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=max_length)
    # Move the encoded batch to whichever device the model lives on.
    enc = {k: v.to(model.device) for k, v in enc.items()}
    model.eval()
    logits = model(**enc).logits
    probs = torch.softmax(logits, dim=-1)[:, 1].cpu().numpy()
    preds = (probs >= threshold).astype(int)
    return list(zip(texts, preds.tolist(), probs.tolist()))

# Demo: score one politically themed sentence and one neutral sentence.
demo_texts = [
    "Bu karar tamamen siyasi bir oyundur.",
    "Bugün hava çok güzel, yürüyüşe çıkacağım.",
]
demo_output = predict_berturk(demo_texts)
print("\nDemo:", demo_output)


Demo: [('Bu karar tamamen siyasi bir oyundur.', 1, 0.5788916945457458), ('Bugün hava çok güzel, yürüyüşe çıkacağım.', 1, 0.5515802502632141)]
