In [5]:
import torch

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Report the choice so the cell's recorded output ("Using device: ...") is
# actually produced by the code in this cell.
print(f"Using device: {device}")

Using device: cuda


In [7]:
# final model

import os

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from torch.nn import CrossEntropyLoss
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback

# load data
df = pd.read_csv("reddit_sample_for_sentiment_with_labels_v7.csv")
# Rows missing either the text or the stance annotation are dropped.
df = df.dropna(subset=["Text_Preprocessed", "Stance"])

# Remap the stance codes -1 / 0 / 1 to the contiguous class ids 0 / 1 / 2.
label_map = { -1: 0, 0: 1, 1: 2 }
df["sentiment_label"] = df["Stance"].map(label_map)

# Series.map yields NaN for every value that is not a key of label_map.
# Fail loudly here instead of letting NaN labels reach the split/tensor code.
if df["sentiment_label"].isna().any():
    raise ValueError(
        "Unexpected values in 'Stance' (expected -1, 0 or 1): "
        f"{df.loc[df['sentiment_label'].isna(), 'Stance'].unique().tolist()}"
    )
# All labels are now known to be present; pin an integer dtype explicitly.
df["sentiment_label"] = df["sentiment_label"].astype("int64")

# mark quotes
def mark_quoted_lines(text):
    """Wrap every '>'-prefixed line of ``text`` in [QUOTE] ... [/QUOTE] markers.

    The text is split on "\n". A line whose stripped form starts with ">" is
    replaced by "[QUOTE] <content> [/QUOTE]", where <content> is the stripped
    line without its first character, stripped again. All other lines are
    kept exactly as they were. The lines are re-joined with "\n".
    """
    def _mark(raw_line):
        trimmed = raw_line.strip()
        if not trimmed.startswith(">"):
            # Not a quote: keep the line untouched, whitespace included.
            return raw_line
        # Only the first ">" is removed; a nested ">>" keeps its second ">".
        return f"[QUOTE] {trimmed[1:].strip()} [/QUOTE]"

    return "\n".join(_mark(raw_line) for raw_line in text.split("\n"))

# Store the quote-marked text in its own column; the preprocessed text is kept.
df["Text_quoted_marked"] = df["Text_Preprocessed"].apply(mark_quoted_lines)

# splitting data: hold out 20% as the test set, then take 10% of the remainder
# for validation. Both splits are stratified on the class id and seeded.
train_val_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["sentiment_label"],
)
train_df, val_df = train_test_split(
    train_val_df,
    test_size=0.1,
    random_state=42,
    stratify=train_val_df["sentiment_label"],
)
# Replace the shuffled original index with a fresh 0..n-1 index on every split.
train_df, val_df, test_df = (
    part.reset_index(drop=True) for part in (train_df, val_df, test_df)
)

# tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")

# dataset class
class SimpleSentimentDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes a whole DataFrame once, up front.

    Reads the "Text_quoted_marked" column as input text and the
    "sentiment_label" column as the target. Each item is a dict holding one
    tensor per tokenizer output field plus a "labels" entry.
    """

    def __init__(self, df, tokenizer, max_length=256):
        """Tokenize every text in ``df`` and store the labels as a long tensor.

        Args:
            df: DataFrame with "Text_quoted_marked" and "sentiment_label" columns.
            tokenizer: callable applied to the list of texts; called with
                truncation=True, padding="max_length" and the given max_length.
            max_length: length passed through to the tokenizer.
        """
        texts = df["Text_quoted_marked"].tolist()
        targets = df["sentiment_label"].tolist()
        self.encodings = tokenizer(
            texts,
            truncation=True,
            padding="max_length",
            max_length=max_length,
        )
        self.labels = torch.tensor(targets, dtype=torch.long)

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the encoded fields and the label of example ``idx``."""
        item = {}
        for field, values in self.encodings.items():
            # The encodings hold per-example sequences; build the tensor lazily.
            item[field] = torch.tensor(values[idx])
        item["labels"] = self.labels[idx]
        return item

# create datasets: one pre-tokenized dataset per split, default max_length
train_dataset, val_dataset, test_dataset = (
    SimpleSentimentDataset(split_df, tokenizer)
    for split_df in (train_df, val_df, test_df)
)

# class weights, indexed by class id: classes 0 and 2 count double, class 1 once
class_weights_tensor = torch.tensor([2.0, 1.0, 2.0], dtype=torch.float)

# load model: pretrained DeBERTa-v3 encoder with a 3-way classification head
model = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/deberta-v3-base",
    num_labels=3,
    ignore_mismatched_sizes=True,
)
# NOTE(review): this replaces the classifier weight storage with a copy of
# itself; the reason is not evident from this notebook. Confirm it is needed.
model.classifier.weight.data = model.classifier.weight.data.clone()

# custom trainer
class WeightedLossTrainer(Trainer):
    """Trainer whose loss is cross-entropy weighted by ``class_weights_tensor``.

    The per-class weights are read from the module-level ``class_weights_tensor``.
    """

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute the class-weighted cross-entropy loss for one batch.

        Args:
            model: the model to run; may be a wrapper around the real model.
            inputs: batch dict; its "labels" entry is removed before the
                forward pass so the model does not compute its own loss.
            return_outputs: when true, return ``(loss, outputs)``.
            **kwargs: extra arguments passed by the Trainer; ignored here.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        # Put the weights on the logits' device rather than ``model.device``:
        # a wrapped model (e.g. nn.DataParallel on multi-GPU) has no ``.device``
        # attribute, while the logits always sit where the loss is computed.
        weights = class_weights_tensor.to(logits.device)
        loss_fct = CrossEntropyLoss(weight=weights)
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

# metrics
def compute_metrics(eval_pred):
    """Compute accuracy, macro F1 and per-class precision/recall/F1.

    Args:
        eval_pred: a pair that unpacks to ``(logits, labels)``; the predicted
            class is the argmax over the last axis of ``logits``.

    Returns:
        Dict with "accuracy", "f1_macro" and, for each class id 0, 1 and 2,
        "precision_<id>", "recall_<id>" and "f1_<id>".
    """
    logits, labels = eval_pred
    preds = torch.tensor(logits).argmax(dim=-1).numpy()
    labels = torch.tensor(labels).numpy()
    # Pin the class list so the result arrays always have three entries in
    # class-id order. Without ``labels=``, a class absent from both the targets
    # and the predictions is dropped, and the fixed indices below would raise
    # IndexError or read another class's score.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, labels=[0, 1, 2], average=None, zero_division=0
    )
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1_macro": f1.mean(),
        "precision_0": precision[0], "recall_0": recall[0], "f1_0": f1[0],
        "precision_1": precision[1], "recall_1": recall[1], "f1_1": f1[1],
        "precision_2": precision[2], "recall_2": recall[2], "f1_2": f1[2],
    }

# training arguments (stage 1): evaluate and checkpoint every epoch, then
# reload the checkpoint with the best validation macro F1 at the end
training_args = TrainingArguments(
    report_to="wandb",
    output_dir="./results_sentiment_only",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    learning_rate=2e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    greater_is_better=True,
    seed=42,
    # Mixed precision needs a GPU. Enable it only when CUDA is available so
    # the notebook still runs on the CPU fallback instead of failing here.
    fp16=torch.cuda.is_available(),
    gradient_accumulation_steps=1,
    max_grad_norm=1.0
)

# trainer: stop early after 2 evaluations without a better f1_macro
trainer = WeightedLossTrainer(
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer
)

trainer.train()

# fine tune (stage 2): continue training the same model object for two more
# epochs at a lower learning rate, with no warmup and no early stopping
finetune_args = TrainingArguments(
    output_dir="./results_sentiment_finetune",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    learning_rate=5e-6,
    weight_decay=0.01,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    greater_is_better=True,
    seed=42,
    # Mixed precision needs a GPU. Enable it only when CUDA is available so
    # the notebook still runs on the CPU fallback instead of failing here.
    fp16=torch.cuda.is_available(),
    gradient_accumulation_steps=1,
    max_grad_norm=1.0
)

finetune_trainer = WeightedLossTrainer(
    model=model,
    args=finetune_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer
)

finetune_trainer.train()

# evaluate on the held-out test split
# Use the named fields of the prediction result instead of slicing by position.
prediction_output = finetune_trainer.predict(test_dataset)
logits = prediction_output.predictions
labels = np.asarray(prediction_output.label_ids)
# Cast to float32 so the softmax runs in full precision even if mixed-precision
# evaluation returned half-precision logits.
probs = torch.nn.functional.softmax(torch.tensor(logits, dtype=torch.float32), dim=-1).numpy()
preds = np.argmax(probs, axis=1)

# save probability: one row per test example, in test_df order
# (test_df had its index reset after the split, so rows line up by position)
pred_df = pd.DataFrame({
    "text": test_df["Text_Preprocessed"],
    "true_label": labels,
    "pred_label": preds,
    "prob_0": probs[:, 0],
    "prob_1": probs[:, 1],
    "prob_2": probs[:, 2],
})
pred_df.to_csv("sentiment_predictions.csv", index=False)

# report outputs
print("\nClassification Report:")
print(classification_report(labels, preds))
print("\nConfusion Matrix:")
print(confusion_matrix(labels, preds))

# save model and tokenizer side by side in one directory
save_path = "sentiment_model_final"
os.makedirs(save_path, exist_ok=True)
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print(f"Model and tokenizer saved to {save_path}")

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = WeightedLossTrainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mbenicholson[0m ([33mbenicholson-university-of-chicago[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,Precision 0,Recall 0,F1 0,Precision 1,Recall 1,F1 1,Precision 2,Recall 2,F1 2
1,1.0833,1.120531,0.266667,0.145552,0.0,0.0,0.0,1.0,0.010471,0.020725,0.26257,1.0,0.415929
2,1.098,1.060708,0.527778,0.37637,0.0,0.0,0.0,0.668342,0.696335,0.682051,0.354037,0.606383,0.447059
3,0.8222,0.965715,0.583333,0.540597,0.414141,0.546667,0.471264,0.728814,0.675393,0.701087,0.47619,0.425532,0.449438
4,0.5523,1.002746,0.608333,0.57217,0.467391,0.573333,0.51497,0.723757,0.685864,0.704301,0.517241,0.478723,0.497238
5,0.4054,1.123241,0.619444,0.590246,0.590164,0.48,0.529412,0.727273,0.670157,0.697548,0.479675,0.62766,0.543779


  finetune_trainer = WeightedLossTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,Precision 0,Recall 0,F1 0,Precision 1,Recall 1,F1 1,Precision 2,Recall 2,F1 2
1,0.45,1.320264,0.630556,0.609457,0.596774,0.493333,0.540146,0.795918,0.612565,0.692308,0.483444,0.776596,0.595918
2,0.2259,1.403127,0.641667,0.60913,0.62069,0.48,0.541353,0.733696,0.706806,0.72,0.508475,0.638298,0.566038



Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.51      0.52       186
           1       0.77      0.60      0.67       478
           2       0.47      0.69      0.56       235

    accuracy                           0.60       899
   macro avg       0.59      0.60      0.58       899
weighted avg       0.64      0.60      0.61       899


Confusion Matrix:
[[ 95  36  55]
 [ 62 285 131]
 [ 25  47 163]]
Model and tokenizer saved to sentiment_model_final
