# Imports

In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from safetensors.torch import load_file
import pandas as pd
import numpy as np
import kagglehub
from kagglehub import KaggleDatasetAdapter
import matplotlib.pyplot as plt, seaborn as sns
import matplotlib.ticker as mtick
from wordcloud import WordCloud
import nltk, re, string, warnings, textwrap, datetime as dt
warnings.filterwarnings("ignore")
import re
from collections import Counter
!pip install -q optuna
import optuna, wandb
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from huggingface_hub import upload_file, login, notebook_login, HfApi
from datasets import Dataset, Features, Sequence, Value
from transformers import (
    AutoTokenizer, AutoConfig, AutoModelForSequenceClassification,
    TrainingArguments, Trainer, EarlyStoppingCallback
)
from transformers.trainer_callback import TrainerCallback
import os

# GPU connection

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

# 1. Full-Code Fine-Tuned DeBERTa-v3-base

## Loading the original test data

In [None]:
# Sentiment strings -> integer class ids used as classification targets.
label_map = {
    'Extremely Negative': 0,
    'Negative': 1,
    'Neutral': 2,
    'Positive': 3,
    'Extremely Positive': 4,
}

# Read the test split, rename the tweet/sentiment columns to text/label,
# and replace the sentiment strings with their integer ids.
test_df = (
    pd.read_csv("/content/drive/MyDrive/ADV_DL/Data/Corona_NLP_test.csv")
    .rename(columns={"OriginalTweet": "text", "Sentiment": "label"})
    .assign(label=lambda frame: frame["label"].map(label_map).astype(int))
)
test_df.head()

## Loading the best model from its checkpoint

In [None]:
# Build the 5-label DeBERTa-v3-base classifier and its matching tokenizer from
# the base checkpoint; the fine-tuned weights are loaded on top of it below.
model = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/deberta-v3-base", num_labels=5
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")

# Load the saved state dict directly onto the active device.
# NOTE(review): depending on the installed PyTorch version, torch.load may
# unpickle arbitrary objects -- only load checkpoint files you trust.
state_dict = torch.load(
    "/content/drive/MyDrive/Model_Checkpoints/full_ft_deberta_v3_base/best_model_trial_0.pt",
    map_location=device,
)
model.load_state_dict(state_dict)
model = model.to(device)

## Run inference

In [None]:
# Batched inference over the test tweets with the model loaded above, followed
# by accuracy and weighted F1 against the gold labels.
texts = test_df["text"].tolist()
true_labels = test_df["label"].tolist()

batch_size = 32
all_preds = []

# `device` is a torch.device object; comparing it with the string "cuda" is
# never True, which silently disabled mixed precision even on a GPU.
# Compare the device *type* instead.
use_amp = (device.type == "cuda")
# bfloat16 when the GPU reports support for it, float16 otherwise.
autocast_dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16

# Explicitly switch to evaluation mode so the predictions do not depend on
# whatever mode an earlier cell may have left the model in.
model.eval()

with torch.no_grad():
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i : i + batch_size]
        # Pad to the longest sequence in the batch; truncate at 64 tokens.
        enc = tokenizer(
            batch_texts,
            truncation=True,
            padding=True,
            max_length=64,
            return_tensors="pt"
        )
        enc = {k: v.to(device) for k, v in enc.items()}

        if use_amp:
            with torch.autocast(device_type="cuda", dtype=autocast_dtype):
                logits = model(**enc).logits
        else:
            logits = model(**enc).logits

        # Predicted class id = index of the largest logit per example.
        batch_preds = torch.argmax(logits, dim=-1).cpu().numpy()
        all_preds.append(batch_preds)

preds = np.concatenate(all_preds, axis=0)

# --- Metrics ---
acc = accuracy_score(true_labels, preds)
f1  = f1_score(true_labels, preds, average="weighted")

print(f"Accuracy: {acc:.4f}")
print(f"Weighted F1: {f1:.4f}")

# 2. HF Fine-Tuned DeBERTa-v3-base

## Loading the original test data

In [None]:
# Sentiment strings -> integer class ids used as classification targets.
label_map = {
    'Extremely Negative': 0,
    'Negative': 1,
    'Neutral': 2,
    'Positive': 3,
    'Extremely Positive': 4,
}

# Re-read the test split so this section starts from the raw CSV, rename the
# tweet/sentiment columns to text/label, and encode the labels as integers.
raw_test = pd.read_csv("/content/drive/MyDrive/ADV_DL/Data/Corona_NLP_test.csv")
test_df = raw_test.rename(columns={"OriginalTweet": "text", "Sentiment": "label"})
test_df["label"] = test_df["label"].map(label_map).astype(int)
test_df.head()

## Loading the best model

In [None]:
# Build the 5-label DeBERTa-v3-base classifier and its matching tokenizer from
# the base checkpoint; the fine-tuned weights are loaded on top of it below.
model = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/deberta-v3-base", num_labels=5
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")

# Read the fine-tuned weights from the safetensors file, copy them into the
# model, then move the model to the active device.
state_dict = load_file(
    "/content/drive/MyDrive/Model_Checkpoints/hf_ft_deberta_v3_base/model.safetensors"
)
model.load_state_dict(state_dict)
model = model.to(device)

## Running Inference

In [None]:
# Batched inference over the test tweets with the model loaded above, followed
# by accuracy and weighted F1 against the gold labels.
texts = test_df["text"].tolist()
true_labels = test_df["label"].tolist()

batch_size = 32
all_preds = []

# `device` is a torch.device object; comparing it with the string "cuda" is
# never True, which silently disabled mixed precision even on a GPU.
# Compare the device *type* instead.
use_amp = (device.type == "cuda")
# bfloat16 when the GPU reports support for it, float16 otherwise.
autocast_dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16

# Explicitly switch to evaluation mode so the predictions do not depend on
# whatever mode an earlier cell may have left the model in.
model.eval()

with torch.no_grad():
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i : i + batch_size]
        # Pad to the longest sequence in the batch; truncate at 128 tokens.
        enc = tokenizer(
            batch_texts,
            truncation=True,
            padding=True,
            max_length=128,
            return_tensors="pt"
        )
        enc = {k: v.to(device) for k, v in enc.items()}

        if use_amp:
            with torch.autocast(device_type="cuda", dtype=autocast_dtype):
                logits = model(**enc).logits
        else:
            logits = model(**enc).logits

        # Predicted class id = index of the largest logit per example.
        batch_preds = torch.argmax(logits, dim=-1).cpu().numpy()
        all_preds.append(batch_preds)

preds = np.concatenate(all_preds, axis=0)

# --- Metrics ---
acc = accuracy_score(true_labels, preds)
f1  = f1_score(true_labels, preds, average="weighted")

print(f"Accuracy: {acc:.4f}")
print(f"Weighted F1: {f1:.4f}")

# 3. Full-Code Fine-Tuned Twitter-RoBERTa

# 4. HF Fine-Tuned Twitter-RoBERTa