# In [None]:
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
# Checkpoint directory that both the model weights and the tokenizer are
# loaded from (presumably the output of an earlier fine-tuning round).
MODEL_PATH = "/content/drive/MyDrive/debert-v11"

# Prefer the GPU when one is visible to PyTorch; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the tokenizer and the sequence-classification model from the same
# checkpoint, then place the model on the selected device.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
model = model.to(device)


import pandas as pd
from datasets import Dataset

# Extra training examples for "category 3": short promotional / scam-style
# messages. Order is preserved as written; every entry receives label 1 in
# the DataFrame built below (NOTE(review): confirm that 1 is the intended
# class id for this category in the loaded model's label mapping).
texts_c3 = [
    "Win a FREE iPhone today! Click here to claim your prize 🎁",
    "Join my Telegram channel for secret crypto tips 🚀📈",
    "Visit www.easycash.com and earn $1000 per day!",
    "Subscribe now and get rich quick, no experience needed!",
    "Limited offer: Buy 1, Get 3 FREE!!! 🔥🔥🔥",
    "Work from home and make $500 a day. Sign up now!",
    "Congratulations, you have been selected for a $100 gift card!",
    "Follow this link to unlock exclusive deals: www.scamdealz.net",
    "Your loan is approved! Just click here to claim your cash 💰",
    "Get FREE followers instantly! No password required.",
    "Download our app to win amazing prizes instantly 🎉",
    "Exclusive stock tips — guaranteed 200% returns in 1 week!",
    "Lose 10kg in 7 days with this magic pill 💊",
    "Cheap luxury watches available now! Visit www.fakewatch.shop",
    "Claim your free vacation to Bali 🌴 just by signing up today!",
    "This is not a scam — click now to change your life forever!",
    "Double your money overnight with this crypto bot 🤖",
    "Limited seats! Register now for the millionaire webinar 💼",
    "Click here to discover how I made $10,000 in one week!",
    "Join our VIP casino group — win big every night 🎲",
    "Exclusive: Free Netflix for 1 year! Claim before it ends 📺",
    "Hot singles in your area waiting to meet you ❤️",
    "Earn passive income with zero effort — join us now!",
    "Your account is compromised! Verify here immediately 🔑",
    "Congratulations, you are our lucky winner #7 today 🎉",
    "Buy followers and likes instantly for cheap prices!",
    "One-time offer: unlimited movies for free! Click now.",
    "Get a FREE credit score check today, no strings attached!",
    "Unlock premium access by entering your card details here.",
    "Stop wasting time — this system earns while you sleep 😴💵",
    "Special promo: Designer handbags for 90% off 💼",
    "Get rich with AI trading bot — results guaranteed 🤖",
    "Apply now for instant $5000 loan, no credit check!",
    "FREE samples shipped directly to your door, sign up now.",
    "Your dream car for only $99/month. Limited stock 🚗🔥",
    "Become a millionaire with this one secret trick!",
    "Claim your PayPal reward instantly at www.fakepaypal.com",
    "Don’t miss out on the hottest investment of 2025!",
    "Click here to reset your bank password securely.",
    "Flash sale! Electronics up to 95% off — today only ⚡",
    "Your account is eligible for a refund, claim now!",
    "Boost your testosterone levels with this magic pill 💊",
    "Congratulations, you qualify for a free home makeover 🏠",
    "Make $200 daily from home just by filling out surveys!",
    "Your parcel is waiting! Pay $2 to release your package 📦",
    "Earn crypto while you sleep — join the revolution 🚀",
    "100% natural cure for baldness — order now 🧴",
    "Join now and receive exclusive casino bonuses 🎰",
    "Your email has won the Microsoft lottery — claim now 💻",
]

# Two-column frame: the raw message and a constant label of 1 per row.
df_c3 = pd.DataFrame(
    {
        "text": texts_c3,
        "label": [1 for _ in texts_c3],
    }
)

# Wrap the labelled DataFrame in a Hugging Face Dataset so it can be
# tokenized in batches and handed to the Trainer.
dataset = Dataset.from_pandas(df_c3)


def tokenize_fn(batch):
    """Tokenize the "text" entries of a batch to a fixed length of 256.

    Shorter inputs are padded up to 256 tokens and longer ones are
    truncated, so every encoded example has the same length.
    """
    return tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )


train_dataset = dataset.map(tokenize_fn, batched=True)

# Expose only the listed columns, as torch tensors.
train_dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "label"],
)

# Settings for a brief continuation run on top of the loaded checkpoint:
# two epochs at a very small learning rate (presumably to limit drift from
# the existing weights), no intermediate checkpoints, no external loggers.
training_args = TrainingArguments(
    output_dir="./results_continue_c3",
    # optimisation schedule
    num_train_epochs=2,
    learning_rate=1e-6,
    per_device_train_batch_size=8,
    # logging / checkpointing
    logging_steps=10,
    save_strategy="no",
    report_to="none",
)

# Build the Trainer around the already-loaded model and the tokenized
# examples, then run the continuation fine-tune.
#
# Newer transformers releases renamed Trainer's `tokenizer` argument to
# `processing_class`; the old name is deprecated and announced for removal
# in v5, while older releases only know `tokenizer`. Pick whichever keyword
# the installed Trainer actually accepts so this cell runs on both.
import inspect

_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_kwarg = (
    "processing_class" if "processing_class" in _trainer_params else "tokenizer"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    **{_tokenizer_kwarg: tokenizer},
)

# continue fine-tuning (updates `model` in place)
trainer.train()

# Persist the fine-tuned model and its tokenizer side by side under a new
# directory, leaving the checkpoint that was loaded from untouched.
SAVE_PATH = "/content/drive/MyDrive/debert-v12"
for artifact in (model, tokenizer):
    artifact.save_pretrained(SAVE_PATH)

print(f"✅ Model updated with new category-3 data and saved at {SAVE_PATH}")
