In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.cuda.amp import GradScaler, autocast
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from tqdm.auto import tqdm
import warnings
import re
import os
import zipfile

# --- Basic Setup ---
# Silence every Python warning for the rest of the run (this also hides
# deprecation notices raised by the libraries imported above).
warnings.filterwarnings('ignore')
# Seed the PyTorch and NumPy random number generators with a fixed value.
torch.manual_seed(42)
np.random.seed(42)

# --- 1. Preprocessing and Dataset ---
def preprocess_text(text):
    """Normalize a raw post into whitespace-separated Arabic-script text.

    Steps, in order:
      1. Turn the literal ``<LF>`` line-break marker into a space.
      2. Drop URLs, ``@mentions`` and ``#hashtags``.
      3. Drop every character that is neither whitespace nor inside the
         Unicode range U+0600-U+06FF.
      4. Collapse whitespace runs into single spaces and trim both ends.

    Args:
        text: The raw text. Any value that is not a ``str`` (for example
            ``None``) is treated as missing.

    Returns:
        The cleaned string, or ``""`` when *text* is not a string.
    """
    if not isinstance(text, str):
        return ""
    # Expand the marker BEFORE stripping URLs: "<LF>" contains no whitespace,
    # so a pattern like `http\S+` would otherwise swallow the marker together
    # with the word that follows it.
    text = text.replace('<LF>', ' ')
    # `http\S+` already covers "https..." links, so no separate branch is needed.
    text = re.sub(r'http\S+|www\S+', '', text)
    text = re.sub(r'\@\w+', '', text)
    text = re.sub(r'#\w+', '', text)
    text = re.sub(r'[^\u0600-\u06FF\s]', '', text)
    return re.sub(r'\s+', ' ', text).strip()

class SingleTaskDataset(Dataset):
    """Map-style dataset that pairs each text with one integer class label.

    Samples are tokenized lazily, one per ``__getitem__`` call. The tokenizer
    is asked to truncate and to pad every sample to ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for sample *idx*."""
        encoded = self.tokenizer(
            str(self.texts[idx]),
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        # The tokenizer output is flattened to 1-D so the DataLoader can stack samples.
        sample = {key: encoded[key].flatten() for key in ('input_ids', 'attention_mask')}
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

class InferenceDataset(Dataset):
    """Map-style dataset of unlabeled texts, tokenized lazily for prediction.

    The tokenizer is asked to truncate and to pad every sample to
    ``max_length``.
    """

    def __init__(self, texts, tokenizer, max_length=128):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.texts = texts

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the flattened ``input_ids`` and ``attention_mask`` of sample *idx*."""
        encoded = self.tokenizer(
            str(self.texts[idx]),
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        input_ids = encoded['input_ids'].flatten()
        attention_mask = encoded['attention_mask'].flatten()
        return {'input_ids': input_ids, 'attention_mask': attention_mask}

# --- 2. Reusable Training and Prediction Functions ---
def train_single_task_model(df, text_col, label_col, model_name, num_labels, model_save_path, epochs=8, batch_size=16, lr=2e-5):
    """Fine-tune one sequence-classification model for a single label column.

    The labels in ``df[label_col]`` are integer-encoded, the rows are split
    90/10 (stratified, ``random_state=42``) into train and validation parts,
    and the model is trained with a class-weighted cross-entropy loss. After
    every epoch the validation macro F1 is printed, and the model's
    ``state_dict`` is written to ``model_save_path`` whenever that score
    improves on the best one seen so far.

    Args:
        df: DataFrame holding the text and label columns. It is not modified.
        text_col: Name of the column with the input texts.
        label_col: Name of the column with the class labels.
        model_name: Checkpoint name passed to ``from_pretrained``.
        num_labels: Size of the classification head.
        model_save_path: File that receives the best ``state_dict``.
        epochs: Number of passes over the training split.
        batch_size: Training batch size; validation uses twice this value.
        lr: Learning rate for AdamW.

    Returns:
        The fitted ``LabelEncoder``, needed to map predicted ids back to labels.
    """
    print("-" * 50)
    print(f"🚀 Starting Training for Task: {label_col}")
    print("-" * 50)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Mixed precision only makes sense on CUDA; on CPU both AMP helpers become no-ops.
    use_amp = device.type == 'cuda'
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Encode into a local array instead of adding a column to the caller's frame.
    encoder = LabelEncoder()
    encoded_labels = encoder.fit_transform(df[label_col])
    train_texts, val_texts, train_labels, val_labels = train_test_split(
        df[text_col].values, encoded_labels,
        test_size=0.1, random_state=42, stratify=encoded_labels,
    )
    train_dataset = SingleTaskDataset(train_texts, train_labels, tokenizer)
    val_dataset = SingleTaskDataset(val_texts, val_labels, tokenizer)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size * 2)

    # "balanced" weights counter class imbalance. They are placed into a vector of
    # length `num_labels` (default weight 1.0) so the loss always matches the head size.
    present_classes = np.unique(train_labels)
    balanced_weights = compute_class_weight('balanced', classes=present_classes, y=train_labels)
    weight_vector = np.ones(num_labels, dtype=np.float32)
    weight_vector[present_classes] = balanced_weights
    weights = torch.tensor(weight_vector, dtype=torch.float).to(device)
    # The model's built-in loss is unweighted, so the weighted loss is computed here.
    criterion = nn.CrossEntropyLoss(weight=weights)

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, ignore_mismatched_sizes=True).to(device)
    optimizer = AdamW(model.parameters(), lr=lr)
    scaler = GradScaler(enabled=use_amp)
    # Start below any reachable F1 so the first epoch always writes a checkpoint;
    # callers load `model_save_path` unconditionally afterwards.
    best_f1 = -1.0

    for epoch in range(epochs):
        model.train()
        for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}"):
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            with autocast(enabled=use_amp):
                outputs = model(input_ids, attention_mask=attention_mask)
                # Compute the loss in float32 regardless of the autocast dtype.
                loss = criterion(outputs.logits.float(), labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                with autocast(enabled=use_amp):
                    outputs = model(input_ids, attention_mask=attention_mask)
                all_preds.extend(torch.argmax(outputs.logits, dim=1).cpu().numpy())
                all_labels.extend(batch['labels'].cpu().numpy())

        f1 = f1_score(all_labels, all_preds, average='macro')
        print(f"Validation Macro F1 for {label_col}: {f1:.4f}")

        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), model_save_path)
            print(f"✅ New best model for {label_col} saved to {model_save_path} with F1: {best_f1:.4f}")
    return encoder

def predict_single_task(texts, model, tokenizer, encoder, device, batch_size=32):
    """Predict one label per text with a single trained classification model.

    Args:
        texts: Indexable collection of input texts.
        model: Sequence-classification model; it is switched to eval mode.
        tokenizer: Tokenizer handed to ``InferenceDataset``.
        encoder: Fitted label encoder used to turn class ids back into labels.
        device: Device the input tensors are moved to. The model is expected
            to be on this device already.
        batch_size: Number of texts per forward pass (default 32).

    Returns:
        The result of ``encoder.inverse_transform`` on the predicted class
        ids, in the same order as *texts*.
    """
    model.eval()
    # Only request mixed precision when running on CUDA.
    use_amp = torch.device(device).type == 'cuda'
    dataset = InferenceDataset(texts, tokenizer)
    # shuffle=False keeps predictions aligned with the input order.
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    all_preds = []
    with torch.no_grad():
        for batch in tqdm(loader, desc="Predicting"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            with autocast(enabled=use_amp):
                outputs = model(input_ids, attention_mask=attention_mask)
            all_preds.extend(torch.argmax(outputs.logits, dim=1).cpu().numpy())
    return encoder.inverse_transform(all_preds)

# --- 3. Main Execution Pipeline ---
def _load_finetuned_model(model_name, num_labels, weights_path, device):
    """Rebuild the classifier architecture and load fine-tuned weights from *weights_path*."""
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
    # map_location lets the checkpoint load on whichever device is available now.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    return model.to(device)


def main():
    """Train the three task models, predict on the test set and write the submission.

    Pipeline:
      1. Load ``train.csv`` / ``test.csv`` and add a cleaned-text column.
      2. Train one model each for ``Emotion``, ``Offensive`` and ``Hate``
         (the last one only on rows labelled offensive with a known
         ``Hate`` value).
      3. Reload the best checkpoint of each model and predict on the test set.
         ``Hate`` is predicted only for rows predicted as offensive; all
         other rows keep a missing value.
      4. Write ``prediction.csv`` and pack it into ``prediction.zip``.
    """
    MODEL_NAME = 'aubmindlab/bert-base-arabertv2'
    DATA_DIR = '/kaggle/input/mahed-task-2'
    TRAIN_FILE = os.path.join(DATA_DIR, 'train.csv')
    TEST_FILE = os.path.join(DATA_DIR, 'test.csv')
    TEXT_COL = 'text_cleaned'

    print("Loading and preprocessing all data...")
    train_df = pd.read_csv(TRAIN_FILE)
    test_df = pd.read_csv(TEST_FILE)

    for df in [train_df, test_df]:
        df[TEXT_COL] = df['text'].apply(preprocess_text)
    # Rows without an 'Offensive' label are treated as not offensive.
    train_df['Offensive'] = train_df['Offensive'].fillna('no')

    # === Train all three specialized models ===
    emotion_encoder = train_single_task_model(
        df=train_df, text_col=TEXT_COL, label_col='Emotion', model_name=MODEL_NAME,
        num_labels=train_df['Emotion'].nunique(), model_save_path='emotion_model.pth'
    )
    offensive_encoder = train_single_task_model(
        df=train_df, text_col=TEXT_COL, label_col='Offensive', model_name=MODEL_NAME,
        num_labels=train_df['Offensive'].nunique(), model_save_path='offensive_model.pth'
    )
    # The hate classifier only ever sees offensive rows, mirroring how it is used below.
    hate_train_df = train_df[train_df['Offensive'] == 'yes'].dropna(subset=['Hate']).copy()
    hate_encoder = train_single_task_model(
        df=hate_train_df, text_col=TEXT_COL, label_col='Hate', model_name=MODEL_NAME,
        num_labels=hate_train_df['Hate'].nunique(), model_save_path='hate_model.pth'
    )

    # --- Prediction and Submission File Generation ---
    print("\n" + "="*50)
    print("🏁 Starting Final Prediction Pipeline")
    print("="*50)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    emotion_model = _load_finetuned_model(MODEL_NAME, len(emotion_encoder.classes_), 'emotion_model.pth', device)
    offensive_model = _load_finetuned_model(MODEL_NAME, len(offensive_encoder.classes_), 'offensive_model.pth', device)
    hate_model = _load_finetuned_model(MODEL_NAME, len(hate_encoder.classes_), 'hate_model.pth', device)

    print(f"Generating predictions for the official test set ({len(test_df)} rows)...")
    target_df = test_df

    # Predict each task on the test set
    emotion_preds = predict_single_task(target_df[TEXT_COL].values, emotion_model, tokenizer, emotion_encoder, device)
    offensive_preds = predict_single_task(target_df[TEXT_COL].values, offensive_model, tokenizer, offensive_encoder, device)

    submission_df = pd.DataFrame({
        'id': target_df['id'],
        'Emotion_Predicted': emotion_preds,
        'Offensive_Predicted': offensive_preds
    })

    # Hierarchical prediction for 'Hate': start with every row missing (object dtype,
    # so string labels can be written into it) and fill only the offensive rows.
    submission_df['Hate_Predicted'] = pd.Series(np.nan, index=submission_df.index, dtype=object)
    # submission_df shares target_df's index (it was built from target_df['id']),
    # so the same boolean mask selects matching rows in both frames.
    offensive_mask = submission_df['Offensive_Predicted'] == 'yes'

    if offensive_mask.any():
        offensive_texts = target_df.loc[offensive_mask, TEXT_COL].values
        hate_preds_subset = predict_single_task(offensive_texts, hate_model, tokenizer, hate_encoder, device)
        submission_df.loc[offensive_mask, 'Hate_Predicted'] = hate_preds_subset

    final_submission_df = submission_df[['id', 'Emotion_Predicted', 'Offensive_Predicted', 'Hate_Predicted']]

    PREDICTION_FILE = 'prediction.csv'
    final_submission_df.to_csv(PREDICTION_FILE, index=False)
    print(f"\nSubmission file '{PREDICTION_FILE}' created successfully.")

    with zipfile.ZipFile('prediction.zip', 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.write(PREDICTION_FILE)
    print("`prediction.zip` created for submission.")

# Run the full train-and-predict pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()

Loading and preprocessing all data...
--------------------------------------------------
🚀 Starting Training for Task: Emotion
--------------------------------------------------


tokenizer_config.json:   0%|          | 0.00/611 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/384 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

2025-07-22 12:10:37.882465: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753186238.069200      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753186238.128155      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Emotion: 0.2590
✅ New best model for Emotion saved to emotion_model.pth with F1: 0.2590


Epoch 2/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Emotion: 0.2809
✅ New best model for Emotion saved to emotion_model.pth with F1: 0.2809


Epoch 3/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Emotion: 0.3343
✅ New best model for Emotion saved to emotion_model.pth with F1: 0.3343


Epoch 4/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Emotion: 0.3647
✅ New best model for Emotion saved to emotion_model.pth with F1: 0.3647


Epoch 5/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Emotion: 0.4048
✅ New best model for Emotion saved to emotion_model.pth with F1: 0.4048


Epoch 6/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Emotion: 0.3596


Epoch 7/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Emotion: 0.3794


Epoch 8/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Emotion: 0.3615
--------------------------------------------------
🚀 Starting Training for Task: Offensive
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Offensive: 0.7841
✅ New best model for Offensive saved to offensive_model.pth with F1: 0.7841


Epoch 2/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Offensive: 0.7787


Epoch 3/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Offensive: 0.7770


Epoch 4/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Offensive: 0.6798


Epoch 5/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Offensive: 0.7643


Epoch 6/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Offensive: 0.7855
✅ New best model for Offensive saved to offensive_model.pth with F1: 0.7855


Epoch 7/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Offensive: 0.7660


Epoch 8/8:   0%|          | 0/336 [00:00<?, ?it/s]

Validation Macro F1 for Offensive: 0.7622
--------------------------------------------------
🚀 Starting Training for Task: Hate
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8:   0%|          | 0/99 [00:00<?, ?it/s]

Validation Macro F1 for Hate: 0.4531
✅ New best model for Hate saved to hate_model.pth with F1: 0.4531


Epoch 2/8:   0%|          | 0/99 [00:00<?, ?it/s]

Validation Macro F1 for Hate: 0.4868
✅ New best model for Hate saved to hate_model.pth with F1: 0.4868


Epoch 3/8:   0%|          | 0/99 [00:00<?, ?it/s]

Validation Macro F1 for Hate: 0.5316
✅ New best model for Hate saved to hate_model.pth with F1: 0.5316


Epoch 4/8:   0%|          | 0/99 [00:00<?, ?it/s]

Validation Macro F1 for Hate: 0.5994
✅ New best model for Hate saved to hate_model.pth with F1: 0.5994


Epoch 5/8:   0%|          | 0/99 [00:00<?, ?it/s]

Validation Macro F1 for Hate: 0.5495


Epoch 6/8:   0%|          | 0/99 [00:00<?, ?it/s]

Validation Macro F1 for Hate: 0.5942


Epoch 7/8:   0%|          | 0/99 [00:00<?, ?it/s]

Validation Macro F1 for Hate: 0.5610


Epoch 8/8:   0%|          | 0/99 [00:00<?, ?it/s]

Validation Macro F1 for Hate: 0.5567

🏁 Starting Final Prediction Pipeline


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Generating predictions for the official test set (1278 rows)...


Predicting:   0%|          | 0/40 [00:00<?, ?it/s]

Predicting:   0%|          | 0/40 [00:00<?, ?it/s]

Predicting:   0%|          | 0/10 [00:00<?, ?it/s]


Submission file 'prediction.csv' created successfully.
`prediction.zip` created for submission.
