In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, f1_score, accuracy_score, confusion_matrix
from transformers import (
AutoModelForSequenceClassification,
AutoTokenizer,
get_linear_schedule_with_warmup,
set_seed
)
from torch.optim import AdamW
import warnings
import os
from tqdm.notebook import tqdm
import zipfile
import gc
import matplotlib.pyplot as plt
import seaborn as sns

# Suppress every Python warning for the rest of the run.
warnings.filterwarnings('ignore')

# --- Input CSV locations ---
TRAIN_FILE = '/kaggle/input/racial-hoaxes-ldk/Racial_train.csv'
DEV_FILE = '/kaggle/input/racial-hoaxes-ldk/Racial_val.csv'
# Test split that carries gold labels; used for the final labelled evaluation.
TEST_FILE_WITH_LABELS = '/kaggle/input/racial-hoaxes-ldk/Racial_test.csv'
# Test split without labels; its ID column drives the rows of the submission file.
ORIGINAL_TEST_FILE_FOR_SUBMISSION_STRUCTURE = '/kaggle/input/racial-hoaxes-ldk/Racial_test_without_labels.csv'
# Checkpoint identifier handed to the tokenizer and model loaders.
MODEL_NAME = "l3cube-pune/hing-roberta-mixed"
# Directory receiving the best checkpoint, plots, submission CSV and zip archive.
OUTPUT_DIR = './results_racial_hoax_hing_roberta_mixed_tuned'
SUBMISSION_FILE = 'submission.csv'
# The archive is named "<TEAM_NAME>.<RUN_ID>.zip".
TEAM_NAME = 'YourTeamName_HoaxMix'
RUN_ID = 'run_4_hing_roberta_mixed_tuned_lr_wd'
# --- Hyperparameters ---
MAX_LEN = 128  # every example is padded/truncated to this many tokens
BATCH_SIZE = 16  # training batch size; the evaluation loaders use BATCH_SIZE * 2
LEARNING_RATE = 3e-6  # learning rate passed to AdamW
WEIGHT_DECAY = 0.05  # weight decay passed to AdamW
EPOCHS = 10  # upper bound on epochs; early stopping may end training sooner
PATIENCE = 3  # epochs without validation macro-F1 improvement before stopping
SEED = 42  # seed handed to set_seeds()

def set_seeds(seed=42):
    """Seed NumPy, transformers and PyTorch and switch cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every generator touched here.

    Side effects:
        Sets ``PYTHONHASHSEED`` in ``os.environ`` and prints a confirmation line.
    """
    # CPU-side generators, seeded in the same order as before.
    for seeder in (np.random.seed, set_seed, torch.manual_seed):
        seeder(seed)

    # CUDA generators are only touched when a GPU is present.
    if torch.cuda.is_available():
        for cuda_seeder in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seeder(seed)

    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)
    print(f"Seeds set to {seed}")

set_seeds(SEED)

# Prefer the GPU when one is visible; otherwise fall back to the CPU.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("Using CPU")
else:
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")

# All artefacts of the run are written below OUTPUT_DIR.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def sanitize_columns(df, df_name="DataFrame"):
    """Clean up the column labels of ``df`` in place and return the same frame.

    Each label is converted to ``str`` and stripped of surrounding whitespace;
    if the result ends with ``'|'`` that single character is removed and the
    label is stripped again. The labels are printed before and after.

    Args:
        df: Frame whose ``columns`` attribute is rewritten.
        df_name: Name used in the two diagnostic print lines.

    Returns:
        The same ``df`` object, with cleaned column labels.
    """
    print(f"Columns in {df_name} BEFORE sanitization: {df.columns.tolist()}")

    def _clean(raw_label):
        label = str(raw_label).strip()
        # Only one trailing pipe is dropped, matching the header artefact seen in the CSVs.
        return label[:-1].strip() if label.endswith('|') else label

    df.columns = [_clean(raw_label) for raw_label in df.columns]
    print(f"Columns in {df_name} AFTER sanitization: {df.columns.tolist()}")
    return df

print("Loading HoaxMixPlus data...")
try:
    # Every CSV is read and immediately passed through the header clean-up.
    train_df = sanitize_columns(pd.read_csv(TRAIN_FILE), "train_df")
    dev_df = sanitize_columns(pd.read_csv(DEV_FILE), "dev_df")
    test_df_with_labels = sanitize_columns(
        pd.read_csv(TEST_FILE_WITH_LABELS), "test_df_with_labels"
    )
    original_test_df_for_submission = sanitize_columns(
        pd.read_csv(ORIGINAL_TEST_FILE_FOR_SUBMISSION_STRUCTURE),
        "original_test_df_for_submission",
    )

    print("Data loaded successfully.")
    print(
        f"Train samples: {len(train_df)}, "
        f"Dev samples: {len(dev_df)}, "
        f"Test (with labels) samples: {len(test_df_with_labels)}, "
        f"Original Test (for submission) samples: {len(original_test_df_for_submission)}"
    )
except FileNotFoundError as e:
    # A missing input file gets its own hint before the run is aborted.
    print(f"Error loading data file: {e}")
    print("Please ensure all CSV files are in their correct paths.")
    exit()
except Exception as e:
    print(f"An error occurred during data loading: {e}")
    exit()

expected_train_dev_cols = ['clean_text', 'labels']
expected_test_cols_with_labels = ['ID', 'clean_text', 'labels']
expected_original_test_cols = ['ID', 'clean_text']

# One row per input file: (name used in the error message, frame, required columns).
# The files are checked in the same order as they were loaded.
for _csv_label, _frame, _required_cols in (
    ("Train CSV", train_df, expected_train_dev_cols),
    ("Dev (val) CSV", dev_df, expected_train_dev_cols),
    ("Test CSV with labels", test_df_with_labels, expected_test_cols_with_labels),
    ("Original Test CSV (for submission)", original_test_df_for_submission, expected_original_test_cols),
):
    if any(col not in _frame.columns for col in _required_cols):
        print(f"Error: {_csv_label} missing one of required columns: {_required_cols}. Found: {_frame.columns.tolist()}")
        exit()

print("Column names verified successfully.")

# Replace missing text with an empty string and force a plain str column in every split.
for _frame in (train_df, dev_df, test_df_with_labels, original_test_df_for_submission):
    _frame['clean_text'] = _frame['clean_text'].fillna('').astype(str)

print("\nTrain Data Class Distribution (0: Non-Hoax, 1: Racial Hoax):")
if 'labels' not in train_df.columns:
    print("Label column ('labels') not found in training data. Cannot proceed.")
    exit()
else:
    print(train_df['labels'].value_counts(normalize=True))
    # Anything other than 0/1 is reported but does not stop the run.
    observed_labels = train_df['labels'].unique()
    if not set(observed_labels).issubset({0, 1}):
        print(f"Warning: Unexpected labels found in train_df: {observed_labels}. Expected 0 and 1.")

print(f"\nLoading Tokenizer: {MODEL_NAME}")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
except Exception as e:
    print(f"Error loading tokenizer {MODEL_NAME}: {e}")
    exit()

class RacialHoaxDataset(Dataset):
    """Map-style dataset that tokenizes one text per index on demand.

    Each sample is a dict with ``text``, ``input_ids`` and ``attention_mask``
    (both flattened to one dimension), plus ``original_id`` when ``ids`` was
    given and ``labels`` when ``labels`` was given.

    If anything raises while a sample is being built, the error is printed and
    an all-zero sample of length ``max_len`` with label 0 is returned instead.
    """

    def __init__(self, texts, labels, tokenizer, max_len, ids=None):
        """Store the inputs; nothing is tokenized until a sample is requested."""
        self.texts = texts
        self.labels = labels
        self.ids = ids
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Number of samples, taken from ``texts``."""
        return len(self.texts)

    def __getitem__(self, item):
        """Return the sample dict for position ``item``."""
        has_ids = self.ids is not None
        has_labels = self.labels is not None

        text = self.texts[item]
        item_id = self.ids[item] if has_ids else item
        try:
            encoding = self.tokenizer.encode_plus(
                text,
                add_special_tokens=True,
                max_length=self.max_len,
                return_token_type_ids=False,
                padding='max_length',
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt',
            )
            data = {
                'text': text,
                'input_ids': encoding['input_ids'].flatten(),
                'attention_mask': encoding['attention_mask'].flatten(),
            }
            if has_ids:
                data['original_id'] = self.ids[item]
            if has_labels:
                if item >= len(self.labels):
                    # More texts than labels: warn and fall back to class 0.
                    print(f"Warning: Label index {item} out of bounds for labels len {len(self.labels)}")
                    data['labels'] = torch.tensor(0, dtype=torch.long)
                else:
                    data['labels'] = torch.tensor(self.labels[item], dtype=torch.long)
        except Exception as e:
            print(f"Error tokenizing text at index {item} (ID: {item_id}): {text}\nError: {e}")
            # Placeholder sample so the batch keeps its shape.
            data = {
                'text': text,
                'input_ids': torch.zeros(self.max_len, dtype=torch.long),
                'attention_mask': torch.zeros(self.max_len, dtype=torch.long),
            }
            if has_ids:
                data['original_id'] = self.ids[item]
            if has_labels:
                data['labels'] = torch.tensor(0, dtype=torch.long)
        return data

print("\nCreating Datasets and Dataloaders...")
# Plain NumPy arrays are pulled out of each frame, one split at a time.
train_texts = train_df['clean_text'].to_numpy()
train_labels = train_df['labels'].to_numpy()

dev_texts = dev_df['clean_text'].to_numpy()
dev_labels = dev_df['labels'].to_numpy()

test_ids_eval = test_df_with_labels['ID'].to_numpy()
test_texts_eval = test_df_with_labels['clean_text'].to_numpy()
test_labels_eval = test_df_with_labels['labels'].to_numpy()

submission_test_ids = original_test_df_for_submission['ID'].to_numpy()
submission_test_texts = original_test_df_for_submission['clean_text'].to_numpy()

# Train/dev samples carry no IDs; the submission dataset carries no labels.
train_dataset = RacialHoaxDataset(train_texts, train_labels, tokenizer, MAX_LEN, ids=None)
dev_dataset = RacialHoaxDataset(dev_texts, dev_labels, tokenizer, MAX_LEN, ids=None)
eval_test_dataset = RacialHoaxDataset(
    test_texts_eval, test_labels_eval, tokenizer, MAX_LEN, ids=test_ids_eval
)
submission_generation_test_dataset = RacialHoaxDataset(
    submission_test_texts, None, tokenizer, MAX_LEN, ids=submission_test_ids
)

def collate_fn(batch):
    """Collate a list of samples, ignoring entries that are ``None``.

    Returns:
        The result of PyTorch's default collate over the remaining samples,
        or ``None`` when no sample is left.
    """
    kept = [sample for sample in batch if sample is not None]
    if len(kept) == 0:
        return None
    return torch.utils.data.dataloader.default_collate(kept)

# Settings shared by all four loaders; only batch size and shuffling differ.
_loader_common = dict(num_workers=2, pin_memory=True, collate_fn=collate_fn)
_eval_batch_size = BATCH_SIZE * 2

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, **_loader_common)
dev_dataloader = DataLoader(dev_dataset, batch_size=_eval_batch_size, shuffle=False, **_loader_common)
eval_test_dataloader = DataLoader(eval_test_dataset, batch_size=_eval_batch_size, shuffle=False, **_loader_common)
submission_generation_test_dataloader = DataLoader(
    submission_generation_test_dataset, batch_size=_eval_batch_size, shuffle=False, **_loader_common
)

print(f"\nLoading Model: {MODEL_NAME}")
try:
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
    model.to(device)
except Exception as e:
    print(f"Error loading model {MODEL_NAME}: {e}")
    exit()

print(f"Optimizer: AdamW, LR: {LEARNING_RATE}, Weight Decay: {WEIGHT_DECAY}")
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

# One scheduler step per training batch; the first 10% of the steps are warmup.
total_steps = EPOCHS * len(train_dataloader)
num_warmup_steps = int(0.1 * total_steps)
print(f"Total steps: {total_steps}, Warmup steps: {num_warmup_steps}")
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=total_steps,
)

def train_epoch(model, data_loader, optimizer, device, scheduler):
    """Run one training pass over ``data_loader`` and return the mean batch loss.

    Batches that are ``None``, produce a NaN loss, or raise are skipped with a
    printed message. A batch contributes to the average only after its
    optimizer and scheduler steps have completed, so the loss sum and the
    batch count always describe the same set of batches.

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and ``labels``;
            its output must expose ``.loss``.
        data_loader: Iterable of batches (dicts with those three keys) or ``None``.
        optimizer: Optimizer stepped once per processed batch.
        device: Device the batch tensors are moved to.
        scheduler: Learning-rate scheduler stepped once per processed batch.

    Returns:
        Mean loss over the processed batches, or ``0.0`` if none was processed.
    """
    model.train()
    total_loss = 0.0
    processed_batches = 0
    progress_bar = tqdm(data_loader, desc="Training", leave=False)
    for batch in progress_bar:
        if batch is None:
            continue
        optimizer.zero_grad()
        try:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            if torch.isnan(loss):
                print("Warning: NaN loss detected during training, skipping batch.")
                gc.collect()
                torch.cuda.empty_cache()
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            # Read the scalar once, and only book the batch after the update
            # succeeded; a failure above must not leave its loss in the sum.
            batch_loss = loss.item()
            total_loss += batch_loss
            processed_batches += 1
            progress_bar.set_postfix({'loss': batch_loss})
        except RuntimeError as e:
            if "out of memory" in str(e):
                print("WARNING: CUDA out of memory during training. Skipping batch.")
                # Drop any partial gradients before releasing cached memory.
                optimizer.zero_grad()
                gc.collect()
                torch.cuda.empty_cache()
            else:
                print(f"Runtime error during training batch: {e}")
            continue
        except Exception as e:
            print(f"Unhandled error during training batch: {e}")
            continue
    if processed_batches == 0:
        return 0.0
    return total_loss / processed_batches

def eval_model(model, data_loader, device, context="Validation"):
    """Evaluate ``model`` on ``data_loader`` and compute loss, macro-F1 and a report.

    Batches that are ``None``, produce a NaN loss, or raise are skipped with a
    printed message. Macro-F1 and the text report are computed independently,
    so a failure while formatting the report no longer discards a valid F1.
    The report is always laid out for the two classes 0 and 1, which keeps it
    working when only one class occurs in the evaluated data.

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and ``labels``;
            its output must expose ``.loss`` and ``.logits``.
        data_loader: Iterable of batches (dicts with those three keys) or ``None``.
        device: Device the batch tensors are moved to.
        context: Name of the split, used in progress and log messages.

    Returns:
        Tuple ``(avg_loss, macro_f1, report, all_labels, all_preds)``. When no
        batch was processed the tuple is ``(0.0, 0.0, "No batches processed.", [], [])``.
    """
    model.eval()
    total_loss = 0.0
    all_preds, all_labels = [], []
    processed_batches = 0
    progress_bar = tqdm(data_loader, desc=f"Evaluating ({context})", leave=False)
    with torch.no_grad():
        for batch in progress_bar:
            if batch is None:
                continue
            try:
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                labels = batch["labels"].to(device)
                outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
                loss = outputs.loss
                if torch.isnan(loss):
                    print(f"Warning: NaN loss detected during {context} evaluation, skipping batch.")
                    continue
                # Finish all tensor work first, then update the accumulators together.
                batch_preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
                batch_labels = labels.cpu().numpy()
                total_loss += loss.item()
                all_preds.extend(batch_preds)
                all_labels.extend(batch_labels)
                processed_batches += 1
            except RuntimeError as e:
                if "out of memory" in str(e):
                    print(f"WARNING: CUDA out of memory during {context} evaluation. Skipping batch.")
                    gc.collect()
                    torch.cuda.empty_cache()
                else:
                    print(f"Error during {context} evaluation batch: {e}")
                continue
            except Exception as e:
                print(f"Unhandled error during {context} evaluation batch: {e}")
                continue
    if processed_batches == 0:
        return 0.0, 0.0, "No batches processed.", [], []
    avg_loss = total_loss / processed_batches
    if not all_labels or not all_preds:
        return avg_loss, 0.0, "No predictions collected.", [], []

    try:
        macro_f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    except Exception as e:
        print(f"Error calculating metrics: {e}")
        return avg_loss, 0.0, "Error calculating metrics.", all_labels, all_preds

    try:
        # labels=[0, 1] pins the rows to the two target_names; without it the
        # call raises whenever only one class is present in labels/predictions.
        report = classification_report(
            all_labels,
            all_preds,
            labels=[0, 1],
            digits=4,
            target_names=['Non-Hoax (0)', 'Racial Hoax (1)'],
            zero_division=0,
        )
    except Exception as e:
        # Only the human-readable report is lost; macro_f1 stays valid.
        print(f"Error calculating metrics: {e}")
        report = "Error calculating metrics."
    return avg_loss, macro_f1, report, all_labels, all_preds

print(f"\nStarting Training for Racial Hoax Detection ({MODEL_NAME} Tuned Run)...")
best_macro_f1 = 0.0
epochs_no_improve = 0
history = {'train_loss': [], 'val_loss': [], 'val_macro_f1': []}
best_model_path = os.path.join(OUTPUT_DIR, 'best_model_racial_hoax.pt')
# True once THIS run has written a checkpoint. A file left at the same path by
# an earlier run must not be mistaken for the best model of the current run.
best_model_saved = False
print(f"Early stopping based on Validation Macro F1, patience={PATIENCE}.")

for epoch in range(EPOCHS):
    print(f"\n--- Epoch {epoch + 1}/{EPOCHS} ---")
    train_loss = train_epoch(model, train_dataloader, optimizer, device, scheduler)
    print(f"Average Training Loss: {train_loss:.4f}")
    val_loss, val_macro_f1, report, _, _ = eval_model(model, dev_dataloader, device, context="Validation")
    print(f"Validation Loss: {val_loss:.4f}\nValidation Macro F1: {val_macro_f1:.4f}\nValidation Classification Report:\n{report}")
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['val_macro_f1'].append(val_macro_f1)

    # An epoch counts as an improvement only if it beats the best score by more than 1e-5.
    if val_macro_f1 > best_macro_f1 + 1e-5:
        print(f"Validation Macro F1 improved ({best_macro_f1:.4f} --> {val_macro_f1:.4f}). Saving model to {best_model_path}")
        best_macro_f1 = val_macro_f1
        try:
            torch.save(model.state_dict(), best_model_path)
            best_model_saved = True
        except Exception as e:
            print(f"Error saving model: {e}")
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        print(f"Validation Macro F1 did not improve for {epochs_no_improve} epoch(s). Best: {best_macro_f1:.4f}")

    if epochs_no_improve >= PATIENCE:
        print(f"\nEarly stopping triggered after {epoch + 1} epochs.")
        break
    gc.collect()
    torch.cuda.empty_cache()

print(f"\nTraining finished. Best validation Macro F1 achieved: {best_macro_f1:.4f}")
if best_model_saved and os.path.exists(best_model_path):
    print(f"Best model saved at: {best_model_path}")
else:
    # Clearing the path makes the later loading step use the final in-memory weights.
    print("Warning: No best model was saved. Predictions will use the model's final state.")
    best_model_path = None

print("\nLoading best model (if available) for final evaluation and prediction...")
# A separate model instance is created and then filled with the chosen weights.
eval_predict_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
if not (best_model_path and os.path.exists(best_model_path)):
    print("Best model file not found or wasn't saved. Using final trained model state for evaluation and prediction.")
    eval_predict_model.load_state_dict(model.state_dict())
else:
    try:
        eval_predict_model.load_state_dict(torch.load(best_model_path, map_location=device))
        print("Best model weights loaded successfully for evaluation and prediction.")
    except Exception as e:
        # An unreadable checkpoint falls back to the weights of the last epoch.
        print(f"Error loading best model state dict from {best_model_path}: {e}. Using final trained model state.")
        eval_predict_model.load_state_dict(model.state_dict())
eval_predict_model.to(device)
eval_predict_model.eval()

print("\n--- Evaluating on Test Set (with labels) ---")
test_loss, test_macro_f1, test_report, test_true_labels, test_predictions_eval = eval_model(
    eval_predict_model, eval_test_dataloader, device, context="Test"
)

print(f"\nTest Set Loss: {test_loss:.4f}\nTest Set Macro F1: {test_macro_f1:.4f}\n\nTest Set Classification Report:\n{test_report}")

if not (test_true_labels and test_predictions_eval):
    print("Could not generate Test Set Accuracy or Confusion Matrix: true labels or predictions missing.")
else:
    test_accuracy = accuracy_score(test_true_labels, test_predictions_eval)
    print(f"\nTest Set Accuracy: {test_accuracy:.4f}")

    print("\nTest Set Confusion Matrix:")
    cm = confusion_matrix(test_true_labels, test_predictions_eval)
    print(cm)

    # Plotting is best-effort: a failure is reported and the run continues.
    try:
        class_names = ['Non-Hoax (0)', 'Racial Hoax (1)']
        plt.figure(figsize=(8, 6))
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names,
        )
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.title('Confusion Matrix for Test Set')
        cm_plot_path = os.path.join(OUTPUT_DIR, 'test_set_confusion_matrix.png')
        plt.savefig(cm_plot_path)
        print(f"Confusion matrix plot saved to {cm_plot_path}")
    except Exception as e:
        print(f"Could not plot confusion matrix: {e}")

submission_predictions_list = []
submission_ids_list = []
print("\nGenerating predictions on the original test set for submission file...")
submission_progress_bar = tqdm(submission_generation_test_dataloader, desc="Generating Submission Predictions")
with torch.no_grad():
    for batch in submission_progress_bar:
        if batch is None:
            continue
        batch_ids = batch['original_id']
        try:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            outputs = eval_predict_model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            predictions = torch.argmax(logits, dim=1)
            preds_np = predictions.cpu().numpy()
            # Collated IDs may arrive as a tensor or as a plain sequence.
            if isinstance(batch_ids, torch.Tensor):
                ids_np = batch_ids.cpu().numpy()
            else:
                ids_np = batch_ids
            for i, pred in enumerate(preds_np):
                submission_ids_list.append(ids_np[i])
                submission_predictions_list.append(pred)
        except RuntimeError as e:
            if "out of memory" in str(e):
                print(f"WARNING: CUDA out of memory during submission prediction. Skipping batch.")
                gc.collect()
                torch.cuda.empty_cache()
            else:
                print(f"Runtime error during submission prediction batch: {e}. Skipping batch.")
            continue
        except Exception as e:
            print(f"Unhandled error during submission prediction batch: {e}. Skipping batch.")
            continue

print(f"\nCreating submission file: {SUBMISSION_FILE}")
results_df = pd.DataFrame({'ID': submission_ids_list, 'predictions': submission_predictions_list})
# Left-join onto the original IDs so the output keeps the order of the test
# file and still contains rows whose batch was skipped during inference.
submission_df = pd.merge(original_test_df_for_submission[['ID']], results_df, on='ID', how='left')

missing_preds = submission_df['predictions'].isnull().sum()
if missing_preds > 0:
    print(f"Warning: {missing_preds} predictions were missing for submission. Filling with 0.")
    # Assign the filled column back. Calling fillna(..., inplace=True) on the
    # selected column is chained assignment and does not update the frame
    # under pandas Copy-on-Write, which would make astype(int) fail on NaN.
    submission_df['predictions'] = submission_df['predictions'].fillna(0)
submission_df['predictions'] = submission_df['predictions'].astype(int)

if len(submission_df) != len(original_test_df_for_submission):
    print(f"CRITICAL WARNING: Final submission row count ({len(submission_df)}) does not match original test set size ({len(original_test_df_for_submission)}).")

submission_path = os.path.join(OUTPUT_DIR, SUBMISSION_FILE)
try:
    # The file is written without a header row and without the index.
    submission_df[['ID', 'predictions']].to_csv(submission_path, index=False, header=False)
    print(f"Submission file saved to {submission_path}\nSubmission file format check: Contains {len(submission_df)} lines.")
    with open(submission_path, 'r') as f:
        print("First 5 lines of submission file:")
        for i, line in enumerate(f):
            if i >= 5:
                break
            print(line.strip())
except Exception as e:
    print(f"Error saving submission file: {e}")

zip_filename = f"{TEAM_NAME}.{RUN_ID}.zip"
zip_filepath = os.path.join(OUTPUT_DIR, zip_filename)
print(f"\nCreating zip file for submission: {zip_filename}")
# Check for the CSV before opening the archive: opening in 'w' mode creates the
# zip immediately, which would leave an empty archive when the CSV is missing.
if os.path.exists(submission_path):
    try:
        with zipfile.ZipFile(zip_filepath, 'w', zipfile.ZIP_DEFLATED) as zipf:
            zipf.write(submission_path, arcname=SUBMISSION_FILE)
        # Reported only after the archive has been closed and finalised.
        print(f"Submission zip file created at: {zip_filepath}")
    except Exception as e:
        print(f"Error creating zip file: {e}")
else:
    print(f"Error: Submission file {submission_path} not found. Cannot create zip file.")

print("\nProcess completed.")
print("\nPlotting training history...")
try:
    plt.figure(figsize=(12, 5))

    # Left panel: training and validation loss per epoch.
    plt.subplot(1, 2, 1)
    plt.plot(history['train_loss'], label='Train Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.title('Loss History')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # Right panel: validation macro-F1 per epoch.
    plt.subplot(1, 2, 2)
    plt.plot(history['val_macro_f1'], label='Validation Macro F1')
    plt.title('Validation Macro F1 History')
    plt.xlabel('Epoch')
    plt.ylabel('Macro F1')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plot_path = os.path.join(OUTPUT_DIR, 'training_history_racial_hoax.png')
    plt.savefig(plot_path)
    print(f"Training history plot saved to {plot_path}")
except Exception as e:
    print(f"Could not plot training history: {e}")
# Release every figure opened during the run, whether or not plotting succeeded.
plt.close('all')

2025-05-12 07:27:13.094856: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747034833.302211      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747034833.359460      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Seeds set to 42
Using GPU: Tesla P100-PCIE-16GB
Loading HoaxMixPlus data...
Columns in train_df BEFORE sanitization: ['clean_text', 'labels']
Columns in train_df AFTER sanitization: ['clean_text', 'labels']
Columns in dev_df BEFORE sanitization: ['clean_text', 'labels']
Columns in dev_df AFTER sanitization: ['clean_text', 'labels']
Columns in test_df_with_labels BEFORE sanitization: ['ID', 'clean_text', 'labels']
Columns in test_df_with_labels AFTER sanitization: ['ID', 'clean_text', 'labels']
Columns in original_test_df_for_submission BEFORE sanitization: ['ID', 'clean_text']
Columns in original_test_df_for_submission AFTER sanitization: ['ID', 'clean_text']
Data loaded successfully.
Train samples: 3060, Dev samples: 1021, Test (with labels) samples: 1021, Original Test (for submission) samples: 1021
Column names verified successfully.

Train Data Class Distribution (0: Non-Hoax, 1: Racial Hoax):
labels
0    0.757843
1    0.242157
Name: proportion, dtype: float64

Loading Tokenizer: l3cube-pune/hing-roberta-mixed

tokenizer_config.json:   0%|          | 0.00/406 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]


Creating Datasets and Dataloaders...

Loading Model: l3cube-pune/hing-roberta-mixed


config.json:   0%|          | 0.00/756 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/hing-roberta-mixed and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Optimizer: AdamW, LR: 3e-06, Weight Decay: 0.05
Total steps: 1920, Warmup steps: 192

Starting Training for Racial Hoax Detection (l3cube-pune/hing-roberta-mixed Tuned Run)...
Early stopping based on Validation Macro F1, patience=3.

--- Epoch 1/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.6189


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.5276
Validation Macro F1: 0.4312
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.7581    1.0000    0.8624       774
Racial Hoax (1)     0.0000    0.0000    0.0000       247

       accuracy                         0.7581      1021
      macro avg     0.3790    0.5000    0.4312      1021
   weighted avg     0.5747    0.7581    0.6538      1021

Validation Macro F1 improved (0.0000 --> 0.4312). Saving model to ./results_racial_hoax_hing_roberta_mixed_tuned/best_model_racial_hoax.pt

--- Epoch 2/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.4751


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.4303
Validation Macro F1: 0.6684
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8197    0.9457    0.8782       774
Racial Hoax (1)     0.6719    0.3482    0.4587       247

       accuracy                         0.8012      1021
      macro avg     0.7458    0.6470    0.6684      1021
   weighted avg     0.7839    0.8012    0.7767      1021

Validation Macro F1 improved (0.4312 --> 0.6684). Saving model to ./results_racial_hoax_hing_roberta_mixed_tuned/best_model_racial_hoax.pt

--- Epoch 3/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.4043


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.4383
Validation Macro F1: 0.6844
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8234    0.9638    0.8881       774
Racial Hoax (1)     0.7565    0.3522    0.4807       247

       accuracy                         0.8159      1021
      macro avg     0.7900    0.6580    0.6844      1021
   weighted avg     0.8072    0.8159    0.7895      1021

Validation Macro F1 improved (0.6684 --> 0.6844). Saving model to ./results_racial_hoax_hing_roberta_mixed_tuned/best_model_racial_hoax.pt

--- Epoch 4/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.3742


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.4129
Validation Macro F1: 0.7352
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8625    0.8992    0.8805       774
Racial Hoax (1)     0.6355    0.5506    0.5900       247

       accuracy                         0.8149      1021
      macro avg     0.7490    0.7249    0.7352      1021
   weighted avg     0.8076    0.8149    0.8102      1021

Validation Macro F1 improved (0.6844 --> 0.7352). Saving model to ./results_racial_hoax_hing_roberta_mixed_tuned/best_model_racial_hoax.pt

--- Epoch 5/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.3423


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.4178
Validation Macro F1: 0.7378
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8556    0.9264    0.8896       774
Racial Hoax (1)     0.6885    0.5101    0.5860       247

       accuracy                         0.8257      1021
      macro avg     0.7721    0.7182    0.7378      1021
   weighted avg     0.8152    0.8257    0.8161      1021

Validation Macro F1 improved (0.7352 --> 0.7378). Saving model to ./results_racial_hoax_hing_roberta_mixed_tuned/best_model_racial_hoax.pt

--- Epoch 6/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.3127


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.4276
Validation Macro F1: 0.7356
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8559    0.9212    0.8874       774
Racial Hoax (1)     0.6755    0.5142    0.5839       247

       accuracy                         0.8227      1021
      macro avg     0.7657    0.7177    0.7356      1021
   weighted avg     0.8123    0.8227    0.8140      1021

Validation Macro F1 did not improve for 1 epoch(s). Best: 0.7378

--- Epoch 7/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.2944


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.4376
Validation Macro F1: 0.7308
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8537    0.9199    0.8856       774
Racial Hoax (1)     0.6684    0.5061    0.5760       247

       accuracy                         0.8198      1021
      macro avg     0.7611    0.7130    0.7308      1021
   weighted avg     0.8089    0.8198    0.8107      1021

Validation Macro F1 did not improve for 2 epoch(s). Best: 0.7378

--- Epoch 8/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.2769


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.4412
Validation Macro F1: 0.7435
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8631    0.9121    0.8869       774
Racial Hoax (1)     0.6650    0.5466    0.6000       247

       accuracy                         0.8237      1021
      macro avg     0.7641    0.7294    0.7435      1021
   weighted avg     0.8152    0.8237    0.8175      1021

Validation Macro F1 improved (0.7378 --> 0.7435). Saving model to ./results_racial_hoax_hing_roberta_mixed_tuned/best_model_racial_hoax.pt

--- Epoch 9/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.2633


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.4520
Validation Macro F1: 0.7355
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8566    0.9186    0.8865       774
Racial Hoax (1)     0.6702    0.5182    0.5845       247

       accuracy                         0.8217      1021
      macro avg     0.7634    0.7184    0.7355      1021
   weighted avg     0.8115    0.8217    0.8135      1021

Validation Macro F1 did not improve for 1 epoch(s). Best: 0.7435

--- Epoch 10/10 ---


Training:   0%|          | 0/192 [00:00<?, ?it/s]

Average Training Loss: 0.2613


Evaluating (Validation):   0%|          | 0/32 [00:00<?, ?it/s]

Validation Loss: 0.4502
Validation Macro F1: 0.7389
Validation Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8601    0.9134    0.8860       774
Racial Hoax (1)     0.6633    0.5344    0.5919       247

       accuracy                         0.8217      1021
      macro avg     0.7617    0.7239    0.7389      1021
   weighted avg     0.8125    0.8217    0.8148      1021

Validation Macro F1 did not improve for 2 epoch(s). Best: 0.7435

Training finished. Best validation Macro F1 achieved: 0.7435
Best model saved at: ./results_racial_hoax_hing_roberta_mixed_tuned/best_model_racial_hoax.pt

Loading best model (if available) for final evaluation and prediction...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/hing-roberta-mixed and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Best model weights loaded successfully for evaluation and prediction.

--- Evaluating on Test Set (with labels) ---


Evaluating (Test):   0%|          | 0/32 [00:00<?, ?it/s]


Test Set Loss: 0.4182
Test Set Macro F1: 0.7505

Test Set Classification Report:
                 precision    recall  f1-score   support

   Non-Hoax (0)     0.8681    0.9096    0.8883       774
Racial Hoax (1)     0.6667    0.5668    0.6127       247

       accuracy                         0.8266      1021
      macro avg     0.7674    0.7382    0.7505      1021
   weighted avg     0.8193    0.8266    0.8216      1021


Test Set Accuracy: 0.8266

Test Set Confusion Matrix:
[[704  70]
 [107 140]]
Confusion matrix plot saved to ./results_racial_hoax_hing_roberta_mixed_tuned/test_set_confusion_matrix.png

Generating predictions on the original test set for submission file...


Generating Submission Predictions:   0%|          | 0/32 [00:00<?, ?it/s]


Creating submission file: submission.csv
Submission file saved to ./results_racial_hoax_hing_roberta_mixed_tuned/submission.csv
Submission file format check: Contains 1021 lines.
First 5 lines of submission file:
RH_01,0
RH_02,0
RH_03,0
RH_04,0
RH_05,1

Creating zip file for submission: YourTeamName_HoaxMix.run_4_hing_roberta_mixed_tuned_lr_wd.zip
Submission zip file created at: ./results_racial_hoax_hing_roberta_mixed_tuned/YourTeamName_HoaxMix.run_4_hing_roberta_mixed_tuned_lr_wd.zip

Process completed.

Plotting training history...
Training history plot saved to ./results_racial_hoax_hing_roberta_mixed_tuned/training_history_racial_hoax.png
