In [None]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import f1_score, classification_report
import warnings
import csv
import os

warnings.filterwarnings('ignore')

def set_seed(seed=42):
    """Seed the random number generators used by this script.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and CUDA), then
    sets ``cudnn.deterministic = True`` and ``cudnn.benchmark = False``.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    # Local import: the file does not import ``random`` at module level.
    import random

    # Python's own RNG was previously left unseeded, so any library code that
    # draws from ``random`` differed from run to run.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Seed the RNGs once, before the tokenizer, datasets and model are created.
set_seed()

# Language tag; used below to build the submission file name.
LANGUAGE = 'italian'

# Input TSV files (Kaggle dataset paths) and the location of the submission file.
TRAIN_PATH = '/kaggle/input/clef2025-checkthat-lab-track-01/italian/train_it.tsv'
DEV_PATH = '/kaggle/input/clef2025-checkthat-lab-track-01/italian/dev_it.tsv'
TEST_PATH = '/kaggle/input/clef2025-checkthat-lab-track-01/italian/test_it_unlabeled.tsv'
OUTPUT_DIR = './'
OUTPUT_FILENAME = f'subtask_{LANGUAGE}.tsv'
OUTPUT_PATH = os.path.join(OUTPUT_DIR, OUTPUT_FILENAME)

# Hugging Face model identifier used for both the tokenizer and the classifier.
MODEL_NAME = 'FacebookAI/xlm-roberta-large'
print(f"Using model: {MODEL_NAME}")

# Training hyperparameters.
BATCH_SIZE = 16            # batch size of every DataLoader
MAX_LENGTH = 128           # sequences are padded/truncated to this many tokens
EPOCHS = 5                 # epochs of the initial phase (train set, evaluated on dev)
FINAL_EPOCHS = 3           # epochs of the final phase (train + dev combined)
LEARNING_RATE = 1.8e-5     # AdamW learning rate for the initial phase
FINAL_LR_FACTOR = 0.5      # final-phase learning rate = LEARNING_RATE * FINAL_LR_FACTOR
WEIGHT_DECAY = 0.01        # AdamW weight decay (both phases)
WARMUP_PROPORTION = 0.1    # fraction of the total steps used for linear warmup

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Make sure the output directory exists before anything is written to it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Loading tokenizer for {MODEL_NAME}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print("Tokenizer loaded.")

class SubjectivityDataset(Dataset):
    """Torch dataset that tokenizes sentences for OBJ/SUBJ classification.

    Rows with a missing ``sentence`` are dropped; when ``has_labels`` is true,
    rows with a missing ``label`` are dropped as well. Labels are encoded as
    1 for ``SUBJ`` (case-insensitive, surrounding whitespace ignored) and 0
    for anything else.
    """

    def __init__(self, dataframe, tokenizer, max_length, has_labels=True):
        """Validate the frame and cache texts, ids and (optionally) labels.

        Args:
            dataframe: pandas DataFrame with ``sentence`` and ``sentence_id``
                columns, plus ``label`` when ``has_labels`` is true.
            tokenizer: Callable tokenizer invoked on one sentence at a time
                (presumably a Hugging Face tokenizer; it must accept the
                keyword arguments used in ``__getitem__``).
            max_length: Length every sequence is padded/truncated to.
            has_labels: Whether a ``label`` column is expected and encoded.

        Raises:
            ValueError: If a required column is missing.
        """
        # Same validation order (and messages) as before: sentence, id, label.
        if 'sentence' not in dataframe.columns:
            raise ValueError("DataFrame must contain a 'sentence' column.")
        if 'sentence_id' not in dataframe.columns:
            raise ValueError("DataFrame must contain a 'sentence_id' column.")
        if has_labels and 'label' not in dataframe.columns:
            raise ValueError("DataFrame must contain a 'label' column when has_labels=True.")

        # One filtering pass keeps texts, ids and labels aligned by construction.
        required = ['sentence', 'label'] if has_labels else ['sentence']
        dataframe = dataframe.dropna(subset=required)

        self.tokenizer = tokenizer
        self.max_length = max_length
        self.has_labels = has_labels
        self.text = dataframe['sentence'].tolist()
        self.ids = dataframe['sentence_id'].astype(str).tolist()
        if has_labels:
            self.labels = [
                1 if str(label).strip().upper() == 'SUBJ' else 0
                for label in dataframe['label'].tolist()
            ]

    def __len__(self):
        """Return the number of usable rows."""
        return len(self.text)

    def __getitem__(self, index):
        """Tokenize one sentence and return its tensors.

        Returns:
            dict with ``input_ids`` and ``attention_mask`` (flattened tensors),
            ``sentence_id`` (str) and, when the dataset has labels, ``labels``
            (scalar ``torch.long`` tensor).

        Raises:
            IndexError: If ``index`` is out of range.
        """
        text = str(self.text[index])
        # Call the tokenizer directly: ``encode_plus`` is deprecated in
        # Transformers in favour of ``__call__``, which takes the same
        # arguments for a single sentence.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        item = {
            # return_tensors='pt' yields a leading batch dimension; drop it.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'sentence_id': str(self.ids[index]),
        }
        if self.has_labels:
            item['labels'] = torch.tensor(self.labels[index], dtype=torch.long)
        return item

def clean_text(text):
    """Return *text* as a string with surrounding whitespace removed.

    Missing scalar values (``None``, ``NaN``, ``pd.NA``, ``NaT``) yield an
    empty string; any other non-string value is converted with ``str``.

    Args:
        text: The value to clean, normally a sentence string.

    Returns:
        The stripped string, or ``''`` for a missing value.
    """
    if isinstance(text, str):
        return text.strip()
    # str(float('nan')) is 'nan' and str(None) is 'None'. Returning those
    # would turn a missing sentence into real-looking text that the caller's
    # empty-sentence filter cannot remove.
    if pd.api.types.is_scalar(text) and pd.isna(text):
        return ''
    return str(text).strip()

def load_and_preprocess_data(file_path, has_labels=True):
    print(f"Attempting to load data from: {file_path}")
    required_cols = ['sentence_id', 'sentence']
    if has_labels:
        required_cols.append('label')
    try:
        df = pd.read_csv(file_path, sep='\t', quoting=csv.QUOTE_NONE, on_bad_lines='warn', dtype={'sentence_id': str})
        if not all(col in df.columns for col in required_cols):
             print(f"Warning: QUOTE_NONE loaded but missing columns. Required: {required_cols}. Found: {df.columns}. Trying QUOTE_MINIMAL.")
             raise ValueError("Missing columns with QUOTE_NONE")
    except (pd.errors.ParserError, ValueError) as e:
        print(f"Info: Reading with QUOTE_NONE failed ({e}). Trying QUOTE_MINIMAL.")
        try:
             df = pd.read_csv(file_path, sep='\t', quoting=csv.QUOTE_MINIMAL, on_bad_lines='warn', dtype={'sentence_id': str})
             if not all(col in df.columns for col in required_cols):
                 raise ValueError(f"File {file_path} (QUOTE_MINIMAL) is missing required columns. Found: {df.columns}. Required: {required_cols}")
        except Exception as e_minimal:
             print(f"Error loading {file_path} even with QUOTE_MINIMAL: {e_minimal}")
             return None
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred loading {file_path}: {e}")
        return None
    print(f"Successfully loaded {file_path}. Initial shape: {df.shape}")
    if 'sentence' in df.columns:
        df['sentence'] = df['sentence'].apply(clean_text)
        df = df[df['sentence'].fillna('').astype(str).str.strip() != '']
        print(f"Shape after cleaning and removing empty sentences: {df.shape}")
    else:
        print(f"Warning: 'sentence' column not found in {file_path}. Skipping cleaning.")
    df = df.dropna(subset=['sentence_id', 'sentence'])
    if has_labels:
        df = df.dropna(subset=['label'])
        df['label'] = df['label'].astype(str).str.strip().str.upper()
        df = df[df['label'].isin(['OBJ', 'SUBJ'])]
    print(f"Shape after dropping NaNs in required columns: {df.shape}")
    return df

# --- Data loading -----------------------------------------------------------
print("\n--- Loading and Preprocessing Data ---")
train_df = load_and_preprocess_data(TRAIN_PATH, has_labels=True)
dev_df = load_and_preprocess_data(DEV_PATH, has_labels=True)
test_df = load_and_preprocess_data(TEST_PATH, has_labels=False)

# Train and dev data are mandatory. ``raise SystemExit`` is used instead of
# ``exit()``: the latter is a helper for interactive sessions and is not
# guaranteed to stop execution immediately (e.g. inside a notebook kernel),
# which would let the statements below run on missing data.
if train_df is None or dev_df is None:
    print("CRITICAL ERROR: Could not load train or dev data files. Please check paths and file integrity. Exiting.")
    raise SystemExit(1)
if train_df.empty or dev_df.empty:
    print("CRITICAL ERROR: Train or Dev DataFrame is empty after loading/preprocessing. Cannot proceed. Exiting.")
    raise SystemExit(1)

# Test data is optional: without it, training still runs and prediction is skipped.
test_data_available = False
if test_df is not None and not test_df.empty:
    test_data_available = True
    print(f"Test data loaded successfully. Shape: {test_df.shape}")
elif test_df is not None and test_df.empty:
    print(f"Warning: Test DataFrame ({TEST_PATH}) loaded but is empty after preprocessing.")
elif TEST_PATH and not os.path.exists(TEST_PATH):
    print(f"Warning: Test file specified ({TEST_PATH}) but not found. Prediction will be skipped.")
else:
    print(f"Warning: Test file ({TEST_PATH}) could not be loaded or is None. Prediction will be skipped.")
print(f"\nTrain data shape after final checks: {train_df.shape}")
print(f"Dev data shape after final checks: {dev_df.shape}")

# Train + dev together feed the final training phase.
print("\nCombining train and dev sets for final training phase...")
combined_train_df = pd.concat([train_df, dev_df], ignore_index=True)
print(f"Combined train data shape: {combined_train_df.shape}")

# --- Datasets ---------------------------------------------------------------
print("\n--- Creating Datasets ---")
try:
    train_dataset = SubjectivityDataset(train_df, tokenizer, MAX_LENGTH, has_labels=True)
    dev_dataset = SubjectivityDataset(dev_df, tokenizer, MAX_LENGTH, has_labels=True)
    combined_dataset = SubjectivityDataset(combined_train_df, tokenizer, MAX_LENGTH, has_labels=True)
    test_dataset = SubjectivityDataset(test_df, tokenizer, MAX_LENGTH, has_labels=False) if test_data_available else None
    print("Datasets created successfully.")
except ValueError as e:
    print(f"CRITICAL ERROR creating Dataset: {e}. Exiting.")
    raise SystemExit(1)
except Exception as e:
    print(f"CRITICAL UNEXPECTED ERROR during Dataset creation: {e}. Exiting.")
    raise SystemExit(1)

# --- DataLoaders ------------------------------------------------------------
# Only the training loaders shuffle; dev/test keep file order so predictions
# line up with sentence ids.
print("\n--- Creating DataLoaders ---")
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False)
combined_loader = DataLoader(combined_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False) if test_dataset else None
print(f"DataLoaders created. Batch size: {BATCH_SIZE}")

# --- Model ------------------------------------------------------------------
# Two-class head; the id/label mapping matches SubjectivityDataset (OBJ=0, SUBJ=1).
print(f"\n--- Loading Model: {MODEL_NAME} ---")
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
    id2label={0: "OBJ", 1: "SUBJ"},
    label2id={"OBJ": 0, "SUBJ": 1}
)
print("Model loaded successfully.")

model = model.to(device)

def train_model(model, dataloader, optimizer, scheduler, device, epoch_num, total_epochs):
    """Run one training epoch and return the mean loss per completed batch.

    Each batch is moved to ``device`` and passed through ``model`` together
    with its labels; the returned loss is back-propagated, gradients are
    clipped to a norm of 1.0, and the optimizer and scheduler are stepped.
    A batch that fails at any stage is reported and skipped.

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and
            ``labels`` whose output exposes a ``loss`` attribute.
        dataloader: Sized iterable of batch dicts with the keys
            ``input_ids``, ``attention_mask`` and ``labels``.
        optimizer: Optimizer stepped once per completed batch.
        scheduler: Learning-rate scheduler stepped once per completed batch.
        device: Device the batch tensors are moved to.
        epoch_num: 1-based epoch number, used only in progress messages.
        total_epochs: Total number of epochs, used only in progress messages.

    Returns:
        The loss averaged over the batches whose optimisation step completed,
        or 0 when no batch completed.
    """
    model.train()
    total_loss = 0
    # Skipped batches must not dilute the average, so count successes separately.
    completed_batches = 0
    num_batches = len(dataloader)
    for batch_idx, batch in enumerate(dataloader):
        optimizer.zero_grad()
        try:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
        except KeyError as e:
            print(f"ERROR: Missing key in train batch {batch_idx+1}/{num_batches}: {e}. Skipping batch.")
            continue
        except Exception as e:
            print(f"ERROR moving train batch {batch_idx+1}/{num_batches} to device: {e}. Skipping batch.")
            continue
        try:
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )
            loss = outputs.loss
            if loss is None:
                print(f"Warning: Loss is None for train batch {batch_idx+1}/{num_batches}. Skipping backward pass.")
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            # Record the loss only once the whole step succeeded.
            total_loss += loss.item()
            completed_batches += 1
            if (batch_idx + 1) % 50 == 0 or batch_idx == num_batches - 1:
                print(f"  Epoch {epoch_num}/{total_epochs} - Batch {batch_idx+1}/{num_batches} - Current Avg Loss: {total_loss / completed_batches:.4f}")
        except Exception as e:
            print(f"ERROR during train forward/backward pass for batch {batch_idx+1}/{num_batches}: {e}. Skipping batch.")
            continue
    return total_loss / completed_batches if completed_batches else 0

def evaluate_model(model, dataloader, device):
    """Run ``model`` over ``dataloader`` in eval mode and score or label it.

    Batches that cannot be moved to ``device`` or that fail in the forward
    pass are reported and skipped.

    Args:
        model: Model called with ``input_ids`` and ``attention_mask`` whose
            output exposes ``logits``; ``model.config.id2label`` is read when
            the report is printed.
        dataloader: Sized iterable of batch dicts with ``input_ids``,
            ``attention_mask``, ``sentence_id`` and optionally ``labels``.
        device: Device the batch tensors are moved to.

    Returns:
        The macro F1 score when gold labels were collected (0.0 if the number
        of predictions and labels differ); otherwise a tuple
        ``(sentence_ids, label_strings)`` with 'OBJ'/'SUBJ' predictions.
    """
    model.eval()
    predictions, actual_labels, all_sentence_ids = [], [], []
    has_labels_in_data = True
    num_batches = len(dataloader)

    with torch.no_grad():
        for batch_idx, batch in enumerate(dataloader):
            # Stage 1: pull the tensors out of the batch and move them over.
            try:
                model_inputs = {
                    'input_ids': batch['input_ids'].to(device),
                    'attention_mask': batch['attention_mask'].to(device),
                }
                batch_sentence_ids = batch['sentence_id']
                if 'labels' in batch:
                    labels = batch['labels'].to(device)
                else:
                    # One unlabeled batch marks the whole run as unlabeled.
                    has_labels_in_data = False
            except KeyError as e:
                print(f"ERROR: Missing key in eval batch {batch_idx+1}/{num_batches}: {e}. Skipping batch.")
                continue
            except Exception as e:
                print(f"ERROR moving eval batch {batch_idx+1}/{num_batches} to device: {e}. Skipping batch.")
                continue

            # Stage 2: forward pass and bookkeeping.
            try:
                logits = model(**model_inputs).logits
                predictions.extend(logits.argmax(dim=1).cpu().tolist())
                all_sentence_ids.extend(batch_sentence_ids)
                if has_labels_in_data and 'labels' in batch:
                    actual_labels.extend(labels.cpu().tolist())
            except Exception as e:
                print(f"ERROR during evaluation forward pass for batch {batch_idx+1}/{num_batches}: {e}. Skipping batch.")
                continue

    # No usable gold labels: hand back ids with string predictions instead.
    if not (has_labels_in_data and actual_labels):
        print("No labels found in evaluation data. Returning sentence IDs and predictions.")
        label_map = {0: 'OBJ', 1: 'SUBJ'}
        label_predictions = [label_map[pred] for pred in predictions]
        if len(all_sentence_ids) != len(label_predictions):
            print(f"Warning: Mismatch in sentence ID count ({len(all_sentence_ids)}) and prediction count ({len(label_predictions)}) during label-less evaluation.")
        return all_sentence_ids, label_predictions

    if len(predictions) != len(actual_labels):
        print(f"Warning: Mismatch in prediction ({len(predictions)}) and label ({len(actual_labels)}) counts during evaluation. Cannot calculate F1 reliably.")
        return 0.0
    # Defensive guard kept from the original flow.
    if not actual_labels or not predictions:
        print("Warning: No actual labels or predictions collected during evaluation. Cannot calculate F1.")
        return 0.0

    f1 = f1_score(actual_labels, predictions, average='macro', zero_division=0)
    # The report is informational only; a failure here must not lose the score.
    try:
        unique_labels = np.unique(actual_labels + predictions)
        target_names = [model.config.id2label[label] for label in sorted(unique_labels)]
        if len(target_names) >= 2:
            report = classification_report(actual_labels, predictions, target_names=['OBJ', 'SUBJ'], labels=[0, 1], zero_division=0)
            print("\n--- Evaluation Report ---")
            print(report)
            print(f"Macro F1 Score: {f1:.4f}")
            print("-------------------------\n")
        else:
            print(f"Warning: Only one class ({target_names[0]}) present in labels/predictions. Full report not possible.")
            print(f"Macro F1: {f1:.4f}")
    except Exception as e:
        print(f"Could not generate classification report: {e}")
        print(f"Actual labels unique: {np.unique(actual_labels)}")
        print(f"Predictions unique: {np.unique(predictions)}")
        print(f"Macro F1 Score: {f1:.4f}")
    return f1

def predict(model, dataloader, device):
    """Predict 'OBJ'/'SUBJ' labels for every batch of an unlabeled loader.

    Batches that cannot be moved to ``device`` or that fail in the forward
    pass are reported and skipped.

    Args:
        model: Model called with ``input_ids`` and ``attention_mask`` whose
            output exposes ``logits``.
        dataloader: Sized iterable of batch dicts with ``input_ids``,
            ``attention_mask`` and ``sentence_id``, or ``None``.
        device: Device the batch tensors are moved to.

    Returns:
        A tuple ``(sentence_ids, label_predictions)``; two empty lists when
        ``dataloader`` is ``None``.
    """
    if dataloader is None:
        print("Test dataloader is None. Skipping prediction.")
        return [], []

    model.eval()
    predictions, sentence_ids = [], []
    num_batches = len(dataloader)
    print(f"Starting prediction on {num_batches} batches...")

    with torch.no_grad():
        for batch_idx, batch in enumerate(dataloader):
            # Stage 1: pull the tensors out of the batch and move them over.
            try:
                model_inputs = {
                    'input_ids': batch['input_ids'].to(device),
                    'attention_mask': batch['attention_mask'].to(device),
                }
                batch_sentence_ids = batch['sentence_id']
            except KeyError as e:
                print(f"ERROR: Missing key in prediction batch {batch_idx+1}/{num_batches}: {e}. Skipping batch.")
                continue
            except Exception as e:
                print(f"ERROR processing prediction batch {batch_idx+1}/{num_batches}: {e}. Skipping batch.")
                continue

            # Stage 2: forward pass; ids are recorded only with their predictions.
            try:
                logits = model(**model_inputs).logits
                predictions.extend(logits.argmax(dim=1).cpu().tolist())
                sentence_ids.extend(batch_sentence_ids)
                is_last_batch = batch_idx == num_batches - 1
                if (batch_idx + 1) % 50 == 0 or is_last_batch:
                    print(f"  Predicted batch {batch_idx+1}/{num_batches}")
            except Exception as e:
                print(f"ERROR during prediction forward pass for batch {batch_idx+1}/{num_batches}: {e}. Skipping batch.")
                continue

    label_map = {0: 'OBJ', 1: 'SUBJ'}
    label_predictions = [label_map[pred] for pred in predictions]
    if len(sentence_ids) == len(label_predictions):
        print(f"Prediction complete. Generated {len(label_predictions)} predictions.")
    else:
        print(f"CRITICAL WARNING: Mismatch in sentence ID count ({len(sentence_ids)}) and prediction count ({len(label_predictions)}). Submission file might be incorrect!")
    return sentence_ids, label_predictions

def _snapshot_state_dict(model):
    """Return an independent CPU copy of ``model``'s state dict."""
    # Tensor.cpu() returns the very same tensor when it already lives on the
    # CPU, so without clone() a CPU run would keep references to the live
    # parameters and the "best" snapshot would change with further training.
    return {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}


# --- Phase 1: train on the train set, keep the best state by dev macro F1 ---
print("\n--- Initial Training Phase ---")
# ``raise SystemExit`` replaces ``exit()``, which is meant for interactive use
# and is not guaranteed to stop execution immediately (e.g. in a notebook).
if not train_loader or not dev_loader:
    print("CRITICAL ERROR: Train or Dev loader is empty or None. Cannot start training. Exiting.")
    raise SystemExit(1)
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
num_train_batches = len(train_loader)
if num_train_batches == 0:
    print("CRITICAL ERROR: Training loader has zero batches. Cannot train. Exiting.")
    raise SystemExit(1)
# The scheduler is stepped once per batch, so its horizon is batches * epochs.
total_steps = num_train_batches * EPOCHS
warmup_steps = int(total_steps * WARMUP_PROPORTION)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps
)
print(f"Optimizer: AdamW, LR: {LEARNING_RATE}, Weight Decay: {WEIGHT_DECAY}")
print(f"Scheduler: Linear Warmup, Total Steps: {total_steps}, Warmup Steps: {warmup_steps}")
best_f1 = 0.0
best_model_state = None
for epoch in range(EPOCHS):
    print(f"\n--- Epoch {epoch + 1}/{EPOCHS} ---")
    print("Training...")
    train_loss = train_model(model, train_loader, optimizer, scheduler, device, epoch + 1, EPOCHS)
    print(f"Epoch {epoch + 1} Average Training Loss: {train_loss:.4f}")
    print("\nEvaluating on Dev Set...")
    eval_result = evaluate_model(model, dev_loader, device)
    # evaluate_model returns a float F1 for labeled data and a tuple otherwise.
    if isinstance(eval_result, float):
        dev_f1 = eval_result
        if dev_f1 > best_f1:
            best_f1 = dev_f1
            best_model_state = _snapshot_state_dict(model)
            print(f"*** New best F1 found: {best_f1:.4f}. Model state saved. ***")
        else:
            print(f"Dev F1 ({dev_f1:.4f}) did not improve from best ({best_f1:.4f}).")
    else:
        print("Warning: Evaluation on dev set did not return an F1 score. Cannot determine best model based on F1.")
        if best_model_state is None:
            print("Saving current model state as the first available state.")
            best_model_state = _snapshot_state_dict(model)
print(f"\nBest validation F1 achieved during initial training: {best_f1:.4f}")

# --- Phase 2: continue from the best state on train + dev, at a reduced LR ---
if best_model_state:
    print("\n--- Final Training on Combined Data (Train + Dev) ---")
    print("Loading best model state from initial training...")
    model.load_state_dict(best_model_state)
    model.to(device)
    print("Best model loaded successfully.")
    final_lr = LEARNING_RATE * FINAL_LR_FACTOR
    optimizer_final = AdamW(model.parameters(), lr=final_lr, weight_decay=WEIGHT_DECAY)
    num_combined_batches = len(combined_loader)
    if num_combined_batches == 0:
        print("Warning: Combined loader is empty. Skipping final training.")
    else:
        total_steps_final = num_combined_batches * FINAL_EPOCHS
        warmup_steps_final = int(total_steps_final * WARMUP_PROPORTION)
        scheduler_final = get_linear_schedule_with_warmup(
            optimizer_final,
            num_warmup_steps=warmup_steps_final,
            num_training_steps=total_steps_final
        )
        print(f"Final Training Setup: Optimizer: AdamW, LR: {final_lr}, Weight Decay: {WEIGHT_DECAY}")
        print(f"Scheduler: Linear Warmup, Total Steps: {total_steps_final}, Warmup Steps: {warmup_steps_final}")
        print(f"Training for {FINAL_EPOCHS} epochs...")
        # No evaluation here: the dev set is part of the training data now.
        for epoch in range(FINAL_EPOCHS):
            print(f"\n--- Final Training Epoch {epoch + 1}/{FINAL_EPOCHS} ---")
            train_loss = train_model(model, combined_loader, optimizer_final, scheduler_final, device, epoch + 1, FINAL_EPOCHS)
            print(f"Final Epoch {epoch + 1} Average Training Loss: {train_loss:.4f}")
        print("Final training phase completed.")
else:
    print("\nWarning: No best model state was saved during initial training.")
    print("Proceeding to prediction with the model state after the last initial epoch.")
    model.to(device)

# --- Phase 3: predict the test set and write the submission file -------------
print("\n--- Generating Predictions on Test Set ---")
if test_loader:
    sentence_ids, predictions = predict(model, test_loader, device)
    if sentence_ids and predictions:
        print(f"\n--- Creating Submission File: {OUTPUT_PATH} ---")
        # A submission is only written when every id has exactly one prediction.
        if len(sentence_ids) == len(predictions):
            submission_df = pd.DataFrame({
                'sentence_id': sentence_ids,
                'label': predictions
            })
            submission_df = submission_df[['sentence_id', 'label']]
            try:
                submission_df.to_csv(OUTPUT_PATH, sep='\t', index=False, quoting=csv.QUOTE_MINIMAL, lineterminator='\n')
                print(f"Predictions successfully saved to {OUTPUT_PATH}")
                print("\nSubmission File Preview (first 5 rows):")
                print(submission_df.head().to_string())
                print(f"\nIMPORTANT: Remember to ZIP the '{OUTPUT_FILENAME}' file into '{os.path.splitext(OUTPUT_FILENAME)[0]}.zip' for submission to CodaLab.")
            except Exception as e:
                print(f"ERROR saving submission file to {OUTPUT_PATH}: {e}")
        else:
            print("CRITICAL ERROR: Mismatch between number of sentence IDs and predictions. Cannot create valid submission file.")
    elif not sentence_ids and not predictions and test_dataset is not None:
        print("Warning: Prediction resulted in empty lists, possibly due to errors during the prediction loop or an empty test set after filtering.")
    else:
        print("Warning: No predictions were generated. Submission file not created. (Test data might be empty, errors occurred, or IDs/Predictions mismatch).")
else:
    print("\nTest loader was not created (Test data likely missing, empty, or failed to load). No prediction performed and no submission file generated.")

print("\n--- Script Finished ---")

Using model: FacebookAI/xlm-roberta-large
Using device: cuda
Loading tokenizer for FacebookAI/xlm-roberta-large...


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Tokenizer loaded.

--- Loading and Preprocessing Data ---
Attempting to load data from: /kaggle/input/clef2025-checkthat-lab-track-01/italian/train_it.tsv
Successfully loaded /kaggle/input/clef2025-checkthat-lab-track-01/italian/train_it.tsv. Initial shape: (1613, 3)
Shape after cleaning and removing empty sentences: (1613, 3)
Shape after dropping NaNs in required columns: (1613, 3)
Attempting to load data from: /kaggle/input/clef2025-checkthat-lab-track-01/italian/dev_it.tsv
Successfully loaded /kaggle/input/clef2025-checkthat-lab-track-01/italian/dev_it.tsv. Initial shape: (667, 3)
Shape after cleaning and removing empty sentences: (667, 3)
Shape after dropping NaNs in required columns: (667, 3)
Attempting to load data from: /kaggle/input/clef2025-checkthat-lab-track-01/italian/test_it_unlabeled.tsv
Successfully loaded /kaggle/input/clef2025-checkthat-lab-track-01/italian/test_it_unlabeled.tsv. Initial shape: (299, 2)
Shape after cleaning and removing empty sentences: (299, 2)
Shape 

2025-05-01 11:21:03.703628: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746098463.892237      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746098463.946486      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded successfully.

--- Initial Training Phase ---
Optimizer: AdamW, LR: 1.8e-05, Weight Decay: 0.01
Scheduler: Linear Warmup, Total Steps: 505, Warmup Steps: 50

--- Epoch 1/5 ---
Training...
  Epoch 1/5 - Batch 50/101 - Current Avg Loss: 0.6193
  Epoch 1/5 - Batch 100/101 - Current Avg Loss: 0.5904
  Epoch 1/5 - Batch 101/101 - Current Avg Loss: 0.5882
Epoch 1 Average Training Loss: 0.5882

Evaluating on Dev Set...

--- Evaluation Report ---
              precision    recall  f1-score   support

         OBJ       0.73      1.00      0.85       490
        SUBJ       0.00      0.00      0.00       177

    accuracy                           0.73       667
   macro avg       0.37      0.50      0.42       667
weighted avg       0.54      0.73      0.62       667

Macro F1 Score: 0.4235
-------------------------

*** New best F1 found: 0.4235. Model state saved. ***

--- Epoch 2/5 ---
Training...
  Epoch 2/5 - Batch 50/101 - Current Avg Loss: 0.5486
  Epoch 2/5 - Batch 100/101 