In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import f1_score, classification_report
import warnings
import csv
import os

# Silence every Python warning for the rest of the run.
warnings.filterwarnings('ignore')

# Per-language training / development TSV files
# (Arabic, Bulgarian, English, German, Italian).
AR_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/arabic/train_ar.tsv"
AR_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/arabic/dev_ar.tsv"
BG_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/bulgarian/train_bg.tsv"
BG_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/bulgarian/dev_bg.tsv"
EN_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/english/train_en.tsv"
EN_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/english/dev_en.tsv"
DE_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/german/train_de.tsv"
DE_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/german/dev_de.tsv"
IT_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/italian/train_it.tsv"
IT_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/italian/dev_it.tsv"

# Unlabeled multilingual test file and the TSV the predictions are written to.
MULTILINGUAL_TEST_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/multilingual/test_multilingual_unlabeled.tsv"
OUTPUT_PATH = "subtask_multilingual.tsv"
MODEL_NAME = 'microsoft/infoxlm-large'  # checkpoint name given to the tokenizer/model `from_pretrained` calls
BATCH_SIZE = 16  # batch size shared by every DataLoader
MAX_LENGTH = 128  # sentences are padded/truncated to this many tokens
EPOCHS = 5  # epochs of the initial phase (fit on train, score on dev)
FINAL_EPOCHS = 3  # epochs of the final phase on train+dev
LEARNING_RATE = 1.5e-5  # AdamW learning rate of the initial phase; the final phase uses half of it
WEIGHT_DECAY = 0.01  # AdamW weight decay
WARMUP_PROPORTION = 0.1  # fraction of the total scheduler steps spent in linear warmup
SEED = 42  # default seed used by set_seed()

def set_seed(seed=SEED):
    """Seed every random number generator this script can touch.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and CUDA) and
    puts cuDNN into deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed; defaults to the module-level ``SEED``.
    """
    # Local import: ``random`` is not part of the file's top-level imports.
    import random

    # Python's own RNG was previously left unseeded, so any library code
    # relying on it was not reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # The CUDA calls are silently ignored when no GPU is available.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN autotuning for run-to-run determinism.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Fix all RNG seeds before anything random happens.
set_seed()

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Tokenizer matching the checkpoint that is fine-tuned below.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print("Tokenizer loaded.")

class SubjectivityDataset(Dataset):
    """Torch dataset that tokenizes sentences for subjectivity classification.

    Rows with a missing sentence (and, when ``has_labels`` is true, a missing
    label) are dropped. Labels are encoded as 1 for ``SUBJ`` (case-insensitive,
    surrounding whitespace ignored) and 0 for every other value.

    Args:
        dataframe: Frame with ``sentence_id`` and ``sentence`` columns, plus a
            ``label`` column when ``has_labels`` is true.
        tokenizer: Callable tokenizer; it is invoked with Hugging Face style
            keyword arguments and must return ``input_ids`` and
            ``attention_mask`` tensors.
        max_length: Length every sentence is padded/truncated to.
        has_labels: Whether items carry a ``labels`` tensor.

    Raises:
        ValueError: If a required column is missing from ``dataframe``.
    """

    def __init__(self, dataframe, tokenizer, max_length, has_labels=True):
        if 'sentence' not in dataframe.columns:
            raise ValueError("DataFrame must contain a 'sentence' column.")
        if 'sentence_id' not in dataframe.columns:
            raise ValueError("DataFrame must contain a 'sentence_id' column.")
        if has_labels and 'label' not in dataframe.columns:
            raise ValueError("DataFrame must contain a 'label' column when has_labels=True.")

        # Filter with a single dropna instead of an index/.loc round-trip:
        # re-selecting by index label duplicates rows when the index is not
        # unique (e.g. frames concatenated without ignore_index).
        required = ['sentence', 'label'] if has_labels else ['sentence']
        dataframe = dataframe.dropna(subset=required)

        self.tokenizer = tokenizer
        self.max_length = max_length
        self.has_labels = has_labels
        self.text = dataframe['sentence'].tolist()
        self.ids = dataframe['sentence_id'].tolist()
        if has_labels:
            # strip() so values such as "SUBJ " are not silently encoded as OBJ.
            self.labels = [
                1 if str(label).strip().upper() == 'SUBJ' else 0
                for label in dataframe['label'].tolist()
            ]

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        """Return the tokenized sentence at *index* as a dict of tensors/ids."""
        # Call the tokenizer directly; ``encode_plus`` is deprecated in favour
        # of ``__call__``.
        encoding = self.tokenizer(
            str(self.text[index]),
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        item = {
            # The tokenizer returns (1, max_length) tensors; flatten to 1-D so
            # the DataLoader can stack items into a batch.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'sentence_id': str(self.ids[index]),
        }
        if self.has_labels:
            # labels, text and ids are built from the same filtered frame, so
            # they always have the same length.
            item['labels'] = torch.tensor(self.labels[index], dtype=torch.long)
        return item

def clean_text(text):
    """Return *text* as a string with surrounding whitespace removed.

    Strings are stripped. Missing values (``None`` or a float NaN, which is
    how pandas represents an empty cell) become the empty string instead of
    the literal text ``"None"`` / ``"nan"``, so callers that filter out empty
    sentences also discard them. Any other value is converted with ``str``.

    Args:
        text: Raw cell value.

    Returns:
        The cleaned string (possibly empty).
    """
    if isinstance(text, str):
        return text.strip()
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    return str(text)

def load_and_preprocess_data(file_path, has_labels=True, language_name="N/A"):
    """Load a tab-separated split and return its cleaned essential columns.

    The file is parsed with ``csv.QUOTE_NONE`` first; if that fails or does
    not yield every required column, it is parsed again with
    ``csv.QUOTE_MINIMAL``. Rows with a missing or blank ``sentence_id`` or
    ``sentence`` (and a missing ``label`` when ``has_labels`` is true) are
    dropped, and the remaining sentences are passed through ``clean_text``.

    Args:
        file_path: Path of the TSV file.
        has_labels: Whether a ``label`` column is required and returned.
        language_name: Name used only in the progress messages.

    Returns:
        A DataFrame with the columns ``sentence_id``, ``sentence`` and, when
        ``has_labels`` is true, ``label``; or ``None`` when the file does not
        exist, cannot be parsed, or lacks a required column.
    """
    print(f"Loading {language_name} data from: {file_path}")
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return None

    required_cols = ['sentence_id', 'sentence']
    if has_labels:
        required_cols.append('label')

    df = None
    attempts = (("QUOTE_NONE", csv.QUOTE_NONE), ("QUOTE_MINIMAL", csv.QUOTE_MINIMAL))
    for quoting_name, quoting in attempts:
        try:
            candidate = pd.read_csv(
                file_path,
                sep='\t',
                quoting=quoting,
                on_bad_lines='warn',
                dtype={'sentence_id': str},
            )
        except Exception as e:
            # Best effort at the I/O boundary: report and try the next mode.
            print(f"  Error loading {file_path} with {quoting_name}: {e}")
            continue
        print(f"  Initial rows loaded ({quoting_name}): {len(candidate)}")
        missing_cols = [col for col in required_cols if col not in candidate.columns]
        if not missing_cols:
            df = candidate
            break
        print(
            f"  Warning: Columns might be misparsed with {quoting_name} "
            f"(missing: {missing_cols}, found: {list(candidate.columns)})."
        )

    if df is None:
        print(f"  Error: Could not load {file_path} with the required columns {required_cols}.")
        return None

    # Keep only the essential columns; extras (e.g. 'solved_conflict' or an
    # unnamed index column) are not used downstream.
    df = df[required_cols]

    # Drop missing cells BEFORE cleaning: cleaning converts the column to
    # strings, so a NaN left in place could end up as literal text and
    # survive as a bogus sentence.
    rows_before_na_drop = len(df)
    df = df.dropna(subset=required_cols).copy()
    print(
        f"  Rows after dropping NAs in essential columns: {len(df)} "
        f"(dropped {rows_before_na_drop - len(df)})"
    )

    df['sentence'] = df['sentence'].apply(clean_text)
    df = df[df['sentence'].astype(str).str.strip() != '']
    print(f"  Rows after cleaning 'sentence': {len(df)}")

    df = df[df['sentence_id'].astype(str).str.strip() != '']
    print(f"  Rows after ensuring non-empty 'sentence_id': {len(df)}")
    return df

train_paths = {
    "Arabic": AR_TRAIN_PATH, "Bulgarian": BG_TRAIN_PATH, "English": EN_TRAIN_PATH,
    "German": DE_TRAIN_PATH, "Italian": IT_TRAIN_PATH
}
dev_paths = {
    "Arabic": AR_DEV_PATH, "Bulgarian": BG_DEV_PATH, "English": EN_DEV_PATH,
    "German": DE_DEV_PATH, "Italian": IT_DEV_PATH
}


def _load_language_splits(paths, split_name):
    """Load every labelled language file in *paths*; return the non-empty frames.

    ``split_name`` ("train" / "dev") is only used in the warning message.
    """
    frames = []
    for lang_name, path in paths.items():
        df = load_and_preprocess_data(path, has_labels=True, language_name=lang_name)
        if df is not None and not df.empty:
            frames.append(df)
        else:
            print(f"Warning: Could not load or process {split_name} data for {lang_name} from {path}.")
    return frames


print("\n--- Loading Multilingual Training Data (Explicit Paths) ---")
all_train_dfs = _load_language_splits(train_paths, "train")

print("\n--- Loading Multilingual Development Data (Explicit Paths) ---")
all_dev_dfs = _load_language_splits(dev_paths, "dev")

if not all_train_dfs or not all_dev_dfs:
    print("Error: No valid training or development data loaded from the specified paths. Exiting.")
    # Raise SystemExit explicitly: exit() is the interactive helper and is not
    # guaranteed to stop execution here (e.g. inside a notebook kernel), in
    # which case pd.concat below would fail on an empty list.
    raise SystemExit(1)

all_train_df = pd.concat(all_train_dfs, ignore_index=True)
all_dev_df = pd.concat(all_dev_dfs, ignore_index=True)
print(f"\nTotal combined train data shape: {all_train_df.shape}")
print(f"Total combined dev data shape: {all_dev_df.shape}")

print("\n--- Loading Multilingual Test Data (Explicit Path) ---")
test_df = load_and_preprocess_data(
    MULTILINGUAL_TEST_PATH,
    has_labels=False,
    language_name="Multilingual Test",
)

# A missing or unusable test set only disables prediction; training still runs.
if test_df is None:
    print(f"Warning: Multilingual test file {MULTILINGUAL_TEST_PATH} could not be loaded or processed. Prediction will be skipped.")
elif test_df.empty:
    print(f"Warning: Multilingual test DataFrame ({MULTILINGUAL_TEST_PATH}) is empty after loading/preprocessing.")
else:
    print(f"Test data shape after preprocessing: {test_df.shape}")
    if not {'sentence_id', 'sentence'}.issubset(test_df.columns):
        print(f"Error: Test DataFrame from {MULTILINGUAL_TEST_PATH} is missing 'sentence_id' or 'sentence' column. Found columns: {test_df.columns}. Prediction will fail.")
        test_df = None

# Train and dev stacked together feed the final training phase.
combined_train_df = pd.concat(
    [all_train_df, all_dev_df],
    ignore_index=True,
)
print(f"\nCombined train+dev data shape for final training: {combined_train_df.shape}")

# Build the datasets; any failure here makes training impossible, so abort.
test_dataset = None
try:
    print("\nCreating Train Dataset...")
    train_dataset = SubjectivityDataset(all_train_df, tokenizer, MAX_LENGTH)
    print(f"  Train dataset size: {len(train_dataset)}")
    print("Creating Dev Dataset...")
    dev_dataset = SubjectivityDataset(all_dev_df, tokenizer, MAX_LENGTH)
    print(f"  Dev dataset size: {len(dev_dataset)}")
    print("Creating Combined Dataset (Train+Dev)...")
    combined_dataset = SubjectivityDataset(combined_train_df, tokenizer, MAX_LENGTH)
    print(f"  Combined dataset size: {len(combined_dataset)}")
    if test_df is not None and not test_df.empty:
        print("Creating Test Dataset...")
        test_dataset = SubjectivityDataset(test_df, tokenizer, MAX_LENGTH, has_labels=False)
        print(f"  Test dataset size: {len(test_dataset)}")
    else:
        print("Skipping Test Dataset creation (data not available, empty, or failed column check).")
except ValueError as e:
    print(f"Error creating Dataset: {e}")
    # SystemExit instead of exit(): the interactive helper is not guaranteed
    # to stop execution (e.g. in a notebook kernel), which would let the
    # DataLoader lines below run against undefined datasets.
    raise SystemExit(1) from e
except Exception as e:
    print(f"An unexpected error occurred during Dataset creation: {e}")
    raise SystemExit(1) from e

# Only the training loaders shuffle; dev/test keep file order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE)
combined_loader = DataLoader(combined_dataset, batch_size=BATCH_SIZE, shuffle=True)
# An absent or empty test dataset yields no loader, which disables prediction.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE) if test_dataset else None
print("DataLoaders created.")

# Load the pretrained encoder with a two-class classification head.
print(f"\nLoading model: {MODEL_NAME}")
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
print("Model loaded.")

# Move the weights to the selected device.
model = model.to(device)

def train_model(model, dataloader, optimizer, scheduler, device):
    """Run one training epoch and return the mean loss of the completed steps.

    Each batch is moved to ``device``, the model is called with ``input_ids``,
    ``attention_mask`` and ``labels``, gradients are clipped to a norm of 1.0
    and the optimizer and scheduler are stepped. A batch that fails is
    reported and skipped (best effort); the number of skipped batches is
    summarised at the end so a partly failed epoch is visible.

    Args:
        model: Model whose output exposes a ``loss`` attribute.
        dataloader: Sized iterable of dict batches with the keys
            ``input_ids``, ``attention_mask`` and ``labels``.
        optimizer: Optimizer stepped once per successful batch.
        scheduler: LR scheduler stepped once per successful batch.
        device: Device the batch tensors are moved to.

    Returns:
        Mean loss over the batches that completed a step, or 0 if none did.
    """
    model.train()
    total_loss = 0
    batch_count = 0
    skipped_batches = 0
    num_batches = len(dataloader)
    print_every = max(1, num_batches // 10)
    for batch_idx, batch in enumerate(dataloader):
        optimizer.zero_grad()
        try:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
        except KeyError as e:
            print(f"Error: Missing key in train batch {batch_idx}: {e}")
            skipped_batches += 1
            continue
        except Exception as e:
            print(f"Error moving train batch {batch_idx} to device: {e}")
            skipped_batches += 1
            continue
        try:
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            if loss is None:
                print(f"Warning: Loss is None for train batch {batch_idx}. Skipping backward pass.")
                skipped_batches += 1
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
        except Exception as e:
            print(f"Error during train forward/backward pass for batch {batch_idx}: {e}")
            if "CUDA out of memory" in str(e):
                print("CUDA OOM Error detected. Try reducing BATCH_SIZE.")
            skipped_batches += 1
            continue
        # Count the loss only once the whole step succeeded.
        total_loss += loss.item()
        batch_count += 1
        # Progress report roughly ten times per epoch.
        if (batch_idx + 1) % print_every == 0 or batch_idx + 1 == num_batches:
            print(f"  Batch {batch_idx + 1}/{num_batches} - running loss: {total_loss / batch_count:.4f}")
    if skipped_batches:
        print(f"Warning: {skipped_batches} of {num_batches} training batches were skipped.")
    avg_loss = total_loss / batch_count if batch_count > 0 else 0
    return avg_loss

def evaluate_model(model, dataloader, device):
    """Evaluate *model* on labelled batches and return the macro-averaged F1.

    Predictions are the argmax over the model's ``logits``. A batch that
    fails is reported and left out of the score (best effort); the number of
    skipped batches is summarised so a partial evaluation is visible. A
    per-class classification report is printed when it can be produced.

    Args:
        model: Model whose output exposes a ``logits`` attribute.
        dataloader: Iterable of dict batches with ``input_ids``,
            ``attention_mask`` and, for scoring, ``labels``.
        device: Device the batch tensors are moved to.

    Returns:
        Macro F1 over the evaluated labelled examples, or 0.0 when no
        labelled example could be evaluated.
    """
    model.eval()
    predictions = []
    actual_labels = []
    skipped_batches = 0
    with torch.no_grad():
        for batch_idx, batch in enumerate(dataloader):
            try:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
            except KeyError as e:
                print(f"Error: Missing key in eval batch {batch_idx}: {e}")
                skipped_batches += 1
                continue
            except Exception as e:
                print(f"Error moving eval batch {batch_idx} to device: {e}")
                if "CUDA out of memory" in str(e):
                    print("CUDA OOM Error during evaluation.")
                skipped_batches += 1
                continue
            try:
                logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
                batch_preds = logits.argmax(dim=1).cpu().tolist()
            except Exception as e:
                print(f"Error during evaluation forward pass for batch {batch_idx}: {e}")
                if "CUDA out of memory" in str(e):
                    print("CUDA OOM Error during evaluation inference.")
                skipped_batches += 1
                continue
            if 'labels' not in batch:
                # Nothing to score against; unlabeled batches are ignored.
                continue
            # Extend both lists together so they always stay aligned.
            predictions.extend(batch_preds)
            actual_labels.extend(torch.as_tensor(batch['labels']).tolist())
    if skipped_batches:
        print(f"Warning: {skipped_batches} evaluation batches were skipped; the score covers the remaining batches only.")
    if not actual_labels:
        print("No labels found in evaluation data. Cannot calculate F1.")
        return 0.0
    f1 = f1_score(actual_labels, predictions, average='macro', zero_division=0)
    try:
        # labels=[0, 1] keeps the report valid even when only one class
        # occurs in the data, which otherwise conflicts with target_names.
        report = classification_report(
            actual_labels,
            predictions,
            labels=[0, 1],
            target_names=['OBJ', 'SUBJ'],
            zero_division=0,
        )
        print("Evaluation Report:")
        print(report)
    except ValueError as e:
        print(f"Could not generate classification report: {e}")
        print(f"Actual labels unique counts: {np.unique(actual_labels, return_counts=True)}")
        print(f"Predictions unique counts: {np.unique(predictions, return_counts=True)}")
    return f1

def predict(model, dataloader, device):
    """Predict ``SUBJ``/``OBJ`` labels for every batch of an unlabeled loader.

    Predictions are the argmax over the model's ``logits`` (1 -> ``SUBJ``,
    0 -> ``OBJ``). A batch that fails is reported and skipped (best effort);
    the number of skipped batches is summarised because their sentences will
    be missing from the result.

    Args:
        model: Model whose output exposes a ``logits`` attribute.
        dataloader: Sized iterable of dict batches with ``input_ids``,
            ``attention_mask`` and ``sentence_id``, or ``None``.
        device: Device the batch tensors are moved to.

    Returns:
        ``(sentence_ids, labels)``: two equally long lists; both are empty
        when ``dataloader`` is ``None`` or no batch succeeded.
    """
    if dataloader is None:
        print("Test dataloader is None. Skipping prediction.")
        return [], []
    model.eval()
    predictions = []
    sentence_ids = []
    skipped_batches = 0
    num_batches = len(dataloader)
    print_every = max(1, num_batches // 10)
    print("Starting prediction loop...")
    with torch.no_grad():
        for batch_idx, batch in enumerate(dataloader):
            try:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                batch_sentence_ids = batch['sentence_id']
            except KeyError as e:
                print(f"Error: Missing key in prediction batch {batch_idx}: {e}")
                skipped_batches += 1
                continue
            except Exception as e:
                print(f"Error processing prediction batch {batch_idx}: {e}")
                if "CUDA out of memory" in str(e):
                    print("CUDA OOM Error during prediction.")
                skipped_batches += 1
                continue
            try:
                logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
                batch_preds = logits.argmax(dim=1).cpu().tolist()
            except Exception as e:
                print(f"Error during prediction forward pass for batch {batch_idx}: {e}")
                if "CUDA out of memory" in str(e):
                    print("CUDA OOM Error during prediction inference.")
                skipped_batches += 1
                continue
            predictions.extend(batch_preds)
            sentence_ids.extend(batch_sentence_ids)
            # Progress report roughly ten times per run.
            if (batch_idx + 1) % print_every == 0 or batch_idx + 1 == num_batches:
                print(f"  Predicted batch {batch_idx + 1}/{num_batches}")
    label_predictions = ['SUBJ' if pred == 1 else 'OBJ' for pred in predictions]
    print(f"Prediction loop finished. Generated {len(label_predictions)} predictions for {len(sentence_ids)} IDs.")
    if skipped_batches:
        print(f"Warning: {skipped_batches} of {num_batches} prediction batches were skipped; their sentences are missing from the output.")
    if len(sentence_ids) != len(label_predictions):
        # Defensive: only possible if a batch carries a different number of
        # ids than rows.
        print(f"CRITICAL WARNING: Mismatch in sentence ID count ({len(sentence_ids)}) and prediction count ({len(label_predictions)}). Submission file might be incorrect.")
        min_len = min(len(sentence_ids), len(label_predictions))
        print(f"Returning aligned results up to length {min_len}")
        return sentence_ids[:min_len], label_predictions[:min_len]
    return sentence_ids, label_predictions

# Optimizer for the initial phase (fit on train, select on dev).
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

if len(train_loader) == 0:
    # The message used to name the combined loader; this check is on train_loader.
    print("Error: Training loader has zero batches. Cannot train.")
    # SystemExit instead of exit(): the interactive helper is not guaranteed
    # to stop execution (e.g. in a notebook kernel).
    raise SystemExit(1)

# The scheduler is stepped once per batch, so its horizon is batches * epochs.
total_steps = len(train_loader) * EPOCHS
warmup_steps = int(total_steps * WARMUP_PROPORTION)

# Linear warmup to LEARNING_RATE, then linear decay to zero over total_steps.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps
)
print(f"\nOptimizer and Scheduler configured for initial training ({total_steps} total steps, {warmup_steps} warmup steps).")

# Best dev score seen so far and a CPU snapshot of the weights that reached it.
best_f1 = 0.0
best_model_state = None

print("\n--- Starting Initial Training Phase ---")
if len(train_loader) == 0 or len(dev_loader) == 0:
    print("Error: Train or Dev loader is empty. Cannot proceed with training.")
    # SystemExit instead of exit(): the interactive helper is not guaranteed
    # to stop execution (e.g. in a notebook kernel).
    raise SystemExit(1)

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch + 1}/{EPOCHS}")
    print("Training...")
    train_loss = train_model(model, train_loader, optimizer, scheduler, device)
    print(f"Training loss: {train_loss:.4f}")
    print("Evaluating on combined dev set...")
    dev_f1 = evaluate_model(model, dev_loader, device)
    print(f"Combined Dev F1 Score: {dev_f1:.4f}")
    if dev_f1 > best_f1:
        best_f1 = dev_f1
        # clone() is required: on a CPU device .cpu() returns the very same
        # tensor, so without a copy the snapshot would alias the live weights
        # and be overwritten by the following epochs.
        best_model_state = {
            k: v.detach().cpu().clone() for k, v in model.state_dict().items()
        }
        print(f"*** New best F1: {best_f1:.4f}. Model state saved. ***")
    else:
        print(f"F1 did not improve from {best_f1:.4f}")

print(f"\n--- Initial Training Finished ---")
print(f"Best combined validation F1 achieved: {best_f1:.4f}")

# Restore the best dev checkpoint (if any) before the final fine-tuning pass.
if not best_model_state:
    print("Warning: No best model state was saved (perhaps F1 never improved or dev set issues). Proceeding with the final model state from initial training.")
else:
    print("Loading best model state for final training...")
    # The snapshot lives on the CPU, so load it there and move back afterwards.
    model.cpu()
    model.load_state_dict(best_model_state)
    model.to(device)
    print("Best model loaded successfully.")

print("\n--- Starting Final Training on Combined Data (train + dev) ---")
if len(combined_loader) > 0:
    # Second phase: fresh optimizer and schedule at half the initial learning rate.
    final_lr = LEARNING_RATE / 2
    print(f"Using final learning rate: {final_lr}")
    optimizer_final = AdamW(
        model.parameters(),
        lr=final_lr,
        weight_decay=WEIGHT_DECAY,
    )
    total_steps_final = len(combined_loader) * FINAL_EPOCHS
    warmup_steps_final = int(total_steps_final * WARMUP_PROPORTION)
    scheduler_final = get_linear_schedule_with_warmup(
        optimizer_final,
        num_warmup_steps=warmup_steps_final,
        num_training_steps=total_steps_final,
    )
    print(f"Optimizer and Scheduler reconfigured for final training ({total_steps_final} total steps, {warmup_steps_final} warmup steps).")
    for epoch in range(FINAL_EPOCHS):
        print(f"\nFinal training - Epoch {epoch + 1}/{FINAL_EPOCHS}")
        train_loss = train_model(model, combined_loader, optimizer_final, scheduler_final, device)
        print(f"Training loss: {train_loss:.4f}")
else:
    print("Error: Combined train+dev loader is empty. Skipping final training.")

print("\n--- Final Training Finished ---")

print(f"\n--- Generating Predictions on Multilingual Test Set ({MULTILINGUAL_TEST_PATH}) ---")
if not test_loader:
    print(f"Test loader was not created (Test data likely missing from {MULTILINGUAL_TEST_PATH}, empty, or failed loading checks). No submission file generated.")
else:
    sentence_ids, predictions = predict(model, test_loader, device)
    if not (sentence_ids and predictions):
        # Nothing came back from predict(); say which situation this is.
        if test_dataset is not None:
            print("Prediction resulted in empty lists, possibly due to errors during prediction loop or issues with test data processing.")
        else:
            print("No predictions were generated because test data could not be loaded or processed correctly. No submission file generated.")
    elif len(sentence_ids) != len(predictions):
        print(f"Error: Mismatch between sentence IDs ({len(sentence_ids)}) and predictions ({len(predictions)}) after predict function returned. Submission file not generated.")
    else:
        print(f"Successfully generated {len(predictions)} predictions.")
        # Two-column submission frame: sentence id and predicted label.
        submission_df = pd.DataFrame({
            'sentence_id': sentence_ids,
            'label': predictions,
        })
        try:
            print(f"Saving predictions to: {OUTPUT_PATH}")
            submission_columns = submission_df[['sentence_id', 'label']]
            submission_columns.to_csv(
                OUTPUT_PATH,
                sep='\t',
                index=False,
                quoting=csv.QUOTE_MINIMAL,
                header=True,
            )
            print(f"Predictions successfully saved to {OUTPUT_PATH}")
            print("\nSubmission File Head:")
            print(submission_df.head())
            print(f"\nReminder: Zip this file into '{OUTPUT_PATH.replace('.tsv', '.zip')}' for submission.")
        except Exception as e:
            print(f"Error saving submission file to {OUTPUT_PATH}: {e}")

print("\n--- Script Finished ---")

Using device: cuda
Loading tokenizer: microsoft/infoxlm-large


config.json:   0%|          | 0.00/513 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Tokenizer loaded.

--- Loading Multilingual Training Data (Explicit Paths) ---
Loading Arabic data from: /kaggle/input/clef2025-checkthat-lab-track-01/arabic/train_ar.tsv
  Initial rows loaded: 2446
  Rows after cleaning 'sentence': 2446
  Rows after dropping NAs in essential columns: 2446 (dropped 0)
  Rows after ensuring non-empty 'sentence_id': 2446
Loading Bulgarian data from: /kaggle/input/clef2025-checkthat-lab-track-01/bulgarian/train_bg.tsv
  Initial rows loaded: 729
  Rows after cleaning 'sentence': 729
  Rows after dropping NAs in essential columns: 729 (dropped 0)
  Rows after ensuring non-empty 'sentence_id': 729
Loading English data from: /kaggle/input/clef2025-checkthat-lab-track-01/english/train_en.tsv
  Initial rows loaded: 830
  Rows after cleaning 'sentence': 830
  Rows after dropping NAs in essential columns: 830 (dropped 0)
  Rows after ensuring non-empty 'sentence_id': 830
Loading German data from: /kaggle/input/clef2025-checkthat-lab-track-01/german/train_de.tsv
 

2025-05-01 12:09:08.149027: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746101348.391350      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746101348.459928      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


pytorch_model.bin:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/infoxlm-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded.

Optimizer and Scheduler configured for initial training (2010 total steps, 201 warmup steps).

--- Starting Initial Training Phase ---

Epoch 1/5
Training...
Training loss: 0.6670
Evaluating on combined dev set...
Evaluation Report:
              precision    recall  f1-score   support

         OBJ       0.62      0.99      0.76      1470
        SUBJ       0.62      0.04      0.07       931

    accuracy                           0.62      2401
   macro avg       0.62      0.51      0.42      2401
weighted avg       0.62      0.62      0.49      2401

Combined Dev F1 Score: 0.4162
*** New best F1: 0.4162. Model state saved. ***

Epoch 2/5
Training...
Training loss: 0.6705
Evaluating on combined dev set...
Evaluation Report:
              precision    recall  f1-score   support

         OBJ       0.61      1.00      0.76      1470
        SUBJ       0.00      0.00      0.00       931

    accuracy                           0.61      2401
   macro avg       0.31      0.