In [None]:
import csv
import gc
import os
import random
import warnings

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import f1_score, classification_report
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup

# Suppress every Python warning for the remainder of the run.
warnings.filterwarnings('ignore')

# Labelled train/dev TSV files of the five languages the model is trained on.
AR_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/arabic/train_ar.tsv"
AR_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/arabic/dev_ar.tsv"
BG_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/bulgarian/train_bg.tsv"
BG_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/bulgarian/dev_bg.tsv"
EN_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/english/train_en.tsv"
EN_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/english/dev_en.tsv"
DE_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/german/train_de.tsv"
DE_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/german/dev_de.tsv"
IT_TRAIN_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/italian/train_it.tsv"
IT_DEV_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/italian/dev_it.tsv"
# Unlabelled Polish test file; Polish has no train/dev file listed above.
ZERO_SHOT_TEST_PATH = "/kaggle/input/clef2025-checkthat-lab-track-01/polish/test_pol_unlabeled.tsv"
# TSV file the predictions are written to (relative to the working directory).
OUTPUT_PATH = "subtask_polish.tsv"
# Checkpoint name used for both the tokenizer and the classifier.
MODEL_NAME = 'microsoft/mdeberta-v3-base'
# Batch size of the training loaders; the dev/test loaders use twice this value.
BATCH_SIZE = 16
# Every sentence is padded/truncated to this many tokens.
MAX_LENGTH = 128
# Maximum epochs of the initial train-only phase (early stopping may end it sooner).
EPOCHS = 5
# Epochs of the final phase that trains on train+dev together.
FINAL_EPOCHS = 3
# Learning rate of the initial phase; the final phase uses half of it.
LEARNING_RATE = 1.5e-5
# Weight decay applied to every parameter except biases and LayerNorm weights.
WEIGHT_DECAY = 0.01
# Fraction of the initial-phase optimizer steps spent on linear warm-up.
WARMUP_PROPORTION = 0.1
# Default seed passed to set_seed().
SEED = 42

def set_seed(seed=SEED):
    """Seed every random number generator this script can draw from.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU plus all CUDA
    devices), and asks cuDNN for deterministic kernels with auto-tuning
    disabled so repeated runs pick the same algorithms.

    Args:
        seed: Integer seed; defaults to the module-level ``SEED``.
    """
    # The standard-library generator was previously left unseeded, so any
    # code path using ``random`` stayed non-reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers the current device as well, so a separate
    # torch.cuda.manual_seed call is not needed.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Seed all RNGs before any model or data-loader object is created.
set_seed()

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

print(f"Loading tokenizer: {MODEL_NAME}")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
except Exception as e:
    print(f"Error loading tokenizer {MODEL_NAME}: {e}")
    # Abort with a non-zero status. The ``exit()`` helper injected by the
    # ``site`` module is meant for interactive shells and reports success
    # (status 0) even though the run failed.
    raise SystemExit(1) from e
print("Tokenizer loaded.")

class SubjectivityDataset(Dataset):
    """Torch dataset that tokenizes sentences for subjectivity classification.

    Rows whose ``sentence`` or ``sentence_id`` (or ``label`` when
    ``has_labels`` is true) is missing or blank are dropped. Labels are
    encoded as ``1`` for ``SUBJ`` (case-insensitive, surrounding whitespace
    ignored) and ``0`` for anything else.

    Args:
        dataframe: Frame with ``sentence`` and ``sentence_id`` columns, plus
            ``label`` when ``has_labels`` is true. It is never modified.
        tokenizer: Callable tokenizer accepting the keyword arguments used in
            ``__getitem__`` and returning ``input_ids`` / ``attention_mask``.
        max_length: Length every sentence is padded or truncated to.
        has_labels: Whether a ``label`` column is expected and encoded.

    Raises:
        ValueError: If a required column is absent from ``dataframe``.
    """

    def __init__(self, dataframe, tokenizer, max_length, has_labels=True):
        for column in ('sentence', 'sentence_id'):
            if column not in dataframe.columns:
                raise ValueError(f"DataFrame must contain a '{column}' column.")
        if has_labels and 'label' not in dataframe.columns:
            raise ValueError("DataFrame must contain a 'label' column when has_labels=True.")

        required = ['sentence', 'sentence_id'] + (['label'] if has_labels else [])
        # Drop missing values BEFORE casting to str (afterwards NaN would have
        # become the literal text "nan"), and work on a copy so the caller's
        # frame is left untouched.
        frame = dataframe.dropna(subset=required).copy()
        for column in ('sentence', 'sentence_id'):
            frame[column] = frame[column].astype(str).str.strip()
        frame = frame[(frame['sentence'] != '') & (frame['sentence_id'] != '')]

        self.tokenizer = tokenizer
        self.max_length = max_length
        self.has_labels = has_labels
        # text, ids and labels all come from the same filtered frame, so they
        # are index-aligned by construction.
        self.text = frame['sentence'].tolist()
        self.ids = frame['sentence_id'].tolist()
        if has_labels:
            self.labels = [
                1 if str(label).strip().upper() == 'SUBJ' else 0
                for label in frame['label']
            ]

    def __len__(self):
        """Return the number of usable rows."""
        return len(self.text)

    def __getitem__(self, index):
        """Tokenize one sentence.

        Returns:
            Dict with flattened ``input_ids`` and ``attention_mask`` tensors,
            the ``sentence_id`` string and, when labels are available, a
            ``labels`` long tensor.

        Raises:
            IndexError: If ``index`` is out of range.
        """
        # Calling the tokenizer directly replaces the deprecated encode_plus.
        encoding = self.tokenizer(
            self.text[index],
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        item = {
            # return_tensors='pt' yields a leading batch axis of size one;
            # flatten it so the DataLoader can stack samples itself.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'sentence_id': self.ids[index],
        }
        if self.has_labels:
            item['labels'] = torch.tensor(self.labels[index], dtype=torch.long)
        return item

def clean_text(text):
    """Return ``text`` as a string without leading or trailing whitespace.

    Values that are not already strings are converted with ``str()`` first.
    """
    as_string = text if isinstance(text, str) else str(text)
    return as_string.strip()

def load_and_preprocess_data(file_path, has_labels=True, language_name="N/A"):
    """Read one tab-separated data file and return its cleaned core columns.

    The file is parsed with minimal quoting first; when the expected header
    columns are not found it is re-read with quoting disabled. Text columns
    are stripped, rows with an empty essential field are removed, and only
    ``sentence_id``, ``sentence`` (and ``label`` when requested) are kept.

    Args:
        file_path: Location of the TSV file.
        has_labels: Whether a ``label`` column is required.
        language_name: Name used in progress messages and to recognise the
            English files.

    Returns:
        The cleaned DataFrame, or ``None`` when the file is missing, cannot
        be parsed, or lacks a required column.
    """
    print(f"Loading {language_name} data from: {file_path}")
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return None

    wanted = ['sentence_id', 'sentence']
    if has_labels:
        wanted.append('label')

    def _read(quoting_mode):
        # keep_default_na=False keeps tokens such as "NA" as literal text.
        return pd.read_csv(
            file_path,
            sep='\t',
            quoting=quoting_mode,
            on_bad_lines='warn',
            dtype={'sentence_id': str},
            keep_default_na=False,
        )

    try:
        frame = _read(csv.QUOTE_MINIMAL)
        print(f"  Initial rows loaded (QUOTE_MINIMAL): {len(frame)}")

        # Evaluated once, against the first parse only.
        english_layout_ok = (
            'english' in language_name.lower()
            and 'solved_conflict' in frame.columns
            and all(col in frame.columns for col in ['sentence_id', 'sentence', 'label'])
        )

        if not (all(col in frame.columns for col in wanted) or english_layout_ok):
            print(f"  Warning: Required columns missing with QUOTE_MINIMAL (Found: {frame.columns}). Required: {wanted}. Trying QUOTE_NONE.")
            frame = _read(csv.QUOTE_NONE)
            print(f"  Initial rows loaded (QUOTE_NONE): {len(frame)}")
            if not (all(col in frame.columns for col in wanted) or english_layout_ok):
                raise ValueError(f"File {file_path} is missing required columns even with QUOTE_NONE. Found: {frame.columns}. Required: {wanted}")
    except Exception as e:
        print(f"  Error loading {file_path}: {e}")
        return None

    if 'sentence' not in frame.columns:
        print(f"  Warning: 'sentence' column not found in {file_path}. Cannot clean.")
    else:
        frame['sentence'] = frame['sentence'].apply(clean_text)

    if 'sentence_id' not in frame.columns:
        print(f"  Warning: 'sentence_id' column not found in {file_path}.")
    else:
        frame['sentence_id'] = frame['sentence_id'].astype(str).apply(clean_text)

    # Turn empty strings into NaN so that a single dropna removes them.
    frame[wanted] = frame[wanted].replace('', np.nan)
    rows_before = len(frame)
    frame = frame.dropna(subset=wanted)
    removed = rows_before - len(frame)
    if removed > 0:
        print(f"  Rows after dropping NAs/empty in essential columns: {len(frame)} (dropped {removed})")

    if len(frame.columns) > 0 and frame.columns[0].startswith('Unnamed: '):
        print(f"  Detected and removing potential index column '{frame.columns[0]}'.")
        frame = frame.iloc[:, 1:]

    absent = [col for col in wanted if col not in frame.columns]
    if absent:
        print(f"  Error: Final required columns missing after processing: {absent}. Columns available: {frame.columns}")
        return None

    frame = frame[wanted]
    print(f"  Finished processing. Final rows: {len(frame)}")
    return frame

# Per-language frames collected for the seen languages.
all_train_dfs = []
all_dev_dfs = []

train_paths = {
    "Arabic": AR_TRAIN_PATH, "Bulgarian": BG_TRAIN_PATH, "English": EN_TRAIN_PATH,
    "German": DE_TRAIN_PATH, "Italian": IT_TRAIN_PATH
}
dev_paths = {
    "Arabic": AR_DEV_PATH, "Bulgarian": BG_DEV_PATH, "English": EN_DEV_PATH,
    "German": DE_DEV_PATH, "Italian": IT_DEV_PATH
}

# Both splits are loaded the same way; only the banner, the word used in the
# warning and the destination list differ.
for banner, split_word, split_paths, collected in (
    ("Training", "train", train_paths, all_train_dfs),
    ("Development", "dev", dev_paths, all_dev_dfs),
):
    print(f"\n--- Loading SEEN Language {banner} Data ---")
    for lang_name, path in split_paths.items():
        df = load_and_preprocess_data(path, has_labels=True, language_name=lang_name)
        if df is not None and not df.empty:
            collected.append(df)
        else:
            print(f"Warning: Could not load or process {split_word} data for {lang_name} from {path}.")

if not all_train_dfs or not all_dev_dfs:
    print("Error: No valid training or development data loaded from the specified paths for seen languages. Exiting.")
    # Stop with a failure status; pd.concat below would raise on an empty
    # list, and the site-provided exit() reports success (status 0).
    raise SystemExit(1)

all_train_df = pd.concat(all_train_dfs, ignore_index=True)
all_dev_df = pd.concat(all_dev_dfs, ignore_index=True)
print(f"\nTotal combined SEEN languages train data shape: {all_train_df.shape}")
print(f"Total combined SEEN languages dev data shape: {all_dev_df.shape}")
# Train and dev together feed the final training phase.
combined_train_dev_df = pd.concat([all_train_df, all_dev_df], ignore_index=True)
print(f"Combined SEEN train+dev data shape for final training: {combined_train_dev_df.shape}")

# Load the unlabelled Polish sentences the trained model is applied to.
print("\n--- Loading UNSEEN Language (Polish) Test Data ---")
test_df_zeroshot = load_and_preprocess_data(
    ZERO_SHOT_TEST_PATH,
    has_labels=False,
    language_name="ZeroShot Test (Polish)",
)

if test_df_zeroshot is not None and not test_df_zeroshot.empty:
    if {'sentence_id', 'sentence'}.issubset(test_df_zeroshot.columns):
        print(f"Zero-shot Polish test data shape after preprocessing: {test_df_zeroshot.shape}")
    else:
        print(f"Error: Zero-shot Polish Test DataFrame from {ZERO_SHOT_TEST_PATH} is missing 'sentence_id' or 'sentence' column. Found columns: {test_df_zeroshot.columns}. Prediction will fail.")
        # Discard the frame so the later stages skip prediction.
        test_df_zeroshot = None
else:
    print(f"Warning: Zero-shot Polish test file {ZERO_SHOT_TEST_PATH} could not be loaded, is empty, or failed processing. Prediction will be skipped.")

# Wrap the frames in datasets; any failure here is fatal for the run.
try:
    print("\nCreating Datasets for SEEN languages...")
    train_dataset = SubjectivityDataset(all_train_df, tokenizer, MAX_LENGTH, has_labels=True)
    print(f"  Train dataset size (Seen Languages): {len(train_dataset)}")
    dev_dataset = SubjectivityDataset(all_dev_df, tokenizer, MAX_LENGTH, has_labels=True)
    print(f"  Dev dataset size (Seen Languages): {len(dev_dataset)}")
    combined_dataset = SubjectivityDataset(combined_train_dev_df, tokenizer, MAX_LENGTH, has_labels=True)
    print(f"  Combined Train+Dev dataset size (Seen Languages): {len(combined_dataset)}")

    print("\nCreating Dataset for UNSEEN language (Polish Test)...")
    # Stays None when the Polish data is unavailable; later stages check for it.
    test_dataset_zeroshot = None
    if test_df_zeroshot is not None and not test_df_zeroshot.empty:
        test_dataset_zeroshot = SubjectivityDataset(test_df_zeroshot, tokenizer, MAX_LENGTH, has_labels=False)
        print(f"  Test dataset size (Unseen Language - Polish): {len(test_dataset_zeroshot)}")
    else:
        print("  Skipping Zero-Shot Polish Test Dataset creation (data not available or failed checks).")

except ValueError as e:
    print(f"Error creating Dataset: {e}")
    # Non-zero exit status; the site-provided exit() would report success.
    raise SystemExit(1) from e
except Exception as e:
    print(f"An unexpected error occurred during Dataset creation: {e}")
    raise SystemExit(1) from e

# Training loaders shuffle; evaluation loaders keep file order and use a
# doubled batch size.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
dev_loader = DataLoader(
    dev_dataset,
    batch_size=BATCH_SIZE * 2,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)
combined_loader = DataLoader(
    combined_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
# The Polish loader exists only when its dataset was built and is non-empty.
if test_dataset_zeroshot:
    test_loader_zeroshot = DataLoader(
        test_dataset_zeroshot,
        batch_size=BATCH_SIZE * 2,
        shuffle=False,
        num_workers=2,
        pin_memory=True,
    )
else:
    test_loader_zeroshot = None

print("DataLoaders created.")

print(f"\nLoading model: {MODEL_NAME}")
try:
    # Two output classes: index 0 = OBJ, index 1 = SUBJ.
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=2,
    )
    print("Model loaded.")
    model = model.to(device)
except Exception as e:
    print(f"Error loading model {MODEL_NAME}: {e}")
    # Abort with a failure status; the site-provided exit() is intended for
    # interactive shells and reports success (status 0).
    raise SystemExit(1) from e

def train_model(model, dataloader, optimizer, scheduler, device):
    """Run one training epoch and return the mean loss.

    Batches that lack a required key, fail to move to ``device`` or raise a
    non-OOM error during the forward/backward pass are reported and skipped.

    Args:
        model: Module called with ``input_ids``, ``attention_mask`` and
            ``labels`` whose output exposes ``.loss``.
        dataloader: Iterable of dict batches with those three keys.
        optimizer: Optimizer stepped once per processed batch.
        scheduler: Learning-rate scheduler stepped after the optimizer.
        device: Device the batch tensors are moved to.

    Returns:
        Mean loss over the batches that completed a backward pass, ``0`` when
        none did, or ``None`` when CUDA ran out of memory.
    """
    model.train()
    total_loss = 0
    batch_count = 0
    for batch_idx, batch in enumerate(dataloader):
        optimizer.zero_grad()
        try:
            input_ids = batch['input_ids'].to(device, non_blocking=True)
            attention_mask = batch['attention_mask'].to(device, non_blocking=True)
            labels = batch['labels'].to(device, non_blocking=True)
        except KeyError as e:
            print(f"Error: Missing key in train batch {batch_idx}: {e}. Skipping batch.")
            continue
        except Exception as e:
            print(f"Error moving train batch {batch_idx} to device: {e}. Skipping batch.")
            continue
        try:
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            if loss is None:
                continue
            loss.backward()
            total_loss += loss.item()
            batch_count += 1
            # Clip the global gradient norm to 1.0 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
        except RuntimeError as e:
            if "CUDA out of memory" in str(e):
                print(f"ERROR: CUDA out of memory during training batch {batch_idx}. Try reducing BATCH_SIZE.")
                torch.cuda.empty_cache()
                return None
            print(f"Error during train forward/backward pass for batch {batch_idx}: {e}")
            continue
        except Exception as e:
            print(f"Unexpected error during training batch {batch_idx}: {e}")
            continue
    avg_loss = total_loss / batch_count if batch_count > 0 else 0
    # No explicit ``del`` of the loop locals: they are released on return, and
    # deleting them raised UnboundLocalError whenever the loader was empty or
    # every batch had been skipped before they were assigned.
    gc.collect()
    torch.cuda.empty_cache()
    return avg_loss

def evaluate_model(model, dataloader, device):
    """Score ``model`` on a labelled loader and print a classification report.

    Batches that lack a required key or fail with a non-OOM error are
    reported and skipped.

    Args:
        model: Module called with ``input_ids`` and ``attention_mask`` whose
            output exposes ``.logits``.
        dataloader: Iterable of dict batches with ``input_ids``,
            ``attention_mask`` and ``labels``.
        device: Device the batch tensors are moved to.

    Returns:
        Macro F1 over all processed batches, ``0.0`` when nothing could be
        scored, or ``None`` when CUDA ran out of memory.
    """
    model.eval()
    predictions = []
    actual_labels = []
    with torch.no_grad():
        for batch_idx, batch in enumerate(dataloader):
            try:
                input_ids = batch['input_ids'].to(device, non_blocking=True)
                attention_mask = batch['attention_mask'].to(device, non_blocking=True)
                labels = batch['labels'].to(device, non_blocking=True)
            except KeyError as e:
                print(f"Error: Missing key in eval batch {batch_idx}: {e}. Skipping.")
                continue
            except Exception as e:
                print(f"Error moving eval batch {batch_idx} to device: {e}. Skipping.")
                if "CUDA out of memory" in str(e):
                    print("CUDA OOM Error during evaluation.")
                    torch.cuda.empty_cache()
                    return None
                continue
            try:
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                preds = torch.argmax(logits, dim=1)
                predictions.extend(preds.cpu().tolist())
                actual_labels.extend(labels.cpu().tolist())
            except RuntimeError as e:
                if "CUDA out of memory" in str(e):
                    print("ERROR: CUDA out of memory during evaluation.")
                    torch.cuda.empty_cache()
                    return None
                print(f"Error during evaluation forward pass for batch {batch_idx}: {e}")
                continue
            except Exception as e:
                print(f"Unexpected error during evaluation batch {batch_idx}: {e}")
                continue
    f1 = 0.0
    if actual_labels and len(predictions) == len(actual_labels):
        try:
            f1 = f1_score(actual_labels, predictions, average='macro', zero_division=0)
            report = classification_report(actual_labels, predictions, target_names=['OBJ', 'SUBJ'], zero_division=0)
            print("\nEvaluation Report (Seen Languages Dev Set):")
            print(report)
            print(f"Macro F1 Score: {f1:.4f}")
        except ValueError as e:
            print(f"Could not generate classification report or F1 score: {e}")
    elif len(predictions) != len(actual_labels):
        print(f"Warning: Mismatch in prediction ({len(predictions)}) and label ({len(actual_labels)}) counts during evaluation.")
    else:
        print("Warning: No predictions or labels found/processed during evaluation.")
    # No explicit ``del`` of the loop locals: they are released on return, and
    # deleting them raised UnboundLocalError whenever the loader was empty or
    # every batch had been skipped before they were assigned.
    gc.collect()
    torch.cuda.empty_cache()
    return f1

def predict(model, dataloader, device):
    """Predict ``OBJ``/``SUBJ`` labels for every sentence in ``dataloader``.

    Batches that lack a required key or fail with a non-OOM error are
    reported and skipped, so the result may cover fewer sentences than the
    loader holds.

    Args:
        model: Module called with ``input_ids`` and ``attention_mask`` whose
            output exposes ``.logits``.
        dataloader: Iterable of dict batches with ``input_ids``,
            ``attention_mask`` and ``sentence_id``, or ``None``.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(sentence_ids, labels)`` of equal-length lists. Both are empty
        when ``dataloader`` is ``None`` or yields nothing. If CUDA runs out of
        memory, the ids collected so far are returned with ``'ERROR'`` in
        place of every label.
    """
    if dataloader is None:
        print("Test dataloader is None. Skipping prediction.")
        return [], []
    model.eval()
    all_predictions = []
    all_sentence_ids = []
    print("Starting prediction loop on UNSEEN (Polish) data...")
    with torch.no_grad():
        for batch_idx, batch in enumerate(dataloader):
            try:
                input_ids = batch['input_ids'].to(device, non_blocking=True)
                attention_mask = batch['attention_mask'].to(device, non_blocking=True)
                batch_sentence_ids = batch['sentence_id']
            except KeyError as e:
                print(f"Error: Missing key in prediction batch {batch_idx}: {e}. Skipping.")
                continue
            except Exception as e:
                print(f"Error processing prediction batch {batch_idx}: {e}")
                if "CUDA out of memory" in str(e):
                    print("CUDA OOM Error during prediction.")
                    torch.cuda.empty_cache()
                    return all_sentence_ids, ['ERROR'] * len(all_sentence_ids)
                continue
            try:
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                preds = torch.argmax(logits, dim=1)
                all_predictions.extend(preds.cpu().tolist())
                all_sentence_ids.extend(batch_sentence_ids)
            except RuntimeError as e:
                if "CUDA out of memory" in str(e):
                    print("ERROR: CUDA out of memory during prediction inference.")
                    torch.cuda.empty_cache()
                    return all_sentence_ids, ['ERROR'] * len(all_sentence_ids)
                print(f"Error during prediction forward pass for batch {batch_idx}: {e}")
                continue
            except Exception as e:
                print(f"Unexpected error during prediction batch {batch_idx}: {e}")
                continue
    label_map = {0: 'OBJ', 1: 'SUBJ'}
    final_predictions = [label_map[pred] for pred in all_predictions]
    print(f"Prediction loop finished. Generated {len(final_predictions)} predictions for {len(all_sentence_ids)} IDs.")
    # No explicit ``del`` of the loop locals: they are released on return, and
    # deleting them raised UnboundLocalError for an empty loader.
    gc.collect()
    torch.cuda.empty_cache()
    if len(all_sentence_ids) != len(final_predictions):
        print(f"CRITICAL WARNING: Mismatch in sentence ID count ({len(all_sentence_ids)}) and prediction count ({len(final_predictions)}).")
        min_len = min(len(all_sentence_ids), len(final_predictions))
        print(f"Returning aligned results up to length {min_len}")
        return all_sentence_ids[:min_len], final_predictions[:min_len]
    return all_sentence_ids, final_predictions

# Biases and LayerNorm weights are excluded from weight decay; every other
# parameter is decayed with WEIGHT_DECAY.
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in ['bias', 'LayerNorm.weight'])], 'weight_decay': WEIGHT_DECAY},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in ['bias', 'LayerNorm.weight'])], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=LEARNING_RATE)

if len(train_loader) == 0:
    print("Error: Training loader for seen languages has zero batches. Cannot train.")
    # Abort with a failure status; the site-provided exit() is intended for
    # interactive shells and reports success (status 0).
    raise SystemExit(1)
# The schedule spans the full EPOCHS budget even if early stopping ends the
# phase sooner.
total_steps = len(train_loader) * EPOCHS
warmup_steps = int(total_steps * WARMUP_PROPORTION)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps)
print(f"\nOptimizer and Scheduler configured for initial training on SEEN languages ({total_steps} total steps, {warmup_steps} warmup steps).")

# Early-stopping bookkeeping: best dev macro F1 so far, a CPU copy of the
# weights that produced it, and the count of consecutive non-improving epochs.
best_f1 = 0.0
best_model_state = None
epochs_no_improve = 0
early_stopping_patience = 2

print("\n--- Starting Initial Training Phase on SEEN Languages ---")
if len(train_loader) == 0 or len(dev_loader) == 0:
    print("Error: Train or Dev loader for seen languages is empty. Cannot proceed.")
    # Abort with a failure status; the site-provided exit() is intended for
    # interactive shells and reports success (status 0).
    raise SystemExit(1)

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch + 1}/{EPOCHS}")
    print("Training on SEEN languages...")
    train_loss = train_model(model, train_loader, optimizer, scheduler, device)
    # train_model returns None only after a CUDA out-of-memory error.
    if train_loss is None:
        print(f"Training failed for Epoch {epoch + 1}. Stopping.")
        break
    print(f"Training loss: {train_loss:.4f}")

    print("Evaluating on SEEN languages dev set...")
    dev_f1 = evaluate_model(model, dev_loader, device)
    if dev_f1 is None:
        print(f"Evaluation failed for Epoch {epoch + 1}. Continuing without model saving.")
        continue

    print(f"Combined SEEN Dev F1 Score: {dev_f1:.4f}")

    if dev_f1 > best_f1:
        best_f1 = dev_f1
        # Snapshot on the CPU so the copy does not occupy GPU memory.
        best_model_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
        print(f"*** New best F1: {best_f1:.4f}. Model state saved. ***")
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        print(f"F1 did not improve from {best_f1:.4f}. Epochs without improvement: {epochs_no_improve}")
        if epochs_no_improve >= early_stopping_patience:
            print(f"Early stopping triggered after {early_stopping_patience} epochs without improvement.")
            break

print(f"\n--- Initial Training Finished ---")
print(f"Best SEEN validation F1 achieved: {best_f1:.4f}")

# Restore the best dev checkpoint (if one was captured) before the final phase.
if not best_model_state:
    print("Warning: No best model state was saved. Proceeding with the model's current state.")
else:
    print("Loading best model state for final training...")
    model.cpu()
    model.load_state_dict(best_model_state)
    model.to(device)
    print("Best model loaded successfully.")
    # The snapshot is no longer needed once its weights are loaded.
    del best_model_state
    gc.collect()

print("\n--- Starting Final Training on Combined SEEN Data (train + dev) ---")
if len(combined_loader) > 0:
    # Continue from the restored weights at half the initial learning rate.
    final_lr = LEARNING_RATE / 2
    print(f"Using final learning rate: {final_lr}")
    # Same grouping as the initial phase: no weight decay on biases and
    # LayerNorm weights.
    optimizer_final_grouped_parameters = [
        {
            'params': [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in ['bias', 'LayerNorm.weight'])
            ],
            'weight_decay': WEIGHT_DECAY,
        },
        {
            'params': [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in ['bias', 'LayerNorm.weight'])
            ],
            'weight_decay': 0.0,
        },
    ]
    optimizer_final = AdamW(optimizer_final_grouped_parameters, lr=final_lr)
    total_steps_final = len(combined_loader) * FINAL_EPOCHS
    # No warm-up in this phase: the schedule decays linearly from the start.
    warmup_steps_final = 0
    scheduler_final = get_linear_schedule_with_warmup(
        optimizer_final,
        num_warmup_steps=warmup_steps_final,
        num_training_steps=total_steps_final,
    )
    print(f"Optimizer and Scheduler reconfigured for final training ({total_steps_final} total steps, {warmup_steps_final} warmup steps).")

    for epoch in range(FINAL_EPOCHS):
        print(f"\nFinal training on combined SEEN data - Epoch {epoch + 1}/{FINAL_EPOCHS}")
        train_loss = train_model(model, combined_loader, optimizer_final, scheduler_final, device)
        if train_loss is None:
            print(f"Final training failed for Epoch {epoch + 1}. Stopping final training.")
            break
        print(f"Training loss: {train_loss:.4f}")
else:
    print("Error: Combined train+dev loader for seen languages is empty. Skipping final training.")

print("\n--- Final Training Finished ---")

# Apply the final model to the Polish test set and write the submission TSV.
print(f"\n--- Generating Predictions for Zero-Shot Polish Test Set ({ZERO_SHOT_TEST_PATH}) ---")
if not test_loader_zeroshot:
    print(f"Zero-shot Polish test loader was not created (Test data likely missing/failed checks). No submission file generated.")
else:
    sentence_ids, predictions = predict(model, test_loader_zeroshot, device)

    # Usable only when both lists are non-empty and no 'ERROR' marker is present.
    if sentence_ids and predictions and 'ERROR' not in predictions:
        if len(sentence_ids) != len(predictions):
            print(f"Error: Mismatch between sentence IDs ({len(sentence_ids)}) and predictions ({len(predictions)}). Submission file not generated.")
        else:
            print(f"Successfully generated {len(predictions)} predictions for the zero-shot Polish test set.")
            submission_df = pd.DataFrame({'sentence_id': sentence_ids, 'label': predictions})
            try:
                print(f"Saving predictions to: {OUTPUT_PATH}")
                submission_df[['sentence_id', 'label']].to_csv(
                    OUTPUT_PATH,
                    sep='\t',
                    index=False,
                    quoting=csv.QUOTE_MINIMAL,
                    header=True,
                )
                print(f"Predictions successfully saved to {OUTPUT_PATH}")
                print("\nSubmission File Head:")
                print(submission_df.head())
                print(f"\nReminder: Zip this file into '{OUTPUT_PATH.replace('.tsv', '.zip')}' for submission.")
            except Exception as e:
                print(f"Error saving submission file to {OUTPUT_PATH}: {e}")
    elif test_dataset_zeroshot is not None:
        print("Prediction resulted in empty or error lists for Polish test set.")
    else:
        print("No predictions generated because the zero-shot Polish test data could not be loaded/processed.")

print("\n--- Zero-Shot Script (Polish Target) Finished ---")

Using device: cuda
Loading tokenizer: microsoft/mdeberta-v3-base


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

Tokenizer loaded.

--- Loading SEEN Language Training Data ---
Loading Arabic data from: /kaggle/input/clef2025-checkthat-lab-track-01/arabic/train_ar.tsv
  Initial rows loaded (QUOTE_MINIMAL): 2446
  Finished processing. Final rows: 2446
Loading Bulgarian data from: /kaggle/input/clef2025-checkthat-lab-track-01/bulgarian/train_bg.tsv
  Initial rows loaded (QUOTE_MINIMAL): 691
  Finished processing. Final rows: 691
Loading English data from: /kaggle/input/clef2025-checkthat-lab-track-01/english/train_en.tsv
  Initial rows loaded (QUOTE_MINIMAL): 830
  Finished processing. Final rows: 830
Loading German data from: /kaggle/input/clef2025-checkthat-lab-track-01/german/train_de.tsv
  Initial rows loaded (QUOTE_MINIMAL): 800
  Finished processing. Final rows: 800
Loading Italian data from: /kaggle/input/clef2025-checkthat-lab-track-01/italian/train_it.tsv
  Initial rows loaded (QUOTE_MINIMAL): 1613
  Finished processing. Final rows: 1613

--- Loading SEEN Language Development Data ---
Loadi

2025-05-01 16:28:01.190151: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746116881.403391      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746116881.460773      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded.

Optimizer and Scheduler configured for initial training on SEEN languages (1995 total steps, 199 warmup steps).

--- Starting Initial Training Phase on SEEN Languages ---

Epoch 1/5
Training on SEEN languages...
Training loss: 0.6178
Evaluating on SEEN languages dev set...

Evaluation Report (Seen Languages Dev Set):
              precision    recall  f1-score   support

         OBJ       0.77      0.85      0.81      1462
        SUBJ       0.72      0.61      0.66       931

    accuracy                           0.76      2393
   macro avg       0.75      0.73      0.74      2393
weighted avg       0.75      0.76      0.75      2393

Macro F1 Score: 0.7363
Combined SEEN Dev F1 Score: 0.7363
*** New best F1: 0.7363. Model state saved. ***

Epoch 2/5
Training on SEEN languages...
Training loss: 0.4956
Evaluating on SEEN languages dev set...

Evaluation Report (Seen Languages Dev Set):
              precision    recall  f1-score   support

         OBJ       0.79      0