In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    GPT2LMHeadModel, GPT2TokenizerFast,
    get_linear_schedule_with_warmup
)
from torch.optim import AdamW
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

def read_texts_from_dir(base_dir, labels_csv):
    """Load texts and labels from directory structure - EXACTLY like your original"""
    labels_df = pd.read_csv(labels_csv)
    print(f"Loaded labels, shape: {labels_df.shape}")
    records = []
    
    for folder in sorted(os.listdir(base_dir)):
        folder_path = os.path.join(base_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        try:
            article_id = int(folder.split("_")[-1])
        except Exception as e:
            continue
            
        file1_path = os.path.join(folder_path, "file_1.txt")
        file2_path = os.path.join(folder_path, "file_2.txt")
        if not (os.path.exists(file1_path) and os.path.exists(file2_path)):
            continue
            
        with open(file1_path, "r", encoding="utf-8") as f1, open(file2_path, "r", encoding="utf-8") as f2:
            text1 = f1.read()
            text2 = f2.read()
            
        real_text_id = labels_df.loc[labels_df['id'] == article_id, 'real_text_id'].values
        if len(real_text_id) == 0:
            continue
            
        label = 0 if real_text_id[0] == 1 else 1
        records.append({
            "id": article_id,
            "file_1": text1,
            "file_2": text2,
            "label": label
        })
        
    df = pd.DataFrame(records).sort_values("id").reset_index(drop=True)
    return df

class PerplexityCalculator:
    """Calculate perplexity features that showed high correlation"""
    
    def __init__(self):
        self.gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
        self.gpt2_tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
        self.gpt2_model.eval()
        
    def get_perplexity(self, text):
        """Get perplexity score for text"""
        if not text or len(text.strip()) < 10:
            return 1000.0  # Very high perplexity for empty/short texts
        
        try:
            with torch.no_grad():
                # Use first 400 words
                words = text.split()[:400]
                truncated_text = ' '.join(words)
                
                inputs = self.gpt2_tokenizer(truncated_text, return_tensors='pt', max_length=512, truncation=True)
                if inputs['input_ids'].size(1) <= 1:
                    return 1000.0
                
                outputs = self.gpt2_model(**inputs, labels=inputs['input_ids'])
                perplexity = torch.exp(outputs.loss).item()
                
                # Clamp to reasonable range
                return max(1.0, min(perplexity, 1000.0))
                
        except Exception:
            return 500.0  # High perplexity if calculation fails

class TextChunkDataset(Dataset):
    """EXACTLY your original chunking dataset"""
    
    def __init__(self, texts, labels, tokenizer, perplexity_calc, max_length=512, chunk_overlap=50):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.chunk_overlap = chunk_overlap
        self.perplexity_calc = perplexity_calc
        
        self.data = []
        for i, (text, label) in enumerate(zip(texts, labels)):
            # Calculate perplexity ONCE per text
            text_perplexity = self.perplexity_calc.get_perplexity(text)
            
            chunks = self.chunk_text(text)
            for chunk in chunks:
                self.data.append({
                    'text': chunk,
                    'label': label,
                    'text_id': i,
                    'perplexity': text_perplexity  # Add perplexity as feature
                })
    
    def chunk_text(self, text, max_words=400):
        """Split text into overlapping chunks - EXACTLY like your original"""
        words = text.split()
        if len(words) <= max_words:
            return [text]
        
        chunks = []
        start = 0
        while start < len(words):
            end = min(start + max_words, len(words))
            chunk = ' '.join(words[start:end])
            chunks.append(chunk)
            if end >= len(words):
                break
            start = end - self.chunk_overlap
        return chunks
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        
        encoding = self.tokenizer(
            item['text'],
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(item['label'], dtype=torch.long),
            'text_id': item['text_id'],
            'perplexity': torch.tensor(item['perplexity'], dtype=torch.float)
        }

class EnhancedRobertaModel(nn.Module):
    """Enhanced model + perplexity feature - works with RoBERTa or DeBERTa"""
    
    def __init__(self, model_name='roberta-large'):
        super().__init__()
        self.base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
        
        # Get the correct hidden size from the model config
        hidden_size = self.base_model.config.hidden_size
        
        # Add a small network to combine model output with perplexity
        self.perplexity_layer = nn.Linear(1, 64)
        self.combine_layer = nn.Linear(hidden_size + 64, 2)
        
    def forward(self, input_ids, attention_mask, perplexity, labels=None):
        # Get base model features (not final classification)
        # Handle both RoBERTa and DeBERTa architectures
        if hasattr(self.base_model, 'roberta'):
            # RoBERTa
            base_outputs = self.base_model.roberta(input_ids=input_ids, attention_mask=attention_mask)
        elif hasattr(self.base_model, 'deberta'):
            # DeBERTa
            base_outputs = self.base_model.deberta(input_ids=input_ids, attention_mask=attention_mask)
        else:
            # Fallback - get hidden states from the base model
            base_outputs = self.base_model.base_model(input_ids=input_ids, attention_mask=attention_mask)
        
        # Average pooling of last hidden state
        base_features = base_outputs.last_hidden_state.mean(dim=1)
        
        # Process perplexity
        perplexity_features = torch.relu(self.perplexity_layer(perplexity.unsqueeze(-1)))
        
        # Combine features
        combined = torch.cat([base_features, perplexity_features], dim=1)
        logits = self.combine_layer(combined)
        
        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits, labels)
        
        return {'loss': loss, 'logits': logits}

class ImprovedRobertaDetector:
    """Your original approach + perplexity features"""
    
    def __init__(self, model_name='roberta-large', max_length=512):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")
        
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = EnhancedRobertaModel(model_name).to(self.device)
        self.max_length = max_length
        self.perplexity_calc = PerplexityCalculator()
        
    def prepare_data(self, data_df):
        """Prepare training data from DataFrame - EXACTLY like your original"""
        texts = []
        labels = []
        
        for _, row in data_df.iterrows():
            # File 1
            texts.append(row['file_1'])
            labels.append(1 if row['label'] == 0 else 0)  # 1 if file_1 is real
            
            # File 2  
            texts.append(row['file_2'])
            labels.append(1 if row['label'] == 1 else 0)  # 1 if file_2 is real
            
        return texts, labels
    
    def train(self, data_df, epochs=10, batch_size=3, learning_rate=2e-6, warmup_ratio=0.15):
        """Train with your original hyperparameters"""
        print(f"Preparing training data...")
        texts, labels = self.prepare_data(data_df)
        print(f"Created {len(texts)} text samples")
        
        # Create dataset with chunking + perplexity
        dataset = TextChunkDataset(texts, labels, self.tokenizer, self.perplexity_calc, self.max_length)
        print(f"Created {len(dataset)} chunks from texts")
        
        # Create DataLoader
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        
        # Setup training - EXACTLY like your original
        self.model.train()
        optimizer = AdamW(self.model.parameters(), lr=learning_rate, weight_decay=0.1)
        total_steps = len(dataloader) * epochs
        warmup_steps = int(total_steps * warmup_ratio)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, 
            num_warmup_steps=warmup_steps,
            num_training_steps=total_steps
        )
        
        print(f"Training for {epochs} epochs with {warmup_steps} warmup steps...")
        best_accuracy = 0
        
        for epoch in range(epochs):
            total_loss = 0
            correct_predictions = 0
            total_predictions = 0
            
            for batch in dataloader:
                optimizer.zero_grad()
                
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)
                perplexity = batch['perplexity'].to(self.device)
                
                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    perplexity=perplexity,
                    labels=labels
                )
                
                loss = outputs['loss']
                logits = outputs['logits']
                
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                
                total_loss += loss.item()
                
                # Calculate accuracy
                predictions = torch.argmax(logits, dim=1)
                correct_predictions += (predictions == labels).sum().item()
                total_predictions += labels.size(0)
            
            avg_loss = total_loss / len(dataloader)
            accuracy = correct_predictions / total_predictions
            
            if accuracy > best_accuracy:
                best_accuracy = accuracy
            
            print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}, Best: {best_accuracy:.4f}")
        
        print(f"Training completed. Best accuracy: {best_accuracy:.4f}")
    
    def predict_text(self, text):
        """Predict probability that a single text is real"""
        self.model.eval()
        
        # Get perplexity for the full text
        text_perplexity = self.perplexity_calc.get_perplexity(text)
        
        # Chunk the text
        chunks = self.chunk_text(text)
        chunk_probs = []
        chunk_confidences = []
        
        with torch.no_grad():
            for chunk in chunks:
                encoding = self.tokenizer(
                    chunk,
                    add_special_tokens=True,
                    max_length=self.max_length,
                    padding='max_length',
                    truncation=True,
                    return_tensors='pt'
                ).to(self.device)
                
                perplexity_tensor = torch.tensor([text_perplexity], dtype=torch.float).to(self.device)
                
                outputs = self.model(
                    input_ids=encoding['input_ids'],
                    attention_mask=encoding['attention_mask'],
                    perplexity=perplexity_tensor
                )
                
                probs = torch.softmax(outputs['logits'], dim=1)
                prob_real = probs[0][1].item()  # Probability of being real
                confidence = max(probs[0]).item()  # Confidence
                
                chunk_probs.append(prob_real)
                chunk_confidences.append(confidence)
        
        if not chunk_probs:
            return 0.5
        
        # Weighted average by confidence - EXACTLY like your original
        total_weight = sum(chunk_confidences)
        if total_weight > 0:
            weighted_prob = sum(p * c for p, c in zip(chunk_probs, chunk_confidences)) / total_weight
        else:
            weighted_prob = np.mean(chunk_probs)
        
        return weighted_prob
    
    def chunk_text(self, text, max_words=400, overlap=50):
        """Split text into overlapping chunks - EXACTLY like your original"""
        words = text.split()
        if len(words) <= max_words:
            return [text]
        
        chunks = []
        start = 0
        while start < len(words):
            end = min(start + max_words, len(words))
            chunk = ' '.join(words[start:end])
            chunks.append(chunk)
            if end >= len(words):
                break
            start = end - overlap
        return chunks
    
    def predict_pairs(self, text1, text2):
        """Predict which text in a pair is more likely to be real"""
        prob1 = self.predict_text(text1)
        prob2 = self.predict_text(text2)
        
        return [prob1, prob2]
    
    def predict_test_data(self, test_dir, output_file=None):
        """Make predictions on test data - EXACTLY like your original"""
        print("Making predictions on test data...")
        results = []
        
        for folder in sorted(os.listdir(test_dir)):
            folder_path = os.path.join(test_dir, folder)
            if not os.path.isdir(folder_path):
                continue
            
            try:
                folder_id = int(folder.split("_")[-1])
            except:
                continue
            
            file1_path = os.path.join(folder_path, "file_1.txt")
            file2_path = os.path.join(folder_path, "file_2.txt")
            
            if not (os.path.exists(file1_path) and os.path.exists(file2_path)):
                continue
            
            # Read texts
            with open(file1_path, "r", encoding="utf-8") as f1:
                text1 = f1.read()
            with open(file2_path, "r", encoding="utf-8") as f2:
                text2 = f2.read()
            
            # Get probabilities
            probs = self.predict_pairs(text1, text2)
            
            # Determine real text
            real_text_id = 1 if probs[0] > probs[1] else 2
            
            results.append({
                'id': folder_id,
                'real_text_id': int(real_text_id)
            })
        
        # Create submission
        submission = pd.DataFrame(results).sort_values('id')
        
        if output_file is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = f"roberta_plus_perplexity_{timestamp}.csv"
        
        submission.to_csv(output_file, index=False)
        print(f"Submission saved to: {output_file}")
        print(f"Predicted {len(submission)} test samples")
        
        return submission

class EnsembleDetector:
    """Ensemble of multiple RoBERTa + Perplexity models"""
    
    def __init__(self, model_name='roberta-large', n_models=3):
        self.model_name = model_name
        self.n_models = n_models
        self.models = []
        self.seeds = [42, 98, 123, 225, 456]  # Fixed seeds for reproducibility
        
    def train_ensemble(self, data_df, epochs=10, batch_size=3, learning_rate=2e-6, warmup_ratio=0.15):
        """Train multiple models with different random seeds"""
        print(f"Training ensemble of {self.n_models} models...")
        
        for i, seed in enumerate(self.seeds[:self.n_models]):
            print(f"\n=== Training Model {i+1}/{self.n_models} (seed={seed}) ===")
            
            # Set random seeds
            torch.manual_seed(seed)
            np.random.seed(seed)
            import random
            random.seed(seed)
            
            # Create and train model
            detector = ImprovedRobertaDetector(model_name=self.model_name)
            detector.train(data_df, epochs=epochs, batch_size=batch_size, 
                          learning_rate=learning_rate, warmup_ratio=warmup_ratio)
            
            self.models.append(detector)
            print(f"Model {i+1} training completed!")
        
        print(f"\nEnsemble training completed! Trained {len(self.models)} models.")
    
    def predict_pairs(self, text1, text2):
        """Predict using ensemble - average predictions from all models"""
        if not self.models:
            raise ValueError("No models trained! Call train_ensemble first.")
        
        all_probs = []
        
        for i, model in enumerate(self.models):
            probs = model.predict_pairs(text1, text2)
            all_probs.append(probs)
        
        # Average predictions across all models
        avg_probs = np.mean(all_probs, axis=0)
        return avg_probs.tolist()
    
    def predict_test_data(self, test_dir, output_file=None):
        """Make ensemble predictions on test data"""
        print("Making ensemble predictions on test data...")
        results = []
        
        for folder in sorted(os.listdir(test_dir)):
            folder_path = os.path.join(test_dir, folder)
            if not os.path.isdir(folder_path):
                continue
            
            try:
                folder_id = int(folder.split("_")[-1])
            except:
                continue
            
            file1_path = os.path.join(folder_path, "file_1.txt")
            file2_path = os.path.join(folder_path, "file_2.txt")
            
            if not (os.path.exists(file1_path) and os.path.exists(file2_path)):
                continue
            
            # Read texts
            with open(file1_path, "r", encoding="utf-8") as f1:
                text1 = f1.read()
            with open(file2_path, "r", encoding="utf-8") as f2:
                text2 = f2.read()
            
            # Get ensemble probabilities
            probs = self.predict_pairs(text1, text2)
            
            # Determine real text
            real_text_id = 1 if probs[0] > probs[1] else 2
            
            results.append({
                'id': folder_id,
                'real_text_id': int(real_text_id),
                'confidence': max(probs)
            })
        
        # Create submission
        submission = pd.DataFrame(results)[['id', 'real_text_id']].sort_values('id')
        
        if output_file is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = f"ensemble_roberta_perplexity_{timestamp}.csv"
        
        submission.to_csv(output_file, index=False)
        print(f"Ensemble submission saved to: {output_file}")
        print(f"Predicted {len(submission)} test samples")
        
        return submission

# Main execution
print("RoBERTa + Perplexity Ensemble")
print("=============================")

# Choose between single model or ensemble
USE_ENSEMBLE = True  # Set to False for single model

train_dir = "/mnt/Monolith/ML/kaggle/input/fake-or-real-the-impostor-hunt/data/train"
labels_csv = "/mnt/Monolith/ML/kaggle/input/fake-or-real-the-impostor-hunt/data/train.csv"
test_dir = "/mnt/Monolith/ML/kaggle/input/fake-or-real-the-impostor-hunt/data/test"

if os.path.exists(train_dir) and os.path.exists(labels_csv):
    print("Loading training data...")
    data_df = read_texts_from_dir(train_dir, labels_csv)
    print(f"Loaded {len(data_df)} training pairs")
    
    if len(data_df) > 0:
        if USE_ENSEMBLE:
            # Train ensemble
            ensemble = EnsembleDetector(model_name='google/electra-large-discriminator', n_models=5)
            ensemble.train_ensemble(data_df, epochs=10, batch_size=3, learning_rate=2e-6, warmup_ratio=0.15)
            
            if os.path.exists(test_dir):
                submission = ensemble.predict_test_data(test_dir)
                print("Ensemble training and prediction completed!")
        else:
            # Train single model (your original approach)
            detector = ImprovedRobertaDetector()
            detector.train(data_df, epochs=10, batch_size=3, learning_rate=2e-6, warmup_ratio=0.15)
            
            if os.path.exists(test_dir):
                submission = detector.predict_test_data(test_dir)
                print("Single model training and prediction completed!")
    else:
        print("No training data found")
else:
    print("Training data not found")

RoBERTa + Perplexity Ensemble
Loading training data...
Loaded labels, shape: (95, 2)
Loaded 95 training pairs
Training ensemble of 5 models...

=== Training Model 1/5 (seed=42) ===
Using device: cuda


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-large-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing training data...
Created 190 text samples


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Created 251 chunks from texts
Training for 10 epochs with 126 warmup steps...
Epoch 1/10 - Loss: 2.4382, Accuracy: 0.6056, Best: 0.6056
Epoch 2/10 - Loss: 2.0032, Accuracy: 0.6016, Best: 0.6056
Epoch 3/10 - Loss: 1.2904, Accuracy: 0.6215, Best: 0.6215
Epoch 4/10 - Loss: 0.8144, Accuracy: 0.7012, Best: 0.7012
Epoch 5/10 - Loss: 0.6956, Accuracy: 0.7729, Best: 0.7729
Epoch 6/10 - Loss: 0.6234, Accuracy: 0.8207, Best: 0.8207
Epoch 7/10 - Loss: 0.5593, Accuracy: 0.8247, Best: 0.8247
Epoch 8/10 - Loss: 0.5006, Accuracy: 0.8606, Best: 0.8606
Epoch 9/10 - Loss: 0.4887, Accuracy: 0.8606, Best: 0.8606
Epoch 10/10 - Loss: 0.4806, Accuracy: 0.8685, Best: 0.8685
Training completed. Best accuracy: 0.8685
Model 1 training completed!

=== Training Model 2/5 (seed=98) ===
Using device: cuda


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-large-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing training data...
Created 190 text samples
Created 251 chunks from texts
Training for 10 epochs with 126 warmup steps...
Epoch 1/10 - Loss: 1.0699, Accuracy: 0.6016, Best: 0.6016
Epoch 2/10 - Loss: 0.7335, Accuracy: 0.6733, Best: 0.6733
Epoch 3/10 - Loss: 0.5433, Accuracy: 0.7371, Best: 0.7371
Epoch 4/10 - Loss: 0.4448, Accuracy: 0.8287, Best: 0.8287
Epoch 5/10 - Loss: 0.3925, Accuracy: 0.8725, Best: 0.8725
Epoch 6/10 - Loss: 0.3128, Accuracy: 0.8964, Best: 0.8964
Epoch 7/10 - Loss: 0.2951, Accuracy: 0.9124, Best: 0.9124
Epoch 8/10 - Loss: 0.2868, Accuracy: 0.9084, Best: 0.9124
Epoch 9/10 - Loss: 0.2226, Accuracy: 0.9283, Best: 0.9283
Epoch 10/10 - Loss: 0.2021, Accuracy: 0.9323, Best: 0.9323
Training completed. Best accuracy: 0.9323
Model 2 training completed!

=== Training Model 3/5 (seed=123) ===
Using device: cuda


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-large-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing training data...
Created 190 text samples
Created 251 chunks from texts
Training for 10 epochs with 126 warmup steps...
Epoch 1/10 - Loss: 0.6681, Accuracy: 0.6056, Best: 0.6056
Epoch 2/10 - Loss: 0.5452, Accuracy: 0.6733, Best: 0.6733
Epoch 3/10 - Loss: 0.4509, Accuracy: 0.7490, Best: 0.7490
Epoch 4/10 - Loss: 0.4254, Accuracy: 0.7689, Best: 0.7689
Epoch 5/10 - Loss: 0.3586, Accuracy: 0.8486, Best: 0.8486
Epoch 6/10 - Loss: 0.2916, Accuracy: 0.8924, Best: 0.8924
Epoch 7/10 - Loss: 0.2524, Accuracy: 0.8924, Best: 0.8924
Epoch 8/10 - Loss: 0.2424, Accuracy: 0.9044, Best: 0.9044
Epoch 9/10 - Loss: 0.2303, Accuracy: 0.9163, Best: 0.9163
Epoch 10/10 - Loss: 0.2233, Accuracy: 0.9124, Best: 0.9163
Training completed. Best accuracy: 0.9163
Model 3 training completed!

=== Training Model 4/5 (seed=225) ===
Using device: cuda


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-large-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing training data...
Created 190 text samples
Created 251 chunks from texts
Training for 10 epochs with 126 warmup steps...
Epoch 1/10 - Loss: 0.7910, Accuracy: 0.3267, Best: 0.3267
Epoch 2/10 - Loss: 0.5756, Accuracy: 0.6016, Best: 0.6016
Epoch 3/10 - Loss: 0.4716, Accuracy: 0.7131, Best: 0.7131
Epoch 4/10 - Loss: 0.3917, Accuracy: 0.8486, Best: 0.8486
Epoch 5/10 - Loss: 0.3262, Accuracy: 0.8606, Best: 0.8606
Epoch 6/10 - Loss: 0.2776, Accuracy: 0.8845, Best: 0.8845
Epoch 7/10 - Loss: 0.2325, Accuracy: 0.9084, Best: 0.9084
Epoch 8/10 - Loss: 0.2497, Accuracy: 0.9163, Best: 0.9163
Epoch 9/10 - Loss: 0.1985, Accuracy: 0.9323, Best: 0.9323
Epoch 10/10 - Loss: 0.2225, Accuracy: 0.9243, Best: 0.9323
Training completed. Best accuracy: 0.9323
Model 4 training completed!

=== Training Model 5/5 (seed=456) ===
Using device: cuda


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-large-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing training data...
Created 190 text samples
Created 251 chunks from texts
Training for 10 epochs with 126 warmup steps...
Epoch 1/10 - Loss: 0.5558, Accuracy: 0.6175, Best: 0.6175
Epoch 2/10 - Loss: 0.5051, Accuracy: 0.6972, Best: 0.6972
Epoch 3/10 - Loss: 0.4666, Accuracy: 0.7251, Best: 0.7251
Epoch 4/10 - Loss: 0.3899, Accuracy: 0.8367, Best: 0.8367
Epoch 5/10 - Loss: 0.3120, Accuracy: 0.8805, Best: 0.8805
Epoch 6/10 - Loss: 0.2784, Accuracy: 0.8924, Best: 0.8924
Epoch 7/10 - Loss: 0.2424, Accuracy: 0.9044, Best: 0.9044
Epoch 8/10 - Loss: 0.2181, Accuracy: 0.9203, Best: 0.9203
Epoch 9/10 - Loss: 0.2202, Accuracy: 0.9243, Best: 0.9243
Epoch 10/10 - Loss: 0.2267, Accuracy: 0.9283, Best: 0.9283
Training completed. Best accuracy: 0.9283
Model 5 training completed!

Ensemble training completed! Trained 5 models.
Making ensemble predictions on test data...
Ensemble submission saved to: ensemble_roberta_perplexity_20250906_135109.csv
Predicted 1068 test samples
Ensemble training and