In [1]:
"""
LLM - Detect AI Generated Text Competition Solution - Enhanced Version
Optimized for Kaggle Notebooks with 9-hour runtime constraint

Key Improvements:
1. Better error handling and memory management
2. Adaptive runtime allocation based on available resources
3. More efficient zero-shot detection with caching
4. Improved ensemble with weighted voting
5. Better data preprocessing and validation
6. CPU/GPU adaptive configurations
"""

"""
LLM - Detect AI Generated Text Competition Solution - Maximum Resource Utilization
Optimized to fully utilize Kaggle's 9-hour runtime constraint for best performance
"""

"\nLLM - Detect AI Generated Text Competition Solution - Maximum Resource Utilization\nOptimized to fully utilize Kaggle's 9-hour runtime constraint for best performance\n"

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    AutoModelForCausalLM, AutoConfig,
    TrainingArguments, Trainer
)
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
try:
    import xgboost as xgb
    HAS_XGB = True
except ImportError:
    HAS_XGB = False
import re
import time
import gc
import warnings
import pickle
import os
warnings.filterwarnings('ignore')

In [3]:
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

In [4]:
class Config:
    """Configuration optimized for maximum resource utilization"""
    
    # Check if GPU is available
    HAS_GPU = torch.cuda.is_available()
    DEVICE = torch.device('cuda' if HAS_GPU else 'cpu')

    # Model save directory
    MODEL_SAVE_DIR = "./saved_models"
    
    # Maximized model configurations for full resource utilization
    if HAS_GPU:
        DEBERTA_MODEL = "microsoft/deberta-v3-base"
        LLM_MODEL = "microsoft/DialoGPT-medium"
        BATCH_SIZE = 16
        EPOCHS = 12
        N_FOLDS = 10
        ZERO_SHOT_SAMPLE_SIZE = 1000
        MAX_LENGTH = 512
        GRADIENT_ACCUMULATION_STEPS = 1
        DATALOADER_WORKERS = 4                        # Utilize multiple cores
    else:
        DEBERTA_MODEL = "microsoft/deberta-v3-base"
        LLM_MODEL = "distilgpt2"
        BATCH_SIZE = 8
        EPOCHS = 3
        N_FOLDS = 5
        ZERO_SHOT_SAMPLE_SIZE = 500
        MAX_LENGTH = 512
        GRADIENT_ACCUMULATION_STEPS = 4
        DATALOADER_WORKERS = 0
    
    # Enhanced training parameters
    LEARNING_RATE = 2e-5                              
    WARMUP_RATIO = 0.1
    WEIGHT_DECAY = 0.01
    SCHEDULER_TYPE = "cosine"                         # Better scheduling
    
    # Enhanced zero-shot detection parameters
    PERTURB_SAMPLES = 25 if HAS_GPU else 10          # Increased perturbations
    PERTURB_RATIO = 0.15
    ZERO_SHOT_MULTIPLE_RUNS = 3                      # Multiple runs for stability
    
    # Optimized time budget allocation (9 hours = 32400 seconds)
    MAX_RUNTIME = 32400
    DEBERTA_TIME_BUDGET = 0.45 * MAX_RUNTIME         # 45% for DeBERTa
    TFIDF_TIME_BUDGET = 0.15 * MAX_RUNTIME           # 15% for TF-IDF
    ZERO_SHOT_TIME_BUDGET = 0.25 * MAX_RUNTIME       # 25% for zero-shot
    ENSEMBLE_TIME_BUDGET = 0.10 * MAX_RUNTIME        # 10% for ensemble
    BUFFER_TIME = 0.05 * MAX_RUNTIME                 # 5% buffer
    
    # Advanced features
    USE_PSEUDO_LABELING = True                        # Use confident predictions
    USE_ADVANCED_AUGMENTATION = True                  # Text augmentation
    USE_MULTI_SCALE_FEATURES = True                   # Multiple feature scales
    USE_STACKING = True                               # Advanced stacking

In [5]:
class AdvancedTextDataset(Dataset):
    """Enhanced dataset with data augmentation"""
    
    def __init__(self, texts, labels, tokenizer, max_length=768, augment=False):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.augment = augment
        
        if len(texts) != len(labels):
            raise ValueError("Texts and labels must have same length")
    
    def __len__(self):
        return len(self.texts)
    
    def augment_text(self, text):
        """Simple text augmentation"""
        if not self.augment or np.random.random() > 0.3:
            return text
        
        # Random word dropout
        words = text.split()
        if len(words) > 5:
            num_drop = max(1, len(words) // 20)
            drop_indices = np.random.choice(len(words), num_drop, replace=False)
            words = [w for i, w in enumerate(words) if i not in drop_indices]
        
        return ' '.join(words)
    
    def __getitem__(self, idx):
        try:
            text = str(self.texts[idx])
            label = int(self.labels[idx]) if self.labels is not None else 0
            
            # Apply augmentation
            if self.augment:
                text = self.augment_text(text)
            
            # Clean text
            text = re.sub(r'\s+', ' ', text.strip())
            
            # Tokenize
            encoding = self.tokenizer(
                text,
                truncation=True,
                padding='max_length',
                max_length=self.max_length,
                return_tensors='pt'
            )
            
            return {
                'input_ids': encoding['input_ids'].flatten(),
                'attention_mask': encoding['attention_mask'].flatten(),
                'labels': torch.tensor(label, dtype=torch.long)
            }
        except Exception as e:
            print(f"Error processing sample {idx}: {e}")
            return {
                'input_ids': torch.zeros(self.max_length, dtype=torch.long),
                'attention_mask': torch.zeros(self.max_length, dtype=torch.long),
                'labels': torch.tensor(0, dtype=torch.long)
            }

In [6]:
class DeBERTaClassifier:
    """Enhanced DeBERTa classifier with model saving capability"""
    
    def __init__(self, model_name=Config.DEBERTA_MODEL):
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self.training_history = []
        
    def create_model(self):
        """Create and configure the model"""
        try:
            print(f"Loading model: {self.model_name}")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            
            config = AutoConfig.from_pretrained(self.model_name)
            config.num_labels = 2
            config.hidden_dropout_prob = 0.1
            config.attention_probs_dropout_prob = 0.1
            
            self.model = AutoModelForSequenceClassification.from_pretrained(
                self.model_name,
                config=config
            )
            
            self.model = self.model.to(Config.DEVICE)
            print(f"Model loaded successfully on {Config.DEVICE}")
            
        except Exception as e:
            print(f"Error loading model: {e}")
            raise
    
    def train_with_multiple_epochs(self, train_texts, train_labels, val_texts=None, val_labels=None):
        """Enhanced training with multiple epochs and validation"""
        print(f"Training DeBERTa with {Config.EPOCHS} epochs on {len(train_texts)} samples...")
        
        try:
            if self.model is None:
                self.create_model()
            
            # Create datasets with augmentation
            train_dataset = AdvancedTextDataset(
                train_texts, train_labels, self.tokenizer, 
                Config.MAX_LENGTH, augment=True
            )
            
            val_dataset = None
            if val_texts is not None and val_labels is not None:
                val_dataset = AdvancedTextDataset(
                    val_texts, val_labels, self.tokenizer, Config.MAX_LENGTH
                )
            
            # Calculate training steps
            num_training_steps = (len(train_dataset) // Config.BATCH_SIZE) * Config.EPOCHS
            warmup_steps = int(num_training_steps * Config.WARMUP_RATIO)
            
            # Enhanced training arguments
            training_args = TrainingArguments(
                output_dir='./deberta_output',
                num_train_epochs=Config.EPOCHS,
                per_device_train_batch_size=Config.BATCH_SIZE,
                per_device_eval_batch_size=Config.BATCH_SIZE,
                learning_rate=Config.LEARNING_RATE,
                weight_decay=Config.WEIGHT_DECAY,
                warmup_steps=warmup_steps,
                lr_scheduler_type=Config.SCHEDULER_TYPE,
                logging_dir='./logs',
                logging_steps=20,
                save_strategy='epoch',
                evaluation_strategy='epoch' if val_dataset else 'no',
                eval_steps=100,
                fp16=Config.HAS_GPU,
                dataloader_num_workers=Config.DATALOADER_WORKERS,
                remove_unused_columns=False,
                gradient_accumulation_steps=Config.GRADIENT_ACCUMULATION_STEPS,
                max_grad_norm=1.0,
                seed=42,
                report_to='none',
                load_best_model_at_end=True if val_dataset else False,
                metric_for_best_model='eval_loss' if val_dataset else None,
                greater_is_better=False,
                save_total_limit=2,
            )
            
            # Create trainer
            trainer = Trainer(
                model=self.model,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=val_dataset,
                tokenizer=self.tokenizer,
            )
            
            # Train
            start_time = time.time()
            trainer.train()
            
            elapsed_time = time.time() - start_time
            print(f"Enhanced DeBERTa training completed in {elapsed_time:.2f} seconds!")
            
            # Store training history
            self.training_history.append({
                'epochs': Config.EPOCHS,
                'time': elapsed_time,
                'samples': len(train_texts)
            })
            
            # Memory cleanup
            del trainer
            gc.collect()
            if Config.HAS_GPU:
                torch.cuda.empty_cache()
                
        except Exception as e:
            print(f"Error during enhanced training: {e}")
            raise
    
    def save_model(self, save_path):
        """Save the trained model and tokenizer"""
        try:
            os.makedirs(save_path, exist_ok=True)
            
            # Save model and tokenizer
            self.model.save_pretrained(save_path)
            self.tokenizer.save_pretrained(save_path)
            
            # Save training history and config
            with open(os.path.join(save_path, 'training_info.pkl'), 'wb') as f:
                pickle.dump({
                    'training_history': self.training_history,
                    'model_name': self.model_name,
                    'config': {
                        'epochs': Config.EPOCHS,
                        'batch_size': Config.BATCH_SIZE,
                        'max_length': Config.MAX_LENGTH,
                        'learning_rate': Config.LEARNING_RATE
                    }
                }, f)
            
            print(f"DeBERTa model saved to {save_path}")
            
        except Exception as e:
            print(f"Error saving DeBERTa model: {e}")
            raise
    
    def predict_with_tta(self, texts):
        """Prediction with Test Time Augmentation"""
        if self.model is None:
            raise ValueError("Model not trained yet!")
        
        print(f"Making enhanced predictions on {len(texts)} samples...")
        
        try:
            self.model.eval()
            all_predictions = []
            
            # Multiple prediction runs with slight variations
            for run in range(3):
                predictions = []
                dataset = AdvancedTextDataset(
                    texts, [0] * len(texts), self.tokenizer, Config.MAX_LENGTH
                )
                dataloader = DataLoader(
                    dataset, batch_size=Config.BATCH_SIZE, 
                    shuffle=False, num_workers=Config.DATALOADER_WORKERS
                )
                
                with torch.no_grad():
                    for batch in dataloader:
                        input_ids = batch['input_ids'].to(Config.DEVICE)
                        attention_mask = batch['attention_mask'].to(Config.DEVICE)
                        
                        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                        probs = F.softmax(outputs.logits, dim=-1)
                        predictions.extend(probs[:, 1].cpu().numpy())
                
                all_predictions.append(predictions[:len(texts)])
            
            # Average predictions from multiple runs
            final_predictions = np.mean(all_predictions, axis=0)
            return final_predictions
            
        except Exception as e:
            print(f"Error during prediction: {e}")
            return np.full(len(texts), 0.5)

In [7]:
class ZeroShotDetector:
    """Enhanced zero-shot detector with model saving"""
    
    def __init__(self, model_name=Config.LLM_MODEL):
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self.cache = {}
        self.alternative_models = []
        
    def initialize_models(self):
        """Initialize multiple models for ensemble zero-shot detection"""
        try:
            print(f"Loading enhanced zero-shot model: {self.model_name}")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
            self.model.to(Config.DEVICE)
            self.model.eval()
            
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
                
            print("Enhanced zero-shot model loaded successfully")
            
        except Exception as e:
            print(f"Error loading zero-shot model: {e}")
            raise
    
    def save_model(self, save_path):
        """Save zero-shot model and cache"""
        try:
            os.makedirs(save_path, exist_ok=True)
            
            # Save model and tokenizer
            if self.model is not None:
                self.model.save_pretrained(save_path)
                self.tokenizer.save_pretrained(save_path)
            
            # Save cache and config
            with open(os.path.join(save_path, 'zero_shot_info.pkl'), 'wb') as f:
                pickle.dump({
                    'cache': self.cache,
                    'model_name': self.model_name,
                    'config': {
                        'perturb_samples': Config.PERTURB_SAMPLES,
                        'perturb_ratio': Config.PERTURB_RATIO,
                        'multiple_runs': Config.ZERO_SHOT_MULTIPLE_RUNS
                    }
                }, f)
            
            print(f"Zero-shot model saved to {save_path}")
            
        except Exception as e:
            print(f"Error saving zero-shot model: {e}")
    
    def advanced_perturb_text(self, text, num_perturbations=Config.PERTURB_SAMPLES):
        """Advanced text perturbation with multiple strategies"""
        perturbations = []
        tokens = text.split()
        
        if len(tokens) < 3:
            return [text]
        
        strategies = ['dropout', 'swap', 'duplicate', 'reorder', 'synonym']
        
        for _ in range(num_perturbations):
            try:
                strategy = np.random.choice(strategies)
                perturbed_tokens = tokens.copy()
                
                if strategy == 'dropout' and len(perturbed_tokens) > 3:
                    # Remove random tokens
                    num_drops = max(1, len(perturbed_tokens) // 10)
                    drop_indices = np.random.choice(len(perturbed_tokens), num_drops, replace=False)
                    perturbed_tokens = [t for i, t in enumerate(perturbed_tokens) if i not in drop_indices]
                
                elif strategy == 'swap' and len(perturbed_tokens) > 1:
                    # Swap adjacent tokens
                    idx = np.random.randint(0, len(perturbed_tokens) - 1)
                    perturbed_tokens[idx], perturbed_tokens[idx + 1] = perturbed_tokens[idx + 1], perturbed_tokens[idx]
                
                elif strategy == 'duplicate':
                    # Duplicate random tokens
                    idx = np.random.randint(0, len(perturbed_tokens))
                    perturbed_tokens.insert(idx, perturbed_tokens[idx])
                
                elif strategy == 'reorder' and len(perturbed_tokens) > 4:
                    # Reorder a small segment
                    start = np.random.randint(0, len(perturbed_tokens) - 3)
                    end = min(start + 3, len(perturbed_tokens))
                    segment = perturbed_tokens[start:end]
                    np.random.shuffle(segment)
                    perturbed_tokens[start:end] = segment
                
                perturbed_text = ' '.join(perturbed_tokens)
                if perturbed_text != text and len(perturbed_text.strip()) > 0:
                    perturbations.append(perturbed_text)
                    
            except Exception:
                continue
        
        return perturbations if perturbations else [text]
    
    def multi_run_detection(self, text):
        """Multiple runs of AI detection for stability"""
        try:
            if self.model is None:
                self.initialize_models()
            
            all_scores = []
            
            for run in range(Config.ZERO_SHOT_MULTIPLE_RUNS):
                # Original log-likelihood
                original_ll = self.get_log_likelihood(text)
                
                # Perturbed log-likelihoods
                perturbations = self.advanced_perturb_text(text)
                perturbed_lls = [self.get_log_likelihood(p) for p in perturbations]
                
                if perturbed_lls:
                    mean_perturbed_ll = np.mean(perturbed_lls)
                    std_perturbed_ll = np.std(perturbed_lls) if len(perturbed_lls) > 1 else 1.0
                    
                    # Enhanced scoring
                    curvature_score = (original_ll - mean_perturbed_ll) / (std_perturbed_ll + 1e-8)
                    prob = 1 / (1 + np.exp(-curvature_score * 2))
                    all_scores.append(prob)
            
            # Return average of multiple runs
            return np.mean(all_scores) if all_scores else 0.5
            
        except Exception as e:
            print(f"Error in multi-run detection: {e}")
            return 0.5
    
    def get_log_likelihood(self, text):
        """Enhanced log-likelihood calculation with caching"""
        if text in self.cache:
            return self.cache[text]
        
        try:
            # Truncate very long texts
            if len(text.split()) > 150:
                text = ' '.join(text.split()[:150])
            
            inputs = self.tokenizer(
                text,
                return_tensors='pt',
                truncation=True,
                max_length=Config.MAX_LENGTH,
                padding=True
            ).to(Config.DEVICE)
            
            with torch.no_grad():
                outputs = self.model(**inputs, labels=inputs['input_ids'])
                log_likelihood = -outputs.loss.item()
            
            self.cache[text] = log_likelihood
            return log_likelihood
            
        except Exception as e:
            return 0.0
    
    def predict(self, texts, time_budget=None):
        """Enhanced prediction with full resource utilization"""
        print(f"Running enhanced zero-shot detection on {len(texts)} samples...")
        
        start_time = time.time()
        predictions = []
        
        for i, text in enumerate(texts):
            if i % 25 == 0:
                print(f"Processing text {i+1}/{len(texts)}")
            
            # Check time budget
            if time_budget and (time.time() - start_time) > time_budget:
                print(f"Time budget reached, using default predictions for remaining samples")
                predictions.extend([0.5] * (len(texts) - i))
                break
            
            try:
                prob = self.multi_run_detection(text)
                predictions.append(prob)
            except Exception as e:
                print(f"Error processing text {i}: {e}")
                predictions.append(0.5)
        
        return np.array(predictions)

In [8]:
class TFIDFClassifier:
    """Enhanced TF-IDF classifier with model saving"""
    
    def __init__(self):
        # Multiple TF-IDF vectorizers with different parameters
        self.vectorizers = [
            TfidfVectorizer(max_features=10000, ngram_range=(1, 2), min_df=2, max_df=0.95),
            TfidfVectorizer(max_features=10000, ngram_range=(1, 3), min_df=3, max_df=0.90),
            TfidfVectorizer(max_features=8000, ngram_range=(2, 4), min_df=2, max_df=0.95, analyzer='char'),
        ]
        
        # Ensemble of classifiers
        if Config.HAS_GPU and HAS_XGB:
            self.classifiers = [
                xgb.XGBClassifier(n_estimators=300, max_depth=10, learning_rate=0.1, random_state=42),
                xgb.XGBClassifier(n_estimators=200, max_depth=8, learning_rate=0.15, random_state=43),
                LogisticRegression(random_state=42, max_iter=2000, C=0.5),
            ]
        else:
            self.classifiers = [
                LogisticRegression(random_state=42, max_iter=2000, C=1.0),
                LogisticRegression(random_state=43, max_iter=2000, C=0.5),
                LogisticRegression(random_state=44, max_iter=2000, C=2.0),
            ]
        
        self.feature_extractor = StatisticalFeatureExtractor()
        self.is_fitted = False
    
    def train(self, texts, labels):
        """Enhanced training with multiple feature sets"""
        print(f"Training enhanced TF-IDF ensemble with {len(texts)} samples...")
        
        try:
            # Extract features from all vectorizers
            all_features = []
            for i, vectorizer in enumerate(self.vectorizers):
                print(f"Training vectorizer {i+1}/{len(self.vectorizers)}")
                features = vectorizer.fit_transform(texts)
                all_features.append(features)
            
            # Extract statistical features
            stat_features = self.feature_extractor.extract_features(texts)
            
            # Train each classifier
            for i, classifier in enumerate(self.classifiers):
                print(f"Training classifier {i+1}/{len(self.classifiers)}")
                
                # Use different feature combinations for diversity
                if i < len(all_features):
                    combined_features = np.hstack([
                        all_features[i].toarray(),
                        stat_features.values
                    ])
                else:
                    # Combine all TF-IDF features for the last classifier
                    combined_tfidf = np.hstack([f.toarray() for f in all_features])
                    combined_features = np.hstack([
                        combined_tfidf,
                        stat_features.values
                    ])
                
                classifier.fit(combined_features, labels)
            
            self.is_fitted = True
            print("Enhanced TF-IDF ensemble training completed!")
            
        except Exception as e:
            print(f"Error training TF-IDF ensemble: {e}")
            raise
    
    def save_model(self, save_path):
        """Save TF-IDF models and vectorizers"""
        try:
            os.makedirs(save_path, exist_ok=True)
            
            # Save all components
            with open(os.path.join(save_path, 'tfidf_model.pkl'), 'wb') as f:
                pickle.dump({
                    'vectorizers': self.vectorizers,
                    'classifiers': self.classifiers,
                    'feature_extractor': self.feature_extractor,
                    'is_fitted': self.is_fitted
                }, f)
            
            print(f"TF-IDF model saved to {save_path}")
            
        except Exception as e:
            print(f"Error saving TF-IDF model: {e}")
    
    def predict(self, texts):
        """Enhanced prediction with ensemble voting"""
        if not self.is_fitted:
            raise ValueError("Model not fitted yet!")
        
        try:
            # Extract features
            all_features = []
            for vectorizer in self.vectorizers:
                features = vectorizer.transform(texts)
                all_features.append(features)
            
            stat_features = self.feature_extractor.extract_features(texts)
            
            # Get predictions from all classifiers
            all_predictions = []
            for i, classifier in enumerate(self.classifiers):
                if i < len(all_features):
                    combined_features = np.hstack([
                        all_features[i].toarray(),
                        stat_features.values
                    ])
                else:
                    combined_tfidf = np.hstack([f.toarray() for f in all_features])
                    combined_features = np.hstack([
                        combined_tfidf,
                        stat_features.values
                    ])
                
                if hasattr(classifier, 'predict_proba'):
                    pred = classifier.predict_proba(combined_features)[:, 1]
                else:
                    pred = classifier.predict(combined_features).astype(float)
                
                all_predictions.append(pred)
            
            # Ensemble prediction (weighted average)
            weights = [0.4, 0.3, 0.3]  # Adjust weights based on validation performance
            final_prediction = np.average(all_predictions, axis=0, weights=weights)
            
            return final_prediction
            
        except Exception as e:
            print(f"Error in TF-IDF prediction: {e}")
            return np.full(len(texts), 0.5)

In [9]:
class EnsembleClassifier:
    """Advanced ensemble with stacking and meta-learning, with saving capability"""
    
    def __init__(self):
        # Multiple meta-classifiers for robustness
        self.meta_classifiers = [
            LogisticRegression(random_state=42, max_iter=2000, C=1.0),
            LogisticRegression(random_state=43, max_iter=2000, C=0.5),
        ]
        
        if Config.HAS_GPU and HAS_XGB:
            self.meta_classifiers.append(
                xgb.XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42)
            )
        
        self.component_weights = None
        self.component_names = ['DeBERTa', 'TF-IDF', 'ZeroShot']
        self.is_fitted = False
    
    def create_advanced_features(self, component_predictions):
        """Create advanced features from component predictions"""
        features = []
        
        # Original predictions
        features.extend(component_predictions)
        
        # Interaction features
        for i in range(len(component_predictions)):
            for j in range(i+1, len(component_predictions)):
                features.append(component_predictions[i] * component_predictions[j])
        
        # Statistical features
        stacked = np.column_stack(component_predictions)
        features.append(np.mean(stacked, axis=1))
        features.append(np.std(stacked, axis=1))
        features.append(np.max(stacked, axis=1))
        features.append(np.min(stacked, axis=1))
        
        # Confidence features
        for pred in component_predictions:
            confidence = np.abs(pred - 0.5)
            features.append(confidence)
        
        return np.column_stack(features)
    
    def train(self, component_predictions, labels):
        """Train advanced ensemble with multiple meta-classifiers"""
        print("Training advanced ensemble meta-classifiers...")
        
        try:
            # Create advanced features
            advanced_features = self.create_advanced_features(component_predictions)
            
            print(f"Advanced features shape: {advanced_features.shape}")
            
            # Train multiple meta-classifiers
            for i, meta_classifier in enumerate(self.meta_classifiers):
                print(f"Training meta-classifier {i+1}/{len(self.meta_classifiers)}")
                meta_classifier.fit(advanced_features, labels)
            
            self.is_fitted = True
            print("Advanced ensemble training completed!")
            
        except Exception as e:
            print(f"Error training advanced ensemble: {e}")
            raise
    
    def save_model(self, save_path):
        """Save ensemble model"""
        try:
            os.makedirs(save_path, exist_ok=True)
            
            with open(os.path.join(save_path, 'ensemble_model.pkl'), 'wb') as f:
                pickle.dump({
                    'meta_classifiers': self.meta_classifiers,
                    'component_weights': self.component_weights,
                    'component_names': self.component_names,
                    'is_fitted': self.is_fitted
                }, f)
            
            print(f"Ensemble model saved to {save_path}")
            
        except Exception as e:
            print(f"Error saving ensemble model: {e}")
    
    def predict(self, component_predictions):
        """Advanced ensemble prediction"""
        if not self.is_fitted:
            raise ValueError("Ensemble not fitted yet!")
        
        try:
            # Create advanced features
            advanced_features = self.create_advanced_features(component_predictions)
            
            # Get predictions from all meta-classifiers
            meta_predictions = []
            for meta_classifier in self.meta_classifiers:
                if hasattr(meta_classifier, 'predict_proba'):
                    pred = meta_classifier.predict_proba(advanced_features)[:, 1]
                else:
                    pred = meta_classifier.predict(advanced_features).astype(float)
                meta_predictions.append(pred)
            
            # Final ensemble of meta-classifiers
            final_prediction = np.mean(meta_predictions, axis=0)
            
            return final_prediction
            
        except Exception as e:
            print(f"Error in advanced ensemble prediction: {e}")
            return np.mean(component_predictions, axis=0)

In [10]:
class CompetitionSolution:
    """Main solution class optimized for training and saving models"""
    
    def __init__(self):
        self.deberta_classifier = DeBERTaClassifier()
        self.zero_shot_detector = ZeroShotDetector()
        self.tfidf_classifier = TFIDFClassifier()
        self.ensemble_classifier = EnsembleClassifier()
        
        self.is_trained = False
        self.start_time = None
        self.performance_metrics = {}
        
        # Create save directory
        os.makedirs(Config.MODEL_SAVE_DIR, exist_ok=True)
        
    def load_data(self):
        """Load and validate competition data"""
        print("Loading competition data...")
        
        try:
            train_df = pd.read_csv('/kaggle/input/llm-detect-ai-generated-text/train_essays.csv')
            test_df = pd.read_csv('/kaggle/input/llm-detect-ai-generated-text/test_essays.csv')
            
            print(f"Training samples: {len(train_df)}")
            print(f"Test samples: {len(test_df)}")
            
            return train_df, test_df
            
        except Exception as e:
            print(f"Error loading data: {e}")
            raise
    
    def preprocess_data(self, df):
        """Enhanced data preprocessing"""
        print("Preprocessing data...")
        
        try:
            original_len = len(df)
            
            # Clean text
            df['text'] = df['text'].fillna('')
            df['text'] = df['text'].str.strip()
            df = df[df['text'].str.len() > 10]
            df = df.drop_duplicates(subset=['text'])
            df['text'] = df['text'].apply(lambda x: re.sub(r'\s+', ' ', x))
            
            print(f"Data: {original_len} -> {len(df)} samples after preprocessing")
            
            return df
            
        except Exception as e:
            print(f"Error preprocessing data: {e}")
            raise
    
    def train_and_save_models(self, train_df):
        """Enhanced training pipeline with model saving"""
        print("Starting training and saving pipeline...")
        print(f"Target epochs: {Config.EPOCHS}")
        print(f"Target folds: {Config.N_FOLDS}")
        print(f"Save directory: {Config.MODEL_SAVE_DIR}")
        
        self.start_time = time.time()
        
        try:
            train_df = self.preprocess_data(train_df)
            texts = train_df['text'].values
            labels = train_df['generated'].values
            
            print(f"Training with {len(texts)} samples")
            print(f"Label distribution: {np.bincount(labels)}")
            
            # Maximize cross-validation folds
            skf = StratifiedKFold(n_splits=Config.N_FOLDS, shuffle=True, random_state=42)
            
            meta_predictions = []
            meta_labels = []
            
            # Process all folds to maximize resource utilization
            for fold, (train_idx, val_idx) in enumerate(skf.split(texts, labels)):
                print(f"\n=== FOLD {fold + 1}/{Config.N_FOLDS} ===")
                
                # Check time budget
                elapsed_time = time.time() - self.start_time
                remaining_time = Config.MAX_RUNTIME - elapsed_time
                
                if remaining_time < 3600:  # Less than 1 hour remaining
                    print(f"Limited time remaining ({remaining_time:.0f}s), optimizing remaining training")
                
                # Split data
                train_texts = texts[train_idx]
                train_labels = labels[train_idx]
                val_texts = texts[val_idx]
                val_labels = labels[val_idx]
                
                # Train DeBERTa with maximum epochs for all folds
                print(f"Training DeBERTa with {Config.EPOCHS} epochs...")
                self.deberta_classifier.train_with_multiple_epochs(train_texts, train_labels, val_texts, val_labels)
                
                # Train enhanced TF-IDF
                print("Training enhanced TF-IDF ensemble...")
                self.tfidf_classifier.train(train_texts, train_labels)
                
                # Get predictions
                print("Getting enhanced predictions...")
                deberta_preds = self.deberta_classifier.predict_with_tta(val_texts)
                tfidf_preds = self.tfidf_classifier.predict(val_texts)
                
                # Zero-shot with maximum sample size
                print(f"Zero-shot detection on {min(len(val_texts), Config.ZERO_SHOT_SAMPLE_SIZE)} samples...")
                zero_shot_texts = val_texts[:Config.ZERO_SHOT_SAMPLE_SIZE]
                zero_shot_preds = self.zero_shot_detector.predict(zero_shot_texts)
                
                # Pad if needed
                if len(zero_shot_preds) < len(val_texts):
                    zero_shot_preds = np.pad(
                        zero_shot_preds, 
                        (0, len(val_texts) - len(zero_shot_preds)),
                        mode='constant', 
                        constant_values=0.5
                    )
                
                # Store for meta-learning
                meta_predictions.append([deberta_preds, tfidf_preds, zero_shot_preds])
                meta_labels.extend(val_labels)
                
                fold_time = time.time() - self.start_time
                print(f"Fold {fold + 1} completed in {fold_time:.2f}s (total: {fold_time:.2f}s)")
                
                # Memory cleanup
                gc.collect()
                if Config.HAS_GPU:
                    torch.cuda.empty_cache()
            
            # Train advanced ensemble
            print("Training advanced ensemble meta-classifiers...")
            all_deberta_preds = np.concatenate([preds[0] for preds in meta_predictions])
            all_tfidf_preds = np.concatenate([preds[1] for preds in meta_predictions])
            all_zero_shot_preds = np.concatenate([preds[2] for preds in meta_predictions])
            
            self.ensemble_classifier.train(
                [all_deberta_preds, all_tfidf_preds, all_zero_shot_preds],
                np.array(meta_labels)
            )
            
            # Save all models
            print("\n=== SAVING MODELS ===")
            self.deberta_classifier.save_model(os.path.join(Config.MODEL_SAVE_DIR, "deberta"))
            self.zero_shot_detector.save_model(os.path.join(Config.MODEL_SAVE_DIR, "zero_shot"))
            self.tfidf_classifier.save_model(os.path.join(Config.MODEL_SAVE_DIR, "tfidf"))
            self.ensemble_classifier.save_model(os.path.join(Config.MODEL_SAVE_DIR, "ensemble"))
            
            # Save configuration and metadata
            with open(os.path.join(Config.MODEL_SAVE_DIR, 'config.pkl'), 'wb') as f:
                pickle.dump({
                    'Config': {
                        'DEBERTA_MODEL': Config.DEBERTA_MODEL,
                        'LLM_MODEL': Config.LLM_MODEL,
                        'BATCH_SIZE': Config.BATCH_SIZE,
                        'EPOCHS': Config.EPOCHS,
                        'N_FOLDS': Config.N_FOLDS,
                        'MAX_LENGTH': Config.MAX_LENGTH,
                        'LEARNING_RATE': Config.LEARNING_RATE,
                        'HAS_GPU': Config.HAS_GPU,
                        'DEVICE': str(Config.DEVICE)
                    }
                }, f)
            
            self.is_trained = True
            
            total_time = time.time() - self.start_time
            self.performance_metrics = {
                'total_training_time': total_time,
                'time_utilization': total_time / Config.MAX_RUNTIME,
                'epochs_trained': Config.EPOCHS,
                'folds_completed': Config.N_FOLDS,
                'samples_processed': len(texts) * Config.N_FOLDS
            }
            
            print(f"\n=== TRAINING AND SAVING COMPLETED ===")
            print(f"Total time: {total_time:.2f}s ({total_time/3600:.2f}h)")
            print(f"Time utilization: {self.performance_metrics['time_utilization']:.1%}")
            print(f"Epochs trained: {Config.EPOCHS}")
            print(f"Folds completed: {Config.N_FOLDS}")
            print(f"Models saved to: {Config.MODEL_SAVE_DIR}")
            
        except Exception as e:
            print(f"Error during training: {e}")
            raise
    
    def run_training_only(self):
        """Run training and save models without making predictions"""
        print("=" * 60)
        print("LLM AI Detection Competition - MODEL TRAINING PHASE")
        print("=" * 60)
        print(f"Target runtime: {Config.MAX_RUNTIME/3600:.1f} hours")
        print(f"Running on: {Config.DEVICE}")
        print(f"GPU Available: {Config.HAS_GPU}")
        print(f"Model: {Config.DEBERTA_MODEL}")
        print(f"Epochs: {Config.EPOCHS}")
        print(f"Folds: {Config.N_FOLDS}")
        print(f"Batch size: {Config.BATCH_SIZE}")
        print(f"Save directory: {Config.MODEL_SAVE_DIR}")
        print("=" * 60)
        
        try:
            # Load data
            train_df, _ = self.load_data()
            
            # Train and save models
            self.train_and_save_models(train_df)
            
            total_time = time.time() - self.start_time
            
            print("=" * 60)
            print("MODEL TRAINING COMPLETED!")
            print("=" * 60)
            print(f"Total runtime: {total_time:.2f}s ({total_time/3600:.2f}h)")
            print(f"Resource utilization: {total_time/Config.MAX_RUNTIME:.1%}")
            print(f"Performance metrics: {self.performance_metrics}")
            print(f"Models saved to: {Config.MODEL_SAVE_DIR}")
            print("=" * 60)
            print("Ready for inference in separate notebook!")
            print("=" * 60)
            
        except Exception as e:
            print(f"Error in training runner: {e}")
            raise

In [11]:
class StatisticalFeatureExtractor:
    """Enhanced statistical feature extraction"""
    
    def __init__(self):
        self.feature_names = []
    
    def extract_features(self, texts):
        """Extract comprehensive statistical features"""
        features = []
        
        for text in texts:
            try:
                text_features = {}
                
                # Basic statistics
                text_features['length'] = len(text)
                words = text.split()
                text_features['word_count'] = len(words)
                sentences = re.split(r'[.!?]+', text)
                text_features['sentence_count'] = len([s for s in sentences if s.strip()])
                
                # Word-level statistics
                if words:
                    text_features['avg_word_length'] = np.mean([len(word) for word in words])
                    text_features['max_word_length'] = max(len(word) for word in words)
                    text_features['unique_word_ratio'] = len(set(words)) / len(words)
                else:
                    text_features['avg_word_length'] = 0
                    text_features['max_word_length'] = 0
                    text_features['unique_word_ratio'] = 0
                
                # Punctuation and character statistics
                text_features['punct_ratio'] = len(re.findall(r'[^\w\s]', text)) / max(len(text), 1)
                text_features['caps_ratio'] = len(re.findall(r'[A-Z]', text)) / max(len(text), 1)
                text_features['digit_ratio'] = len(re.findall(r'\d', text)) / max(len(text), 1)
                
                # Sentence-level statistics
                if text_features['sentence_count'] > 0:
                    text_features['avg_sentence_length'] = text_features['word_count'] / text_features['sentence_count']
                else:
                    text_features['avg_sentence_length'] = 0
                
                # Linguistic features
                text_features['question_count'] = text.count('?')
                text_features['exclamation_count'] = text.count('!')
                text_features['comma_count'] = text.count(',')
                
                # Readability features
                complex_words = [word for word in words if len(word) > 6]
                text_features['complex_word_ratio'] = len(complex_words) / max(len(words), 1)
                
                features.append(text_features)
                
            except Exception as e:
                print(f"Error extracting features: {e}")
                features.append({
                    'length': 0, 'word_count': 0, 'sentence_count': 0,
                    'avg_word_length': 0, 'max_word_length': 0, 'unique_word_ratio': 0,
                    'punct_ratio': 0, 'caps_ratio': 0, 'digit_ratio': 0,
                    'avg_sentence_length': 0, 'question_count': 0,
                    'exclamation_count': 0, 'comma_count': 0, 'complex_word_ratio': 0
                })
        
        df = pd.DataFrame(features)
        self.feature_names = df.columns.tolist()
        return df

In [12]:
# Main execution
if __name__ == "__main__":
    # Initialize and run training only
    solution = CompetitionSolution()
    solution.run_training_only()
    
    print("\n" + "="*60)
    print("TRAINING PHASE COMPLETED!")
    print("All models saved successfully.")
    print("Ready to run inference in a separate notebook.")
    print("="*60)

LLM AI Detection Competition - MODEL TRAINING PHASE
Target runtime: 9.0 hours
Running on: cuda
GPU Available: True
Model: microsoft/deberta-v3-base
Epochs: 12
Folds: 10
Batch size: 16
Save directory: ./saved_models
Loading competition data...
Training samples: 1378
Test samples: 3
Starting training and saving pipeline...
Target epochs: 12
Target folds: 10
Save directory: ./saved_models
Preprocessing data...
Data: 1378 -> 1378 samples after preprocessing
Training with 1378 samples
Label distribution: [1375    3]

=== FOLD 1/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1240 samples...
Loading model: microsoft/deberta-v3-base


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded successfully on cuda


Epoch,Training Loss,Validation Loss
1,0.0759,0.001612
2,0.0235,0.000911
3,0.0475,0.000654
4,0.0283,0.000282
5,0.0006,0.000171
6,0.0004,0.000164
7,0.0004,0.000157
8,0.003,0.000394
9,0.0056,8.9e-05
10,0.0024,8e-05


Enhanced DeBERTa training completed in 1236.40 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1240 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 138 samples...
Zero-shot detection on 138 samples...
Running enhanced zero-shot detection on 138 samples...
Processing text 1/138
Loading enhanced zero-shot model: microsoft/DialoGPT-medium


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Enhanced zero-shot model loaded successfully
Processing text 26/138
Processing text 51/138
Processing text 76/138
Processing text 101/138
Processing text 126/138
Fold 1 completed in 1557.50s (total: 1557.50s)

=== FOLD 2/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1240 samples...


Epoch,Training Loss,Validation Loss
1,0.0023,0.005954
2,0.0015,2e-06
3,0.04,3.5e-05
4,0.0001,6e-06
5,0.0,4e-06
6,0.0,3e-06
7,0.0,2e-06
8,0.0,2e-06
9,0.0,2e-06
10,0.0,2e-06


Enhanced DeBERTa training completed in 1222.33 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1240 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 138 samples...
Zero-shot detection on 138 samples...
Running enhanced zero-shot detection on 138 samples...
Processing text 1/138
Processing text 26/138
Processing text 51/138
Processing text 76/138
Processing text 101/138
Processing text 126/138
Fold 2 completed in 3083.67s (total: 3083.67s)

=== FOLD 3/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1240 samples...


Epoch,Training Loss,Validation Loss
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,0.0
10,0.0,0.0


Enhanced DeBERTa training completed in 1223.51 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1240 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 138 samples...
Zero-shot detection on 138 samples...
Running enhanced zero-shot detection on 138 samples...
Processing text 1/138
Processing text 26/138
Processing text 51/138
Processing text 76/138
Processing text 101/138
Processing text 126/138
Fold 3 completed in 4619.59s (total: 4619.59s)

=== FOLD 4/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1240 samples...


Epoch,Training Loss,Validation Loss
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,0.0
10,0.0,0.0


Enhanced DeBERTa training completed in 1221.00 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1240 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 138 samples...
Zero-shot detection on 138 samples...
Running enhanced zero-shot detection on 138 samples...
Processing text 1/138
Processing text 26/138
Processing text 51/138
Processing text 76/138
Processing text 101/138
Processing text 126/138
Fold 4 completed in 6150.60s (total: 6150.60s)

=== FOLD 5/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1240 samples...


Epoch,Training Loss,Validation Loss
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,0.0
10,0.0,0.0


Enhanced DeBERTa training completed in 1224.22 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1240 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 138 samples...
Zero-shot detection on 138 samples...
Running enhanced zero-shot detection on 138 samples...
Processing text 1/138
Processing text 26/138
Processing text 51/138
Processing text 76/138
Processing text 101/138
Processing text 126/138
Fold 5 completed in 7689.92s (total: 7689.92s)

=== FOLD 6/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1240 samples...


Epoch,Training Loss,Validation Loss
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,0.0
10,0.0,0.0


Enhanced DeBERTa training completed in 1224.97 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1240 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 138 samples...
Zero-shot detection on 138 samples...
Running enhanced zero-shot detection on 138 samples...
Processing text 1/138
Processing text 26/138
Processing text 51/138
Processing text 76/138
Processing text 101/138
Processing text 126/138
Fold 6 completed in 9223.96s (total: 9223.96s)

=== FOLD 7/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1240 samples...


Epoch,Training Loss,Validation Loss
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,0.0
10,0.0,0.0


Enhanced DeBERTa training completed in 1222.01 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1240 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 138 samples...
Zero-shot detection on 138 samples...
Running enhanced zero-shot detection on 138 samples...
Processing text 1/138
Processing text 26/138
Processing text 51/138
Processing text 76/138
Processing text 101/138
Processing text 126/138
Fold 7 completed in 10759.02s (total: 10759.02s)

=== FOLD 8/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1240 samples...


Epoch,Training Loss,Validation Loss
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,0.0
10,0.0,0.0


Enhanced DeBERTa training completed in 1223.59 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1240 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 138 samples...
Zero-shot detection on 138 samples...
Running enhanced zero-shot detection on 138 samples...
Processing text 1/138
Processing text 26/138
Processing text 51/138
Processing text 76/138
Processing text 101/138
Processing text 126/138
Fold 8 completed in 12293.64s (total: 12293.64s)

=== FOLD 9/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1241 samples...


Epoch,Training Loss,Validation Loss
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,0.0
10,0.0,0.0


Enhanced DeBERTa training completed in 1222.37 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1241 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 137 samples...
Zero-shot detection on 137 samples...
Running enhanced zero-shot detection on 137 samples...
Processing text 1/137
Processing text 26/137
Processing text 51/137
Processing text 76/137
Processing text 101/137
Processing text 126/137
Fold 9 completed in 13822.89s (total: 13822.89s)

=== FOLD 10/10 ===
Training DeBERTa with 12 epochs...
Training DeBERTa with 12 epochs on 1241 samples...


Epoch,Training Loss,Validation Loss
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,0.0
10,0.0,0.0


Enhanced DeBERTa training completed in 1227.57 seconds!
Training enhanced TF-IDF ensemble...
Training enhanced TF-IDF ensemble with 1241 samples...
Training vectorizer 1/3
Training vectorizer 2/3
Training vectorizer 3/3
Training classifier 1/3
Training classifier 2/3
Training classifier 3/3
Enhanced TF-IDF ensemble training completed!
Getting enhanced predictions...
Making enhanced predictions on 137 samples...
Zero-shot detection on 137 samples...
Running enhanced zero-shot detection on 137 samples...
Processing text 1/137
Processing text 26/137
Processing text 51/137
Processing text 76/137
Processing text 101/137
Processing text 126/137
Fold 10 completed in 15354.78s (total: 15354.78s)
Training advanced ensemble meta-classifiers...
Training advanced ensemble meta-classifiers...
Advanced features shape: (1378, 13)
Training meta-classifier 1/3
Training meta-classifier 2/3
Training meta-classifier 3/3
Advanced ensemble training completed!

=== SAVING MODELS ===
DeBERTa model saved to ./