In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cnn-lstm-training-output/cnn_training_output/cnn_training_output/confusion_matrix.png
/kaggle/input/cnn-lstm-training-output/cnn_training_output/cnn_training_output/best_plasma_cnn_model.pth
/kaggle/input/cnn-lstm-training-output/cnn_training_output/cnn_training_output/training_history.png
/kaggle/input/cnn-lstm-training-output/cnn_training_output/cnn_training_output/roc_curve.png
/kaggle/input/cnn-lstm-training-output/snn_training_output/snn_training_output/working_snn_model.pth
/kaggle/input/cnn-lstm-training-output/snn_training_output/snn_training_output/working_snn_results.json
/kaggle/input/cnn-lstm-training-output/lstm_training_output/lstm_training_output/lstm_training_results.png
/kaggle/input/cnn-lstm-training-output/lstm_training_output/lstm_training_output/best_plasma_lstm_model.pth


In [4]:
!pip install snntorch

Collecting snntorch
  Downloading snntorch-0.9.4-py2.py3-none-any.whl.metadata (15 kB)
Downloading snntorch-0.9.4-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.6/125.6 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: snntorch
Successfully installed snntorch-0.9.4


In [13]:
"""
Proven Hybrid Ensemble - Working SNN (84.2%) + Optimized Feature Classifier
Build on confirmed 83.6% baseline to achieve >95% target
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import snntorch as snn
from snntorch import surrogate
import numpy as np
from sklearn.metrics import classification_report
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import time
import json
from pathlib import Path
import warnings
import random
warnings.filterwarnings('ignore')

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is a module-level global: code later in this cell reads it when
# loading the SNN checkpoint and when moving the model and batches.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"🔧 Proven Hybrid Ensemble on: {device}")

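# NOTE: PlasmaLikeDataset is NOT defined in this cell. main() below instantiates it
# (num_samples, input_size, anomaly_ratio) and calls its get_balanced_sampler()
# method, so the cell that defines the class must be run first, or the class
# definition must be pasted here for a clean Restart & Run All.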

class WorkingSNN(nn.Module):
    """Three-stage fully connected spiking network, evaluated for one time step.

    Each stage is an ``nn.Linear`` followed by an ``snn.Leaky`` neuron layer;
    dropout (p=0.1) is applied to the spikes of the first two stages. The
    author describes this as an exact replica of a previously trained model,
    so attribute names and hyper-parameters are deliberately left untouched
    (they determine the ``state_dict`` keys used when loading a checkpoint).

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the first hidden stage; the second stage uses
            ``hidden_size // 2``.
        output_size: Number of output units.
    """

    def __init__(self, input_size=512, hidden_size=256, output_size=2):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        half_hidden = hidden_size // 2

        # Dense projections that feed the three neuron layers.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, half_hidden)
        self.fc3 = nn.Linear(half_hidden, output_size)

        # Leaky neuron layers: beta=0.8, straight-through surrogate gradient.
        self.lif1 = snn.Leaky(beta=0.8, spike_grad=surrogate.straight_through_estimator())
        self.lif2 = snn.Leaky(beta=0.8, spike_grad=surrogate.straight_through_estimator())
        self.lif3 = snn.Leaky(beta=0.8, spike_grad=surrogate.straight_through_estimator(), output=True)

        # Small-variance normal weights and zero biases on every dense layer.
        for dense in (self.fc1, self.fc2, self.fc3):
            nn.init.normal_(dense.weight, mean=0.0, std=0.01)
            nn.init.zeros_(dense.bias)

        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Run one time step and return the output layer's membrane potential.

        Membrane state is re-initialised on every call, so nothing carries
        over between calls.

        Args:
            x: Input batch passed to ``fc1``.

        Returns:
            The membrane value returned by ``lif3`` (its spikes are discarded).
        """
        membrane_1 = self.lif1.init_leaky()
        membrane_2 = self.lif2.init_leaky()
        membrane_3 = self.lif3.init_leaky()

        # Stage 1
        spikes_1, membrane_1 = self.lif1(self.fc1(x), membrane_1)
        spikes_1 = self.dropout(spikes_1)

        # Stage 2
        spikes_2, membrane_2 = self.lif2(self.fc2(spikes_1), membrane_2)
        spikes_2 = self.dropout(spikes_2)

        # Output stage: only the membrane potential is used downstream.
        _, membrane_3 = self.lif3(self.fc3(spikes_2), membrane_3)

        return membrane_3

class OptimizedFeatureClassifier(nn.Module):
    """Multi-layer perceptron mapping a feature vector to class logits.

    The network has three hidden stages of widths 256, 128 and 64. Each stage
    is ``Linear -> ReLU -> BatchNorm1d -> Dropout`` with dropout probabilities
    0.3, 0.2 and 0.1 respectively, followed by a final ``Linear`` projecting
    to ``output_size``. All layers live in ``self.classifier`` (an
    ``nn.Sequential``) in that order.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of output logits.
    """

    def __init__(self, input_size=512, output_size=2):
        super().__init__()

        # (width, dropout probability) for each hidden stage, in order.
        hidden_stages = ((256, 0.3), (128, 0.2), (64, 0.1))

        stack = []
        fan_in = input_size
        for width, drop_p in hidden_stages:
            stack += [
                nn.Linear(fan_in, width),
                nn.ReLU(),
                nn.BatchNorm1d(width),
                nn.Dropout(drop_p),
            ]
            fan_in = width
        stack.append(nn.Linear(fan_in, output_size))

        self.classifier = nn.Sequential(*stack)

        # Xavier-normal weights and zero biases on every Linear layer.
        for layer in self.classifier:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_normal_(layer.weight)
                nn.init.zeros_(layer.bias)

        print("✅ OptimizedFeatureClassifier: Deep feature processing")

    def forward(self, x):
        """Return the logits produced by the sequential stack for batch ``x``."""
        return self.classifier(x)

class ProvenHybridEnsemble(nn.Module):
    """Weighted ensemble of a pre-trained WorkingSNN and an OptimizedFeatureClassifier.

    Both components receive the same 512-feature input and emit two values per
    sample. The ensemble output is a fixed linear blend of the two:
    ``0.75 * snn_output + 0.25 * fc_output``.

    Attributes:
        snn: The WorkingSNN component.
        feature_classifier: The OptimizedFeatureClassifier component.
        snn_weight: Blend weight of the SNN output (0.75).
        fc_weight: Blend weight of the classifier output (0.25).
        snn_weights_loaded: True only when the SNN checkpoint was found and
            every key matched the model; False otherwise.
    """

    def __init__(self):
        super(ProvenHybridEnsemble, self).__init__()

        # Proven working SNN
        self.snn = WorkingSNN(input_size=512, hidden_size=256, output_size=2)

        # Optimized feature classifier
        self.feature_classifier = OptimizedFeatureClassifier(input_size=512, output_size=2)

        # Load proven SNN weights (best effort: failures are reported, not raised)
        self.snn_weights_loaded = False
        self._load_proven_snn_weights()

        # Conservative ensemble weights (favor proven SNN)
        self.snn_weight = 0.75  # 75% weight to proven component
        self.fc_weight = 0.25   # 25% weight to optimized component

        print("🚀 ProvenHybridEnsemble: Proven SNN + Optimized Classifier")
        print(f"   SNN: PROVEN 84.2% baseline (weight: {self.snn_weight})")
        print(f"   Classifier: Optimized deep features (weight: {self.fc_weight})")
        print(f"   Strategy: Build on proven foundation to reach >95%")

    def _load_proven_snn_weights(self):
        """Load the SNN checkpoint into ``self.snn`` and report how well it matched.

        Loading stays best-effort: a missing file or a load error is printed
        and the SNN keeps its freshly initialised weights. Because
        ``load_state_dict(strict=False)`` never raises on key mismatches, the
        returned ``missing_keys`` / ``unexpected_keys`` are inspected so that
        a checkpoint which does not fit the model is not reported as a success.
        Sets ``self.snn_weights_loaded`` accordingly.
        """
        self.snn_weights_loaded = False
        try:
            working_snn_path = Path("/kaggle/input/cnn-lstm-training-output/snn_training_output/snn_training_output/working_snn_model.pth")
            if not working_snn_path.exists():
                print(f"❌ Proven SNN weights not found at: {working_snn_path}")
                return

            snn_state = torch.load(working_snn_path, map_location=device, weights_only=True)
            load_result = self.snn.load_state_dict(snn_state, strict=False)
            missing = list(load_result.missing_keys)
            unexpected = list(load_result.unexpected_keys)

            if missing or unexpected:
                # Some (possibly all) parameters were NOT taken from the checkpoint.
                print("⚠️ Proven SNN checkpoint did not fully match the model:")
                if missing:
                    print(f"   Missing keys (kept at initial values): {missing}")
                if unexpected:
                    print(f"   Unexpected keys (ignored): {unexpected}")
            else:
                self.snn_weights_loaded = True
                print("✅ Proven SNN weights loaded successfully!")
                print("   Expected SNN performance: 84.2% balanced accuracy")
        except Exception as e:
            print(f"❌ Error loading proven SNN weights: {e}")

    def forward(self, x):
        """Run both components on ``x`` and blend their outputs.

        Args:
            x: Input batch given unchanged to both the SNN and the classifier.

        Returns:
            Tuple ``(ensemble_output, snn_output, fc_output)`` where
            ``ensemble_output = snn_weight * snn_output + fc_weight * fc_output``.
        """
        # Proven SNN path
        snn_output = self.snn(x)

        # Optimized classifier path
        fc_output = self.feature_classifier(x)

        # Conservative weighted ensemble
        ensemble_output = self.snn_weight * snn_output + self.fc_weight * fc_output

        return ensemble_output, snn_output, fc_output

    def predict_anomaly(self, features):
        """Predict classes and softmax probabilities for the ensemble and each component.

        Switches the module to eval mode (and leaves it there) and runs
        without gradient tracking.

        Args:
            features: Input batch forwarded to the ensemble.

        Returns:
            Tuple ``(ensemble_preds, ensemble_probs, snn_preds, snn_probs,
            fc_preds, fc_probs)``; predictions are the argmax over dim 1 and
            probabilities the softmax over dim 1 of the respective outputs.
        """
        self.eval()
        with torch.no_grad():
            # Call the module (not .forward directly) so registered hooks run.
            ensemble_outputs, snn_outputs, fc_outputs = self(features)

            # Ensemble predictions
            ensemble_probs = torch.softmax(ensemble_outputs, dim=1)
            ensemble_preds = ensemble_outputs.argmax(dim=1)

            # Component predictions for analysis
            snn_probs = torch.softmax(snn_outputs, dim=1)
            snn_preds = snn_outputs.argmax(dim=1)

            fc_probs = torch.softmax(fc_outputs, dim=1)
            fc_preds = fc_outputs.argmax(dim=1)

            return ensemble_preds, ensemble_probs, snn_preds, snn_probs, fc_preds, fc_probs

def train_optimized_classifier(model, train_loader, val_loader, num_epochs=10):
    """Train only ``model.feature_classifier`` while ``model.snn`` stays frozen.

    Uses AdamW (lr=0.001, weight_decay=1e-4), cross-entropy loss and a cosine
    annealing schedule over ``num_epochs``. After every epoch the classifier
    is evaluated on ``val_loader``; the weights of the best-scoring epoch are
    kept and restored into the classifier before returning, so the model does
    not end up with a worse last-epoch state.

    Side effects: every parameter of ``model.snn`` has ``requires_grad`` set
    to False (and it stays that way), and the model is left in eval mode when
    at least one epoch ran.

    Args:
        model: Module exposing ``snn`` and ``feature_classifier`` sub-modules.
        train_loader: Iterable of ``(features, labels)`` training batches.
        val_loader: Iterable of ``(features, labels)`` validation batches.
        num_epochs: Number of training epochs.

    Returns:
        float: Best validation accuracy in percent (0.0 if nothing was
        evaluated). Earlier versions returned None, so existing callers that
        ignore the result are unaffected.
    """
    print("\n🧠 Training optimized feature classifier...")
    print("   SNN weights: FROZEN (preserving 84.2% baseline)")
    print("   Training: Feature classifier only")

    # Freeze proven SNN; model.train()/eval() never touch requires_grad, so
    # doing this once is sufficient.
    for param in model.snn.parameters():
        param.requires_grad = False

    classifier = model.feature_classifier
    # Move batches to wherever the classifier lives instead of relying on a
    # module-level global.
    run_device = next(classifier.parameters()).device

    # Train only feature classifier
    optimizer = torch.optim.AdamW(
        classifier.parameters(),
        lr=0.001,
        weight_decay=1e-4
    )

    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    best_accuracy = 0.0
    best_state = None  # snapshot of the classifier at its best validation epoch

    for epoch in range(num_epochs):
        # Training
        model.train()
        correct = 0
        total = 0

        for features, labels in train_loader:
            features, labels = features.to(run_device), labels.to(run_device)

            optimizer.zero_grad()

            # Only the feature classifier takes part in the forward/backward pass
            fc_output = classifier(features)
            loss = criterion(fc_output, labels)

            loss.backward()
            optimizer.step()

            predicted = fc_output.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Validation
        model.eval()
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(run_device), labels.to(run_device)
                fc_output = classifier(features)
                predicted = fc_output.argmax(dim=1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        # Guard against empty loaders instead of dividing by zero.
        train_acc = 100. * correct / total if total else 0.0
        val_acc = 100. * val_correct / val_total if val_total else 0.0

        if best_state is None or val_acc > best_accuracy:
            best_accuracy = val_acc
            # Clone so later optimizer steps cannot modify the snapshot.
            best_state = {
                key: value.detach().clone()
                for key, value in classifier.state_dict().items()
            }

        scheduler.step()

        print(f"   Epoch {epoch+1}/{num_epochs}: Train Acc: {train_acc:.1f}%, Val Acc: {val_acc:.1f}%")

    # Hand back the classifier at its best validation epoch, not the last one.
    if best_state is not None:
        classifier.load_state_dict(best_state)

    print(f"✅ Feature classifier training complete! Best Val Acc: {best_accuracy:.1f}%")

    return best_accuracy

def _summarize_binary_accuracy(predictions, labels):
    """Return (overall, normal, anomaly, balanced) accuracy in percent for labels 0/1.

    A class that does not occur in ``labels`` gets ``nan`` as its accuracy and
    is left out of the balanced average, so one absent class no longer turns
    the balanced accuracy into ``nan``.
    """
    if labels.size == 0:
        nan = float('nan')
        return nan, nan, nan, nan

    overall = (predictions == labels).mean() * 100

    per_class = []
    for cls in (0, 1):
        mask = labels == cls
        if mask.any():
            per_class.append((predictions[mask] == cls).mean() * 100)
        else:
            per_class.append(float('nan'))

    present = [acc for acc in per_class if not np.isnan(acc)]
    balanced = sum(present) / len(present) if present else float('nan')

    return overall, per_class[0], per_class[1], balanced


def main():
    """Build, train and evaluate the hybrid ensemble, then save a JSON summary.

    Steps:
        1. Create train/val/test ``PlasmaLikeDataset`` instances (the class
           must already be defined in the kernel) and their data loaders.
        2. Instantiate ``ProvenHybridEnsemble`` on ``device`` and train only
           its feature classifier via ``train_optimized_classifier``.
        3. Run the ensemble over the test loader, timing the whole loop.
        4. Print ensemble and SNN-only accuracy, per-sample latency and the
           target status, and write the summary to
           ``proven_hybrid_output/proven_hybrid_results.json``.

    Returns:
        dict: The summary that was written to disk.
    """
    # Reference balanced accuracy (percent) the author reports for the SNN
    # alone; improvements are measured against it.
    snn_baseline_acc = 84.2

    print("🚀 PROVEN HYBRID ENSEMBLE FOR >95% TARGET")
    print("=" * 60)
    print("🎯 Goal: Build on proven 84.2% SNN baseline to reach >95%")
    print("🧠 Architecture: Proven SNN (75%) + Optimized Classifier (25%)")
    print("🔧 Strategy: Conservative ensemble preserving proven foundation")
    print("=" * 60)

    output_dir = Path("proven_hybrid_output")
    output_dir.mkdir(exist_ok=True)

    # Create datasets
    print("\n📊 Creating plasma-like datasets...")
    train_dataset = PlasmaLikeDataset(num_samples=3000, input_size=512, anomaly_ratio=0.3)
    val_dataset = PlasmaLikeDataset(num_samples=800, input_size=512, anomaly_ratio=0.3)
    test_dataset = PlasmaLikeDataset(num_samples=1000, input_size=512, anomaly_ratio=0.3)

    train_sampler = train_dataset.get_balanced_sampler()

    train_loader = DataLoader(train_dataset, batch_size=32, sampler=train_sampler)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Initialize proven hybrid model
    print("\n🔧 Initializing proven hybrid ensemble...")
    model = ProvenHybridEnsemble().to(device)

    # Train optimized classifier (keep SNN frozen)
    train_optimized_classifier(model, train_loader, val_loader, num_epochs=10)

    # Test ensemble
    print("\n🧪 Testing proven hybrid ensemble...")
    model.eval()

    all_ensemble_predictions = []
    all_snn_predictions = []
    all_labels = []

    # The timed region covers the whole loop: batch loading, device transfer,
    # inference and copying predictions back to the host.
    start_time = time.perf_counter()

    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)

            # Only the class predictions of the ensemble and the SNN are scored.
            ensemble_preds, _, snn_preds, _, _, _ = model.predict_anomaly(features)

            all_ensemble_predictions.extend(ensemble_preds.cpu().numpy())
            all_snn_predictions.extend(snn_preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    end_time = time.perf_counter()
    inference_time = (end_time - start_time) * 1000

    # Calculate metrics
    ensemble_predictions = np.array(all_ensemble_predictions)
    snn_predictions = np.array(all_snn_predictions)
    true_labels = np.array(all_labels)

    # Ensemble metrics
    (ensemble_accuracy, ensemble_normal_acc,
     ensemble_anomaly_acc, ensemble_balanced_acc) = _summarize_binary_accuracy(
        ensemble_predictions, true_labels)

    # SNN component validation (overall accuracy is not reported)
    _, snn_normal_acc, snn_anomaly_acc, snn_balanced_acc = _summarize_binary_accuracy(
        snn_predictions, true_labels)

    # Results
    print(f"\n🎉 PROVEN HYBRID ENSEMBLE RESULTS")
    print("=" * 55)
    print(f"📊 Ensemble Performance:")
    print(f"   Overall Accuracy: {ensemble_accuracy:.1f}%")
    print(f"   Balanced Accuracy: {ensemble_balanced_acc:.1f}%")
    print(f"   Normal Class: {ensemble_normal_acc:.1f}%")
    print(f"   Anomaly Class: {ensemble_anomaly_acc:.1f}%")

    print(f"\n🧠 SNN Component Validation:")
    print(f"   Balanced Accuracy: {snn_balanced_acc:.1f}%")
    print(f"   Normal Class: {snn_normal_acc:.1f}%")
    print(f"   Anomaly Class: {snn_anomaly_acc:.1f}%")

    # Per-sample latency; undefined (nan) when the test loader yielded nothing.
    num_samples = len(all_ensemble_predictions)
    avg_latency = inference_time / num_samples if num_samples else float('nan')
    latency_met = avg_latency < 1.0

    print(f"\n⚡ Inference Performance:")
    print(f"   Per Sample: {avg_latency:.3f} ms")
    print(f"   Latency Target (<1ms): {'✅ MET' if latency_met else '❌ MISSED'}")

    # Target achievement
    accuracy_target = 95.0
    target_met = ensemble_balanced_acc >= accuracy_target

    print(f"\n🎯 PROJECT TARGET ANALYSIS:")
    print(f"Target (>95%): {'🎉 ACHIEVED' if target_met else '❌ NOT MET'} ({ensemble_balanced_acc:.1f}%)")
    print(f"Improvement: {ensemble_balanced_acc - snn_baseline_acc:+.1f}% over proven SNN baseline")

    # Overall status
    if target_met and latency_met:
        status = "🎉 COMPLETE SUCCESS: >95% accuracy + <1ms latency achieved!"
    elif target_met:
        status = "✅ ACCURACY SUCCESS: >95% target achieved!"
    elif ensemble_balanced_acc > 90:
        status = "⚡ SIGNIFICANT IMPROVEMENT: Close to >95% target"
    else:
        status = "⚠️ NEEDS FURTHER OPTIMIZATION"

    print(f"\n{status}")

    # Save results
    results = {
        'ensemble_balanced_accuracy': float(ensemble_balanced_acc),
        'snn_baseline_maintained': float(snn_balanced_acc),
        'improvement_over_baseline': float(ensemble_balanced_acc - snn_baseline_acc),
        'target_achieved': bool(target_met),
        'latency_met': bool(latency_met),
        'avg_latency_ms': float(avg_latency),
        'status': status
    }

    with open(output_dir / "proven_hybrid_results.json", 'w') as f:
        json.dump(results, f, indent=2)

    print(f"📁 Results saved to: {output_dir}")

    return results

if __name__ == "__main__":
    # Seed Python, NumPy and PyTorch RNGs with the same fixed value so that
    # repeated runs start from the same random state.
    random.seed(42)
    np.random.seed(42)
    torch.manual_seed(42)

    try:
        results = main()
        print("\n✅ Proven hybrid ensemble testing completed!")
    except Exception as exc:
        # Report the failure, then re-raise so the traceback is still shown.
        print(f"\n❌ Error during proven hybrid testing: {exc}")
        raise

🔧 Proven Hybrid Ensemble on: cuda
🚀 PROVEN HYBRID ENSEMBLE FOR >95% TARGET
🎯 Goal: Build on proven 84.2% SNN baseline to reach >95%
🧠 Architecture: Proven SNN (75%) + Optimized Classifier (25%)
🔧 Strategy: Conservative ensemble preserving proven foundation

📊 Creating plasma-like datasets...
🔄 Creating plasma-like dataset: 3000 samples, 30.0% anomalies
   Normal patterns: mean=-0.003, std=0.514
   Anomaly patterns: mean=0.284, std=1.205
   Class distribution: {0: 2100, 1: 900}
🔄 Creating plasma-like dataset: 800 samples, 30.0% anomalies
   Normal patterns: mean=-0.002, std=0.514
   Anomaly patterns: mean=0.283, std=1.210
   Class distribution: {0: 560, 1: 240}
🔄 Creating plasma-like dataset: 1000 samples, 30.0% anomalies
   Normal patterns: mean=0.002, std=0.514
   Anomaly patterns: mean=0.287, std=1.214
   Class distribution: {0: 700, 1: 300}

🔧 Initializing proven hybrid ensemble...
✅ OptimizedFeatureClassifier: Deep feature processing
✅ Proven SNN weights loaded successfully!
   Exp