<a href="https://colab.research.google.com/github/binnwy/NASA/blob/main/Exo_Ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!ls "/content/drive/MyDrive/exo/"


autoencoder_kepler_confirmed.h5    kepler_koi_cleaned.csv
autoencoder_kepler.h5		   rf_kepler_model.joblib
autoencoder_scaler_confirmed.save  svm_kepler_model.save
autoencoder_scaler.save		   svm_kepler_no_smote_model.save
confirmed_exoplanets.csv	   svm_kepler_tuned_model.save
dnn_kepler.keras		   svm_scaler_no_smote.save
dnn_scaler.save			   svm_scaler.save
dt_kepler_model.joblib		   xgb_kepler_model.joblib


In [None]:
import pandas as pd
# The first CSV column has no header, so a plain read_csv surfaces it as a
# spurious "Unnamed: 0" column; index_col=0 reads it as the row index instead,
# leaving only the real KOI columns in the frame.
df = pd.read_csv("/content/drive/MyDrive/exo/kepler_koi_cleaned.csv", index_col=0)
df.head()

Unnamed: 0,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_duration,koi_depth,koi_prad,koi_impact,koi_steff,koi_slogg,koi_srad,koi_kepmag,koi_disposition,koi_disposition_encoded
0,1.0,0,0,0,0,9.488036,2.9575,615.8,2.26,0.146,5455.0,4.467,0.927,15.347,CONFIRMED,1
1,0.969,0,0,0,0,54.418383,4.507,874.8,2.83,0.586,5455.0,4.467,0.927,15.347,CONFIRMED,1
2,0.0,0,0,0,0,19.89914,1.7822,10829.0,14.6,0.969,5853.0,4.544,0.868,15.436,CANDIDATE,0
3,0.0,0,1,0,0,1.736952,2.40641,8079.2,33.46,1.276,5805.0,4.564,0.791,15.597,FALSE POSITIVE,-1
4,1.0,0,0,0,0,2.525592,1.6545,603.3,2.75,0.701,6031.0,4.438,1.046,15.509,CONFIRMED,1


In [None]:
import numpy as np
import joblib
from tensorflow.keras.models import load_model
from collections import Counter

# Load base models in Colab
# The SVM is paired with svm_scaler.save, matching both the artifact naming
# (svm_scaler_no_smote.save belongs with svm_kepler_no_smote_model.save) and
# the pairing used by the ExoplanetEnsemble cell further down.
# NOTE(review): confirm this is the scaler the SVM was actually trained with.
model_svm = joblib.load("/content/drive/MyDrive/exo/svm_kepler_model.save")
scaler_svm = joblib.load("/content/drive/MyDrive/exo/svm_scaler.save")

# The DNN has its own scaler.
model_dnn = load_model("/content/drive/MyDrive/exo/dnn_kepler.keras")
scaler_dnn = joblib.load("/content/drive/MyDrive/exo/dnn_scaler.save")

# The tree-based models are fed the raw (unscaled) features.
model_dt = joblib.load("/content/drive/MyDrive/exo/dt_kepler_model.joblib")
model_rf = joblib.load("/content/drive/MyDrive/exo/rf_kepler_model.joblib")
model_xgb = joblib.load("/content/drive/MyDrive/exo/xgb_kepler_model.joblib")

# Ensemble class
class EnsembleModel:
    """Hard-voting ensemble over the SVM, DNN, decision tree, random forest and XGBoost models.

    Each base model casts one vote per sample and the most common label wins.
    On a tie, the label that appears first in the vote order
    (SVM, DNN, DT, RF, XGBoost) is returned.
    """

    def __init__(self):
        # Store models inside the object.
        # These are the module-level objects loaded earlier in this cell, so
        # the loading code must have run before an instance is created.
        self.model_svm = model_svm
        self.scaler_svm = scaler_svm
        self.model_dnn = model_dnn
        self.scaler_dnn = scaler_dnn
        self.model_dt = model_dt
        self.model_rf = model_rf
        self.model_xgb = model_xgb

    def predict(self, X):
        """Return the majority-vote label for every sample in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_features,)
            Raw, unscaled features. A single flat sample is treated as a
            batch of one.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            The winning label per sample.
        """
        # atleast_2d promotes a single flat sample to a one-row batch, which
        # the scalers and models below require.
        X = np.atleast_2d(np.array(X))

        # The SVM and the DNN each have their own scaler; the tree models
        # consume the raw features.
        X_svm = self.scaler_svm.transform(X)
        X_dnn = self.scaler_dnn.transform(X)

        pred_svm = self.model_svm.predict(X_svm)
        # Assumes the DNN emits one probability per sample; 0.5 is the cut-off.
        pred_dnn = (self.model_dnn.predict(X_dnn) > 0.5).astype(int).flatten()
        pred_dt = self.model_dt.predict(X)
        pred_rf = self.model_rf.predict(X)
        pred_xgb = self.model_xgb.predict(X)

        # One tuple of five votes per sample; Counter picks the majority label.
        votes_per_sample = zip(pred_svm, pred_dnn, pred_dt, pred_rf, pred_xgb)
        return np.array(
            [Counter(votes).most_common(1)[0][0] for votes in votes_per_sample]
        )

# Build the voting ensemble from the models loaded above.
ensemble_model = EnsembleModel()

# Persist the whole ensemble object (base models and scalers included) to Drive.
joblib.dump(
    value=ensemble_model,
    filename="/content/drive/MyDrive/exo/ensemble_model_full.save",
)
print("✅ Fully self-contained ensemble saved!")


✅ Fully self-contained ensemble saved!


In [None]:
# Load the ensemble model written by the previous cell. That cell saves to
# ensemble_model_full.save; loading any other name only works if a stale file
# happens to be on Drive.
ensemble_model = joblib.load("/content/drive/MyDrive/exo/ensemble_model_full.save")

# Make predictions on new tuples.
# Values follow the CSV feature order, from koi_score through koi_kepmag.
tuple1 = [1, 0, 0, 0, 0, 9.488, 2.95, 615.8, 2.26, 0.146, 5455, 4.467, 0.927, 15.347]
tuple2 = [0, 0, 1, 0, 0, 1.736952453, 2.40641, 8079.2, 33.46, 1.276, 5805, 4.564, 0.791, 15.597]

preds = ensemble_model.predict([tuple1, tuple2])
for i, p in enumerate(preds):
    print(f"Tuple {i+1}: {'Confirmed Planet' if p==1 else 'Not Planet'}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 388ms/step
Tuple 1: Confirmed Planet
Tuple 2: Not Planet




In [None]:
import numpy as np
import joblib
from tensorflow.keras.models import load_model
import pickle

class ExoplanetEnsemble:
    """
    Ensemble model combining DNN, Decision Tree, Random Forest, and XGBoost
    for exoplanet classification.

    The ensemble output is a weighted average ("soft vote") of the class
    probabilities returned by the four base models. Weights are ordered
    [DNN, DT, RF, XGBoost]; equal weights are used until set_weights() is
    called.
    """

    # Result-dict keys for the base models, in the same order as the weights.
    _MODEL_KEYS = ('DNN', 'Decision_Tree', 'Random_Forest', 'XGBoost')

    def __init__(self):
        self.model_dnn = None
        self.scaler_dnn = None
        self.model_dt = None
        self.model_rf = None
        self.model_xgb = None
        self.feature_names = None
        self.weights = None

    def load_models(self, dnn_path, scaler_path, dt_path, rf_path, xgb_path):
        """Load all base models and scaler"""
        print("Loading models...")
        self.model_dnn = load_model(dnn_path)
        self.scaler_dnn = joblib.load(scaler_path)
        self.model_dt = joblib.load(dt_path)
        self.model_rf = joblib.load(rf_path)
        self.model_xgb = joblib.load(xgb_path)
        print("All models loaded successfully!")

    def set_feature_names(self, feature_names):
        """Set the feature names used for training"""
        self.feature_names = feature_names

    def set_weights(self, weights=None):
        """
        Set voting weights for each model
        Default: Equal weights [0.25, 0.25, 0.25, 0.25]
        Order: [DNN, DT, RF, XGBoost]

        Custom weights are normalized to sum to 1.

        Raises:
        -------
        ValueError
            If `weights` does not hold exactly four finite, non-negative
            values with a positive sum.
        """
        if weights is None:
            self.weights = np.array([0.25, 0.25, 0.25, 0.25])
        else:
            candidate = np.asarray(weights, dtype=float)
            if candidate.shape != (len(self._MODEL_KEYS),):
                raise ValueError(
                    f"Expected {len(self._MODEL_KEYS)} weights "
                    f"(DNN, DT, RF, XGBoost), got shape {candidate.shape}"
                )
            if not np.all(np.isfinite(candidate)) or np.any(candidate < 0):
                raise ValueError("Weights must be finite and non-negative")
            total = candidate.sum()
            if total <= 0:
                raise ValueError("Weights must have a positive sum")
            # Normalize weights to sum to 1
            self.weights = candidate / total
        print(f"Model weights set: DNN={self.weights[0]:.3f}, DT={self.weights[1]:.3f}, "
              f"RF={self.weights[2]:.3f}, XGBoost={self.weights[3]:.3f}")

    def _current_weights(self):
        """Return the configured weights, or equal weights if none were set."""
        if self.weights is None:
            count = len(self._MODEL_KEYS)
            return np.full(count, 1.0 / count)
        return self.weights

    def _base_probabilities(self, X):
        """Run each base model once; return their probabilities in weight order."""
        # Only the DNN consumes scaled features; the tree models get raw X.
        X_scaled = self.scaler_dnn.transform(X)

        pred_dnn = self.model_dnn.predict(X_scaled, verbose=0)
        pred_dt = self.model_dt.predict_proba(X)
        pred_rf = self.model_rf.predict_proba(X)
        pred_xgb = self.model_xgb.predict_proba(X)

        # DNN might output single column for binary classification; expand it
        # to [P(class 0), P(class 1)] so it lines up with the other models.
        if pred_dnn.shape[1] == 1:
            pred_dnn = np.hstack([1 - pred_dnn, pred_dnn])

        return [pred_dnn, pred_dt, pred_rf, pred_xgb]

    def _weighted_average(self, probabilities):
        """Blend per-model probability arrays using the current weights."""
        shapes = {np.shape(proba) for proba in probabilities}
        if len(shapes) > 1:
            # Fail loudly rather than let NumPy broadcast mismatched outputs.
            raise ValueError(
                f"Base models returned mismatched probability shapes: {sorted(shapes)}"
            )
        weights = self._current_weights()
        blended = weights[0] * probabilities[0]
        for weight, proba in zip(weights[1:], probabilities[1:]):
            blended = blended + weight * proba
        return blended

    def predict_proba(self, X):
        """
        Predict probabilities using weighted soft voting

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input features

        Returns:
        --------
        proba : array of shape (n_samples, n_classes)
            Weighted average probabilities
        """
        return self._weighted_average(self._base_probabilities(X))

    def predict(self, X, threshold=0.5):
        """
        Predict class labels

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input features
        threshold : float, default=0.5
            Decision threshold for binary classification

        Returns:
        --------
        predictions : array of shape (n_samples,)
            Predicted class labels. With two classes this is 1 where the
            class-1 probability reaches `threshold`, else 0; with more
            classes it is the index of the most probable class.
        """
        proba = self.predict_proba(X)
        # Assuming binary or multi-class with class 1 being positive
        if proba.shape[1] == 2:
            return (proba[:, 1] >= threshold).astype(int)
        else:
            return np.argmax(proba, axis=1)

    def predict_with_confidence(self, X):
        """
        Predict with confidence scores and individual model predictions

        Every base model is evaluated exactly once; the same outputs feed
        both the ensemble average and the per-model breakdown.

        Returns:
        --------
        A dict when X holds exactly one sample, otherwise a list with one
        dict per sample (empty list for empty input). Each dict has keys:
            - 'prediction': final ensemble prediction (argmax index)
            - 'confidence': confidence score (probability of that class)
            - 'individual_predictions': dict of each model's prediction
            - 'individual_probabilities': dict of each model's probabilities
        """
        probabilities = self._base_probabilities(X)
        ensemble_proba = self._weighted_average(probabilities)
        final_pred = np.argmax(ensemble_proba, axis=1)
        confidence = np.max(ensemble_proba, axis=1)

        results = []
        for i in range(len(ensemble_proba)):
            results.append({
                'prediction': final_pred[i],
                'confidence': confidence[i],
                'individual_predictions': {
                    key: np.argmax(proba[i])
                    for key, proba in zip(self._MODEL_KEYS, probabilities)
                },
                'individual_probabilities': {
                    key: proba[i].tolist()
                    for key, proba in zip(self._MODEL_KEYS, probabilities)
                }
            })

        # A single sample is returned as a bare dict for caller convenience.
        if len(results) == 1:
            return results[0]
        return results

    def save(self, filepath):
        """Save the ensemble model"""
        with open(filepath, 'wb') as f:
            pickle.dump(self, f)
        print(f"Ensemble model saved to {filepath}")

    @staticmethod
    def load(filepath):
        """Load a saved ensemble model"""
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # files you created yourself.
        with open(filepath, 'rb') as f:
            model = pickle.load(f)
        print(f"Ensemble model loaded from {filepath}")
        return model


# ============================================================================
# USAGE: Create and save ensemble model in Colab
# ============================================================================

# 1. Initialize ensemble
ensemble = ExoplanetEnsemble()

# 2. Load all base models
ensemble.load_models(
    dnn_path="/content/drive/MyDrive/exo/dnn_kepler.keras",
    scaler_path="/content/drive/MyDrive/exo/dnn_scaler.save",
    dt_path="/content/drive/MyDrive/exo/dt_kepler_model.joblib",
    rf_path="/content/drive/MyDrive/exo/rf_kepler_model.joblib",
    xgb_path="/content/drive/MyDrive/exo/xgb_kepler_model.joblib"
)

# 3. Set feature names (important for the Streamlit UI)
feature_names = [
    'koi_score', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
    'koi_fpflag_ec', 'koi_period', 'koi_duration', 'koi_depth',
    'koi_prad', 'koi_impact', 'koi_steff', 'koi_slogg',
    'koi_srad', 'koi_kepmag'
]
ensemble.set_feature_names(feature_names)

# 4. Set weights (optional - defaults to equal weights)
# You can adjust these based on model performance
# Example: Give more weight to better performing models
ensemble.set_weights([0.30, 0.20, 0.25, 0.25])  # DNN, DT, RF, XGBoost

# 5. Save the ensemble model
ensemble.save("/content/drive/MyDrive/exo/ensemble_model.pkl")

print("\n" + "="*60)
print("Ensemble model created and saved successfully!")
print("="*60)

# 6. Test the ensemble (optional)
print("\nTesting ensemble with sample data...")
# Values are in feature_names order and mirror the first (CONFIRMED) row of
# kepler_koi_cleaned.csv. koi_depth is 615.8 in that row; the value used here
# before (15.8) had lost its leading digit.
sample_data = np.array([[
    1.0, 0, 0, 0, 0, 9.488036, 2.957506, 615.8,
    2.26, 0.146, 5455.0, 4.467, 0.927, 15.347
]])

# A single sample comes back as one dict rather than a list of dicts.
result = ensemble.predict_with_confidence(sample_data)
print(f"\nPrediction: {result['prediction']}")
print(f"Confidence: {result['confidence']:.4f}")
print(f"Individual predictions: {result['individual_predictions']}")

Loading models...
All models loaded successfully!
Model weights set: DNN=0.300, DT=0.200, RF=0.250, XGBoost=0.250
Ensemble model saved to /content/drive/MyDrive/exo/ensemble_model.pkl

Ensemble model created and saved successfully!

Testing ensemble with sample data...





Prediction: 1
Confidence: 0.5705
Individual predictions: {'DNN': np.int64(1), 'Decision_Tree': np.int64(1), 'Random_Forest': np.int64(1), 'XGBoost': np.int64(0)}




In [None]:
import numpy as np
import joblib
from tensorflow.keras.models import load_model
import pickle

class ExoplanetEnsemble:
    """
    Ensemble model combining DNN, SVM, Decision Tree, Random Forest, and XGBoost
    for exoplanet classification.

    The ensemble output is a weighted average ("soft vote") of the class
    probabilities returned by the five base models. Weights are ordered
    [DNN, SVM, DT, RF, XGBoost]; equal weights are used until set_weights()
    is called.
    """

    # Result-dict keys for the base models, in the same order as the weights.
    _MODEL_KEYS = ('DNN', 'SVM', 'Decision_Tree', 'Random_Forest', 'XGBoost')

    def __init__(self):
        self.model_dnn = None
        self.scaler_dnn = None
        self.model_svm = None
        self.scaler_svm = None
        self.model_dt = None
        self.model_rf = None
        self.model_xgb = None
        self.feature_names = None
        self.weights = None

    def load_models(self, dnn_path, scaler_dnn_path, svm_path, scaler_svm_path,
                    dt_path, rf_path, xgb_path):
        """Load all base models and scalers"""
        print("Loading models...")
        self.model_dnn = load_model(dnn_path)
        self.scaler_dnn = joblib.load(scaler_dnn_path)
        self.model_svm = joblib.load(svm_path)
        self.scaler_svm = joblib.load(scaler_svm_path)
        self.model_dt = joblib.load(dt_path)
        self.model_rf = joblib.load(rf_path)
        self.model_xgb = joblib.load(xgb_path)
        print("All models loaded successfully!")
        print(f"  ✓ DNN with scaler")
        print(f"  ✓ SVM with scaler")
        print(f"  ✓ Decision Tree")
        print(f"  ✓ Random Forest")
        print(f"  ✓ XGBoost")

    def set_feature_names(self, feature_names):
        """Set the feature names used for training"""
        self.feature_names = feature_names

    def set_weights(self, weights=None):
        """
        Set voting weights for each model
        Default: Equal weights [0.2, 0.2, 0.2, 0.2, 0.2]
        Order: [DNN, SVM, DT, RF, XGBoost]

        Custom weights are normalized to sum to 1.

        Raises:
        -------
        ValueError
            If `weights` does not hold exactly five finite, non-negative
            values with a positive sum.
        """
        if weights is None:
            self.weights = np.array([0.2, 0.2, 0.2, 0.2, 0.2])
        else:
            candidate = np.asarray(weights, dtype=float)
            if candidate.shape != (len(self._MODEL_KEYS),):
                raise ValueError(
                    f"Expected {len(self._MODEL_KEYS)} weights "
                    f"(DNN, SVM, DT, RF, XGBoost), got shape {candidate.shape}"
                )
            if not np.all(np.isfinite(candidate)) or np.any(candidate < 0):
                raise ValueError("Weights must be finite and non-negative")
            total = candidate.sum()
            if total <= 0:
                raise ValueError("Weights must have a positive sum")
            # Normalize weights to sum to 1
            self.weights = candidate / total
        print(f"Model weights set: DNN={self.weights[0]:.3f}, SVM={self.weights[1]:.3f}, "
              f"DT={self.weights[2]:.3f}, RF={self.weights[3]:.3f}, XGBoost={self.weights[4]:.3f}")

    def _current_weights(self):
        """Return the configured weights, or equal weights if none were set."""
        if self.weights is None:
            count = len(self._MODEL_KEYS)
            return np.full(count, 1.0 / count)
        return self.weights

    def _svm_probabilities(self, X_scaled_svm):
        """Return SVM class probabilities, deriving them from decision scores if needed."""
        if hasattr(self.model_svm, 'predict_proba'):
            return self.model_svm.predict_proba(X_scaled_svm)

        # If SVM doesn't have predict_proba, squash decision_function scores.
        # These are uncalibrated pseudo-probabilities, not true likelihoods.
        decision = self.model_svm.decision_function(X_scaled_svm)
        if len(decision.shape) == 1:
            # Binary classification: sigmoid of the signed margin.
            proba_positive = 1 / (1 + np.exp(-decision))
            return np.column_stack([1 - proba_positive, proba_positive])
        # Multi-class: softmax, shifted by the row max for numerical stability.
        exp_scores = np.exp(decision - np.max(decision, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def _base_probabilities(self, X):
        """Run each base model once; return their probabilities in weight order."""
        # Scale features for DNN and SVM; the tree models get raw X.
        X_scaled_dnn = self.scaler_dnn.transform(X)
        X_scaled_svm = self.scaler_svm.transform(X)

        pred_dnn = self.model_dnn.predict(X_scaled_dnn, verbose=0)
        pred_svm = self._svm_probabilities(X_scaled_svm)
        pred_dt = self.model_dt.predict_proba(X)
        pred_rf = self.model_rf.predict_proba(X)
        pred_xgb = self.model_xgb.predict_proba(X)

        # DNN might output single column for binary classification; expand it
        # to [P(class 0), P(class 1)] so it lines up with the other models.
        if pred_dnn.shape[1] == 1:
            pred_dnn = np.hstack([1 - pred_dnn, pred_dnn])

        return [pred_dnn, pred_svm, pred_dt, pred_rf, pred_xgb]

    def _weighted_average(self, probabilities):
        """Blend per-model probability arrays using the current weights."""
        # Ensure all predictions have the same shape; fail loudly rather than
        # let NumPy broadcast mismatched outputs.
        shapes = {np.shape(proba) for proba in probabilities}
        if len(shapes) > 1:
            raise ValueError(
                f"Base models returned mismatched probability shapes: {sorted(shapes)}"
            )
        weights = self._current_weights()
        blended = weights[0] * probabilities[0]
        for weight, proba in zip(weights[1:], probabilities[1:]):
            blended = blended + weight * proba
        return blended

    def predict_proba(self, X):
        """
        Predict probabilities using weighted soft voting

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input features

        Returns:
        --------
        proba : array of shape (n_samples, n_classes)
            Weighted average probabilities
        """
        return self._weighted_average(self._base_probabilities(X))

    def predict(self, X, threshold=0.5):
        """
        Predict class labels

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Input features
        threshold : float, default=0.5
            Decision threshold for binary classification

        Returns:
        --------
        predictions : array of shape (n_samples,)
            Predicted class labels. With two classes this is 1 where the
            class-1 probability reaches `threshold`, else 0; with more
            classes it is the index of the most probable class.
        """
        proba = self.predict_proba(X)
        # Assuming binary or multi-class with class 1 being positive
        if proba.shape[1] == 2:
            return (proba[:, 1] >= threshold).astype(int)
        else:
            return np.argmax(proba, axis=1)

    def predict_with_confidence(self, X):
        """
        Predict with confidence scores and individual model predictions

        Every base model is evaluated exactly once; the same outputs feed
        both the ensemble average and the per-model breakdown.

        Returns:
        --------
        A dict when X holds exactly one sample, otherwise a list with one
        dict per sample (empty list for empty input). Each dict has keys:
            - 'prediction': final ensemble prediction (argmax index)
            - 'confidence': confidence score (probability of that class)
            - 'individual_predictions': dict of each model's prediction
            - 'individual_probabilities': dict of each model's probabilities
        """
        probabilities = self._base_probabilities(X)
        ensemble_proba = self._weighted_average(probabilities)
        final_pred = np.argmax(ensemble_proba, axis=1)
        confidence = np.max(ensemble_proba, axis=1)

        results = []
        for i in range(len(ensemble_proba)):
            results.append({
                'prediction': final_pred[i],
                'confidence': confidence[i],
                'individual_predictions': {
                    key: np.argmax(proba[i])
                    for key, proba in zip(self._MODEL_KEYS, probabilities)
                },
                'individual_probabilities': {
                    key: proba[i].tolist()
                    for key, proba in zip(self._MODEL_KEYS, probabilities)
                }
            })

        # A single sample is returned as a bare dict for caller convenience.
        if len(results) == 1:
            return results[0]
        return results

    def save(self, filepath):
        """Save the ensemble model"""
        with open(filepath, 'wb') as f:
            pickle.dump(self, f)
        print(f"Ensemble model saved to {filepath}")

    @staticmethod
    def load(filepath):
        """Load a saved ensemble model"""
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # files you created yourself.
        with open(filepath, 'rb') as f:
            model = pickle.load(f)
        print(f"Ensemble model loaded from {filepath}")
        return model


# ============================================================================
# USAGE: Create and save ensemble model in Colab
# ============================================================================

# 1. Initialize ensemble
ensemble = ExoplanetEnsemble()

# 2. Load all base models (UPDATE THESE PATHS WITH YOUR SVM MODEL PATHS)
ensemble.load_models(
    dnn_path="/content/drive/MyDrive/exo/dnn_kepler.keras",
    scaler_dnn_path="/content/drive/MyDrive/exo/dnn_scaler.save",
    svm_path="/content/drive/MyDrive/exo/svm_kepler_model.save",  # UPDATE THIS PATH
    scaler_svm_path="/content/drive/MyDrive/exo/svm_scaler.save",  # UPDATE THIS PATH
    dt_path="/content/drive/MyDrive/exo/dt_kepler_model.joblib",
    rf_path="/content/drive/MyDrive/exo/rf_kepler_model.joblib",
    xgb_path="/content/drive/MyDrive/exo/xgb_kepler_model.joblib"
)

# 3. Set feature names (important for the Streamlit UI)
feature_names = [
    'koi_score', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
    'koi_fpflag_ec', 'koi_period', 'koi_duration', 'koi_depth',
    'koi_prad', 'koi_impact', 'koi_steff', 'koi_slogg',
    'koi_srad', 'koi_kepmag'
]
ensemble.set_feature_names(feature_names)

# 4. Set weights (optional - defaults to equal weights)
# You can adjust these based on model performance
# Example: Give more weight to better performing models
ensemble.set_weights([0.25, 0.20, 0.15, 0.20, 0.20])  # DNN, SVM, DT, RF, XGBoost

# 5. Save the ensemble model
ensemble.save("/content/drive/MyDrive/exo/ensemble_model.pkl")

print("\n" + "="*60)
print("Ensemble model created and saved successfully!")
print("="*60)

# 6. Test the ensemble (optional)
print("\nTesting ensemble with sample data...")
# Values are in feature_names order and mirror the first (CONFIRMED) row of
# kepler_koi_cleaned.csv. koi_depth is 615.8 in that row; the value used here
# before (15.8) had lost its leading digit.
sample_data = np.array([[
    1.0, 0, 0, 0, 0, 9.488036, 2.957506, 615.8,
    2.26, 0.146, 5455.0, 4.467, 0.927, 15.347
]])

# A single sample comes back as one dict rather than a list of dicts.
result = ensemble.predict_with_confidence(sample_data)
print(f"\nPrediction: {result['prediction']}")
print(f"Confidence: {result['confidence']:.4f}")
print(f"Individual predictions: {result['individual_predictions']}")

Loading models...
All models loaded successfully!
  ✓ DNN with scaler
  ✓ SVM with scaler
  ✓ Decision Tree
  ✓ Random Forest
  ✓ XGBoost
Model weights set: DNN=0.250, SVM=0.200, DT=0.150, RF=0.200, XGBoost=0.200
Ensemble model saved to /content/drive/MyDrive/exo/ensemble_model.pkl

Ensemble model created and saved successfully!

Testing ensemble with sample data...





Prediction: 1
Confidence: 0.6273
Individual predictions: {'DNN': np.int64(1), 'SVM': np.int64(1), 'Decision_Tree': np.int64(1), 'Random_Forest': np.int64(1), 'XGBoost': np.int64(0)}




In [None]:
"""
Test script to verify the ensemble model works correctly
Run this after downloading ensemble_model.pkl to your local system
"""

import numpy as np
import pickle
import warnings
warnings.filterwarnings('ignore')

def load_and_test_ensemble(model_path='ensemble_model.pkl'):
    """Load a pickled ensemble and print a smoke-test report.

    The report covers model metadata, three single-sample predictions and
    one batch prediction. Failures are printed rather than raised.

    Parameters
    ----------
    model_path : str, default='ensemble_model.pkl'
        Path of the pickled ensemble to test.

    Returns
    -------
    None
        Returns early, after printing the error, if the model cannot be
        loaded.
    """

    print("="*60)
    print("ENSEMBLE MODEL TEST")
    print("="*60)

    # Load the model
    print("\n1. Loading ensemble model...")
    try:
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at a file you created yourself.
        with open(model_path, 'rb') as f:
            ensemble = pickle.load(f)
        print("   ✅ Model loaded successfully!")
    except Exception as e:
        print(f"   ❌ Error loading model: {e}")
        return

    # Display model info. getattr keeps the report working for pickled
    # objects that do not carry these optional attributes.
    print("\n2. Model Information:")
    feature_names = getattr(ensemble, 'feature_names', None)
    weights = getattr(ensemble, 'weights', None)
    print(f"   - Feature names: {len(feature_names) if feature_names else 'Not set'} features")
    if feature_names:
        print(f"   - Features: {', '.join(feature_names[:5])}...")
    print(f"   - Model weights:")
    if weights is not None:
        # The notebook saves both a 5-model and a 4-model (no SVM) ensemble
        # to the same file name, so label the weights by how many there are.
        labels_by_count = {
            5: ("DNN", "SVM", "Decision Tree", "Random Forest", "XGBoost"),
            4: ("DNN", "Decision Tree", "Random Forest", "XGBoost"),
        }
        labels = labels_by_count.get(len(weights)) or tuple(
            f"Model {i + 1}" for i in range(len(weights))
        )
        for label, weight in zip(labels, weights):
            print(f"     • {label}: {weight:.3f}")

    # Test with sample data
    print("\n3. Testing with sample data...")

    # The koi_depth values below (8th feature) are taken from the dataset
    # preview; the previous values had each lost their leading digit.

    # Test case 1: CONFIRMED exoplanet (first row of the dataset preview)
    test_case_1 = np.array([[
        1.0, 0, 0, 0, 0, 9.488036, 2.957506, 615.8,
        2.26, 0.146, 5455.0, 4.467, 0.927, 15.347
    ]])

    # Test case 2: second row of the dataset preview, whose disposition is
    # CONFIRMED (not CANDIDATE)
    test_case_2 = np.array([[
        0.969, 0, 0, 0, 0, 54.418383, 4.507008, 874.8,
        2.83, 0.586, 5455.0, 4.467, 0.927, 15.347
    ]])

    # Test case 3: FALSE POSITIVE
    # NOTE(review): koi_score and the position of the set fp flag differ from
    # the FALSE POSITIVE row in the dataset preview - confirm this is intended.
    test_case_3 = np.array([[
        0.0001, 0, 0, 1, 0, 1.736952, 2.406418, 8079.2,
        33.46, 1.276, 5805.0, 4.564, 0.791, 15.597
    ]])

    test_cases = [
        ("Test Case 1 (Expected: CONFIRMED)", test_case_1),
        ("Test Case 2 (Expected: CONFIRMED)", test_case_2),
        ("Test Case 3 (Expected: FALSE POSITIVE)", test_case_3)
    ]

    # NOTE(review): the ensemble reports argmax indices or 0/1 labels, so -1
    # is never produced and index 0 may not mean CANDIDATE - confirm this
    # mapping against the label encoding the base models were trained with.
    prediction_map = {-1: "FALSE POSITIVE", 0: "CANDIDATE", 1: "CONFIRMED"}

    for test_name, test_data in test_cases:
        print(f"\n   {test_name}")
        try:
            result = ensemble.predict_with_confidence(test_data)

            pred_label = prediction_map.get(result['prediction'], "UNKNOWN")
            print(f"   └─ Prediction: {pred_label}")
            print(f"   └─ Confidence: {result['confidence']:.2%}")
            print(f"   └─ Individual predictions:")
            for model_name, pred in result['individual_predictions'].items():
                model_pred = prediction_map.get(pred, "UNKNOWN")
                print(f"      • {model_name}: {model_pred}")
        except Exception as e:
            print(f"   └─ ❌ Error during prediction: {e}")

    # Test batch prediction
    print("\n4. Testing batch prediction...")
    try:
        batch_data = np.vstack([test_case_1, test_case_2, test_case_3])
        predictions = ensemble.predict(batch_data)
        probabilities = ensemble.predict_proba(batch_data)

        print(f"   ✅ Batch prediction successful!")
        print(f"   - Predictions: {[prediction_map.get(p, 'UNKNOWN') for p in predictions]}")
        print(f"   - Probabilities shape: {probabilities.shape}")
    except Exception as e:
        print(f"   ❌ Error during batch prediction: {e}")

    print("\n" + "="*60)
    print("TEST COMPLETED")
    print("="*60)
    print("\n✅ If all tests passed, your ensemble model is ready for Streamlit!")
    print("   Run: streamlit run streamlit_app.py")
    print("="*60)

if __name__ == "__main__":
    # Run the scripted smoke test first.
    load_and_test_ensemble('ensemble_model.pkl')

    # Then offer a one-off prediction on features typed in by the user.
    print("\n\nWant to test with custom data? (y/n): ", end="")
    choice = input().strip().lower()

    if choice == 'y':
        # Show the expected feature order in one go.
        print(
            "\nEnter feature values (comma-separated):",
            "Format: koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,",
            "        koi_period,koi_duration,koi_depth,koi_prad,koi_impact,",
            "        koi_steff,koi_slogg,koi_srad,koi_kepmag",
            sep="\n",
        )

        try:
            user_input = input("\nValues: ").strip()
            values = [float(token.strip()) for token in user_input.split(',')]

            if len(values) == 14:
                # Wrap the single sample in a batch dimension.
                custom_data = np.array([values])

                with open('ensemble_model.pkl', 'rb') as f:
                    ensemble = pickle.load(f)

                result = ensemble.predict_with_confidence(custom_data)
                prediction_map = {-1: "FALSE POSITIVE", 0: "CANDIDATE", 1: "CONFIRMED"}

                print("\n" + "="*60)
                print("CUSTOM PREDICTION RESULT")
                print("="*60)
                print(f"Prediction: {prediction_map.get(result['prediction'], 'UNKNOWN')}")
                print(f"Confidence: {result['confidence']:.2%}")
                print("\nIndividual Model Predictions:")
                for model_name, pred in result['individual_predictions'].items():
                    model_pred = prediction_map.get(pred, "UNKNOWN")
                    print(f"  • {model_name}: {model_pred}")
                print("="*60)
            else:
                # Wrong number of features: report it and skip the prediction.
                print(f"❌ Error: Expected 14 values, got {len(values)}")
        except Exception as e:
            # Covers unparsable numbers as well as load/prediction failures.
            print(f"❌ Error: {e}")

ENSEMBLE MODEL TEST

1. Loading ensemble model...
   ❌ Error loading model: [Errno 2] No such file or directory: 'ensemble_model.pkl'


Want to test with custom data? (y/n): y

Enter feature values (comma-separated):
Format: koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,
        koi_period,koi_duration,koi_depth,koi_prad,koi_impact,
        koi_steff,koi_slogg,koi_srad,koi_kepmag


KeyboardInterrupt: Interrupted by user