In [None]:
# Continuous Learning IoT Malware Detection System
# Building on your existing Random Forest model

import pickle
import time
import warnings
from collections import deque
from datetime import datetime

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler
# NOTE(review): no category or module filter is given, so this suppresses
# every warning for the whole process; consider narrowing it.
warnings.filterwarnings('ignore')

# =============================================================================
# PART 1: BASELINE NETWORK PROFILING
# =============================================================================

class NetworkBaselineProfiler:
    """
    Creates and maintains a baseline of normal network behavior.

    Per-feature statistics are computed from known-normal traffic and then
    refreshed from a sliding window (``history``) that keeps at most
    ``window_size`` of the most recent normal samples.
    """

    # Number of accepted normal samples between two statistic refreshes.
    _REFRESH_INTERVAL = 100

    def __init__(self, window_size=1000):
        self.window_size = window_size
        self.baseline_stats = {}
        self.feature_names = None
        self.history = deque(maxlen=window_size)
        # Counts normal samples added since the statistics were last computed.
        # A dedicated counter is required because len(self.history) stops
        # growing once the bounded deque is full.
        self._samples_since_refresh = 0

    def initialize_baseline(self, normal_data):
        """
        Initialize baseline with known normal traffic.

        normal_data: DataFrame with normal network features (one column per
        feature, one row per sample).
        """
        print("Initializing network baseline...")
        self.feature_names = list(normal_data.columns)

        # Calculate statistical baseline for each feature
        for feature in self.feature_names:
            column = normal_data[feature]
            self.baseline_stats[feature] = {
                'mean': column.mean(),
                'std': column.std(),
                'min': column.min(),
                'max': column.max(),
                'q1': column.quantile(0.25),
                'q3': column.quantile(0.75)
            }

        # Store initial samples (one array per row); the deque keeps only the
        # last ``window_size`` of them.
        self.history.extend(normal_data.to_numpy())
        self._samples_since_refresh = 0

        print(f"Baseline initialized with {len(normal_data)} samples")

    def update_baseline(self, new_sample, is_normal=True):
        """
        Update baseline with a new sample.

        Samples flagged as not normal are ignored. Normal samples are pushed
        into the sliding window, and every ``_REFRESH_INTERVAL`` accepted
        samples the per-feature mean and std are recomputed from the window.
        Only 'mean' and 'std' are refreshed; the other statistics keep the
        values computed by ``initialize_baseline``.
        """
        if not is_normal:
            return

        self.history.append(new_sample)

        if self.feature_names is None:
            # No baseline has been initialized yet, so there are no named
            # statistics to refresh; the sample is only kept in the window.
            return

        self._samples_since_refresh += 1
        if self._samples_since_refresh < self._REFRESH_INTERVAL:
            return
        self._samples_since_refresh = 0

        recent_data = pd.DataFrame(list(self.history), columns=self.feature_names)
        for feature in self.feature_names:
            stats = self.baseline_stats[feature]
            stats['mean'] = recent_data[feature].mean()
            stats['std'] = recent_data[feature].std()

    def calculate_anomaly_score(self, sample):
        """
        Calculate how anomalous a sample is compared to baseline.

        sample: positional sequence of feature values, ordered like
        ``feature_names``.

        Returns score between 0 (normal) and 1 (highly anomalous). Returns 0.0
        when no baseline has been initialized, because there is nothing to
        compare the sample against.
        """
        if not self.feature_names:
            return 0.0

        anomaly_scores = []

        for i, feature in enumerate(self.feature_names):
            value = sample[i]
            stats = self.baseline_stats[feature]

            # Z-score based anomaly; a zero (or NaN) std carries no scale
            # information, so such features contribute 0.
            if stats['std'] > 0:
                z_score = abs((value - stats['mean']) / stats['std'])
                anomaly_scores.append(min(z_score / 3, 1))  # 3 sigma maps to 1
            else:
                anomaly_scores.append(0)

        return np.mean(anomaly_scores)

# =============================================================================
# PART 2: CONTINUOUS LEARNING DETECTOR
# =============================================================================

class ContinuousLearningDetector:
    """
    IoT malware detector with continuous learning capabilities.

    Wraps a fitted classifier and its scaler, collects labelled feedback, and
    retrains the classifier once ``retrain_threshold`` feedback samples have
    accumulated. Label 1 is treated as malware and label 0 as normal traffic.
    """
    def __init__(self, base_model, scaler, feature_columns, retrain_threshold=500):
        """
        base_model: fitted classifier exposing predict / predict_proba / fit.
        scaler: fitted transformer exposing transform.
        feature_columns: feature names; stored but not otherwise used here.
        retrain_threshold: number of feedback samples that triggers a retrain.
        """
        self.model = base_model
        self.scaler = scaler
        self.feature_columns = feature_columns
        self.retrain_threshold = retrain_threshold

        # Continuous learning components
        self.new_samples = []
        self.new_labels = []
        self.performance_history = []
        self.model_versions = []
        self.false_positives = []
        self.false_negatives = []

        # Drift detection
        self.baseline_profiler = NetworkBaselineProfiler()
        self.drift_threshold = 0.3

        # Save initial model. The 1.0 scores are placeholders: the base model
        # has not been evaluated by this class yet.
        self._save_model_version({'accuracy': 1.0, 'f1': 1.0})

    def _save_model_version(self, performance):
        """Append a serialized snapshot of the current model to model_versions."""
        self.model_versions.append({
            'version': len(self.model_versions),
            'timestamp': datetime.now(),
            # joblib only offers dump()/load() (there is no dumps()), so the
            # in-memory snapshot is produced with pickle.
            'model': pickle.dumps(self.model),
            'performance': performance
        })

    def predict_with_confidence(self, sample):
        """
        Predict with confidence score and anomaly detection.

        Returns a dict with keys 'prediction', 'confidence', 'anomaly_score'
        and 'probabilities'. The confidence is the largest class probability,
        scaled down when the anomaly score exceeds ``drift_threshold``.
        """
        # Scale the sample
        sample_scaled = self.scaler.transform([sample])

        # Get prediction and probability
        prediction = self.model.predict(sample_scaled)[0]
        prob = self.model.predict_proba(sample_scaled)[0]
        confidence = max(prob)

        # The anomaly score is computed on the raw (unscaled) sample
        anomaly_score = self.baseline_profiler.calculate_anomaly_score(sample)

        # Adjust confidence based on anomaly
        if anomaly_score > self.drift_threshold:
            confidence *= (1 - anomaly_score/2)  # Reduce confidence for anomalies

        return {
            'prediction': prediction,
            'confidence': confidence,
            'anomaly_score': anomaly_score,
            'probabilities': prob
        }

    def add_feedback(self, sample, true_label, predicted_label):
        """
        Add user feedback for continuous learning.

        Buffers the labelled sample, records it as a false positive/negative
        when the prediction was wrong, feeds normal samples to the baseline
        profiler, and retrains once the buffer reaches ``retrain_threshold``.
        """
        self.new_samples.append(sample)
        self.new_labels.append(true_label)

        # Track errors for learning
        if predicted_label == 1 and true_label == 0:
            self.false_positives.append(sample)
        elif predicted_label == 0 and true_label == 1:
            self.false_negatives.append(sample)

        # Update baseline if normal
        if true_label == 0:
            self.baseline_profiler.update_baseline(sample, is_normal=True)

        # Check if retraining needed
        if len(self.new_samples) >= self.retrain_threshold:
            self.retrain_model()

    def retrain_model(self):
        """
        Retrain model with accumulated samples.

        The buffered feedback is augmented with the most recent false
        positives (2x) and false negatives (3x), the model is refitted, a new
        entry is appended to ``model_versions`` and the buffer is cleared.
        When the buffer holds more than 100 samples the last 100 are used for
        a quick evaluation; otherwise the recorded performance is None.
        """
        if not self.new_samples:
            print("\nNo new samples to retrain on.")
            return

        print(f"\nRetraining model with {len(self.new_samples)} new samples...")

        X_new = np.array(self.new_samples)
        y_new = np.array(self.new_labels)

        # Add recent errors with higher weight by repeating them:
        # false positives 2x, false negatives 3x (more critical).
        for error_samples, label, weight in (
            (self.false_positives, 0, 2),
            (self.false_negatives, 1, 3),
        ):
            if not error_samples:
                continue
            recent = np.asarray(error_samples[-50:])  # Last 50 errors
            X_new = np.vstack([X_new] + [recent] * weight)
            y_new = np.concatenate([y_new, [label] * (len(recent) * weight)])

        # Scale new data
        X_new_scaled = self.scaler.transform(X_new)

        # Incremental learning (warm start). Forest-style ensembles keep their
        # existing trees only when warm_start is True; without it fit() builds
        # a whole new forest from this batch alone. Models without these
        # attributes are simply refitted on the batch.
        # NOTE(review): a warm-started fit assumes the batch contains the same
        # classes the model was originally trained on - confirm for batches
        # that hold a single class.
        if hasattr(self.model, 'warm_start') and hasattr(self.model, 'n_estimators'):
            self.model.warm_start = True
            self.model.n_estimators += 10  # Add more trees
        self.model.fit(X_new_scaled, y_new)

        # Evaluate on recent data.
        # NOTE(review): these samples were part of the training batch, so the
        # scores are optimistic.
        accuracy = None
        f1 = None
        if len(self.new_samples) > 100:
            X_eval = self.scaler.transform(self.new_samples[-100:])
            y_eval = self.new_labels[-100:]
            y_pred = self.model.predict(X_eval)
            accuracy = accuracy_score(y_eval, y_pred)
            f1 = f1_score(y_eval, y_pred, average='weighted')

            self.performance_history.append({
                'timestamp': datetime.now(),
                'accuracy': accuracy,
                'f1': f1,
                'samples_trained': len(self.new_samples)
            })

            print(f"Retrained model performance: Accuracy={accuracy:.4f}, F1={f1:.4f}")

        # Save model version
        self._save_model_version({'accuracy': accuracy, 'f1': f1})

        # Clear training buffer
        self.new_samples = []
        self.new_labels = []

    def detect_concept_drift(self, recent_predictions, window_size=100):
        """
        Detect if the network behavior has significantly changed.

        recent_predictions: sequence of dicts carrying an 'anomaly_score' key.
        Returns True when the mean anomaly score of the last ``window_size``
        entries exceeds ``drift_threshold``; False otherwise, including when
        fewer than ``window_size`` entries are available.
        """
        if len(recent_predictions) < window_size:
            return False

        recent = recent_predictions[-window_size:]
        anomaly_scores = [p['anomaly_score'] for p in recent]

        # High average anomaly score indicates drift
        avg_anomaly = np.mean(anomaly_scores)
        if avg_anomaly > self.drift_threshold:
            print(f"⚠️  Concept drift detected! Average anomaly score: {avg_anomaly:.3f}")
            return True

        return False

# =============================================================================
# PART 3: REAL-TIME MONITORING SYSTEM
# =============================================================================

class IoTMalwareMonitor:
    """
    Complete monitoring system for deployment.

    Loads a persisted model and scaler, wraps them in a
    ContinuousLearningDetector, and keeps alert and prediction bookkeeping.
    """
    def __init__(self, model_path, scaler_path, feature_columns):
        """
        model_path / scaler_path: files readable by joblib.load.
        feature_columns: feature names forwarded to the detector.
        """
        # Load pre-trained model
        # NOTE(review): joblib.load unpickles its input, so only load files
        # from a trusted source.
        self.base_model = joblib.load(model_path)
        self.scaler = joblib.load(scaler_path)

        # Initialize continuous learning detector
        self.detector = ContinuousLearningDetector(
            self.base_model,
            self.scaler,
            feature_columns,
            retrain_threshold=500
        )

        # Monitoring state
        self.alerts = []
        self.predictions_cache = deque(maxlen=1000)
        # Running total of monitored samples. The cache length cannot be used
        # for this because it stops growing once the deque is full.
        self.samples_processed = 0
        self.start_time = datetime.now()

    def initialize_household_baseline(self, normal_traffic_df):
        """
        Initialize with household's normal traffic patterns.

        normal_traffic_df: DataFrame handed to the detector's baseline profiler.
        """
        print("Learning household network patterns...")
        self.detector.baseline_profiler.initialize_baseline(normal_traffic_df)
        print("✅ Baseline established")

    def monitor_traffic(self, traffic_sample):
        """
        Main monitoring function - call this for each network sample.

        Returns: (prediction, confidence, should_alert). An alert is raised
        when the prediction is 1 (malware) with confidence above 0.7.
        """
        # Get prediction with confidence
        result = self.detector.predict_with_confidence(traffic_sample)

        # Cache for drift detection
        self.predictions_cache.append(result)
        self.samples_processed += 1

        # Determine if alert needed: malware detected with high confidence
        should_alert = bool(result['prediction'] == 1 and result['confidence'] > 0.7)
        if should_alert:
            self.alerts.append({
                'timestamp': datetime.now(),
                'confidence': result['confidence'],
                'anomaly_score': result['anomaly_score'],
                'sample': traffic_sample,
                # Kept with the sample so that feedback on this alert is
                # paired with the prediction made for this very sample.
                'prediction': result['prediction']
            })

        # Check for concept drift once every 100 processed samples
        if self.samples_processed % 100 == 0:
            if self.detector.detect_concept_drift(list(self.predictions_cache)):
                print("🔄 Adapting to network changes...")

        return result['prediction'], result['confidence'], should_alert

    def provide_feedback(self, sample_id, true_label):
        """
        User feedback for false positives/negatives.

        sample_id is currently unused: in real deployment you would track
        sample IDs; for the demo the feedback is applied to the most recent
        alert. Nothing happens when no alert has been raised yet.
        """
        if not self.predictions_cache or not self.alerts:
            return

        last_alert = self.alerts[-1]
        last_sample = last_alert['sample']
        if last_sample is None:
            return

        # Alerts are only raised for prediction 1, which is also the fallback
        # for alert records that carry no stored prediction.
        predicted = last_alert.get('prediction', 1)
        self.detector.add_feedback(last_sample, true_label, predicted)
        print("✅ Feedback recorded. Model will adapt.")

    def get_system_status(self):
        """
        Get current system status and statistics.

        Returns a dict with uptime, sample and alert counters, the current
        model version with its timestamp and performance, and the number of
        recorded false positives/negatives.
        """
        uptime = datetime.now() - self.start_time
        latest_version = self.detector.model_versions[-1]
        return {
            'uptime': str(uptime),
            'total_samples': self.samples_processed,
            'alerts_raised': len(self.alerts),
            'model_version': len(self.detector.model_versions) - 1,
            'last_retrain': latest_version['timestamp'],
            'current_performance': latest_version['performance'],
            'false_positives': len(self.detector.false_positives),
            'false_negatives': len(self.detector.false_negatives)
        }

# =============================================================================
# PART 4: DEPLOYMENT SCRIPT
# =============================================================================

def deploy_continuous_monitor():
    """
    Deploy the continuous learning monitor.

    Demo driver: loads the model and scaler from hard-coded paths, feeds
    randomly generated samples to the monitor until interrupted with Ctrl+C,
    then saves the adapted model and prints the final statistics.
    """
    print("🚀 Deploying IoT Malware Monitor with Continuous Learning")
    print("="*60)

    # Load your trained model and scaler
    # Replace with your actual paths
    MODEL_PATH = 'iot_malware_rf_model.pkl'
    SCALER_PATH = 'iot_malware_scaler.pkl'

    # Use your selected features
    FEATURE_COLUMNS = ['HH_jit_L0_1_mean', 'HH_jit_L0_01_mean', 'HpHp_L0_01_radius',
                       'H_L0_01_weight', 'MI_dir_L0_1_weight']  # ... add all 30

    # Pause after a failed iteration so that a persistent error (for example
    # a feature-count mismatch) cannot spin the loop and flood the output.
    ERROR_BACKOFF_SECONDS = 1.0

    # Initialize monitor
    monitor = IoTMalwareMonitor(MODEL_PATH, SCALER_PATH, FEATURE_COLUMNS)

    # Simulate initialization with household baseline
    # In real deployment, collect ~1 hour of normal traffic
    print("\n📊 Establishing household baseline...")
    # normal_traffic = load_normal_household_traffic()  # Your function
    # monitor.initialize_household_baseline(normal_traffic)

    print("\n✅ System Ready! Monitoring network traffic...")
    print("="*60)

    # Simulation of real-time monitoring
    malware_count = 0
    sample_count = 0

    # In real deployment, this would be a continuous loop
    # reading from network interface.
    # KeyboardInterrupt is handled outside the loop so that Ctrl+C also stops
    # the monitor cleanly while it is sleeping in the error back-off.
    try:
        while True:
            try:
                # Get next network sample (replace with actual network capture)
                # traffic_sample = capture_network_features()  # Your function

                # For demo, simulate with random sample
                traffic_sample = np.random.randn(30)  # Replace with real data

                # Monitor the traffic
                is_malware, confidence, should_alert = monitor.monitor_traffic(traffic_sample)

                sample_count += 1
                if is_malware:
                    malware_count += 1

                # Alert on high-confidence malware
                if should_alert:
                    print(f"\n🚨 MALWARE DETECTED! Confidence: {confidence:.2%}")
                    print(f"   Total detections: {malware_count}/{sample_count}")

                # Periodic status update
                if sample_count % 1000 == 0:
                    status = monitor.get_system_status()
                    print(f"\n📈 Status Update:")
                    print(f"   Samples processed: {status['total_samples']}")
                    print(f"   Alerts: {status['alerts_raised']}")
                    print(f"   Model version: {status['model_version']}")

                # Simulate delay
                time.sleep(0.1)  # In real deployment, remove this

            except Exception as e:
                # Best effort: report the failure and keep monitoring.
                print(f"\n❌ Error: {e}")
                time.sleep(ERROR_BACKOFF_SECONDS)
    except KeyboardInterrupt:
        print("\n\n🛑 Monitoring stopped by user")

    # Save final model
    print("\n💾 Saving adapted model...")
    final_model = monitor.detector.model
    joblib.dump(final_model, 'iot_malware_adapted_model.pkl')

    # Print final statistics
    final_status = monitor.get_system_status()
    print("\n📊 Final Statistics:")
    for key, value in final_status.items():
        print(f"   {key}: {value}")

# =============================================================================
# USAGE EXAMPLE
# =============================================================================

if __name__ == "__main__":
    # Script entry point (e.g. when testing in Colab): only prints a summary
    # of what the system does; the monitor itself is not started here.
    banner_lines = (
        "Continuous Learning IoT Malware Detector",
        "This system will:",
        "1. Learn your household's normal network patterns",
        "2. Continuously monitor for malware",
        "3. Adapt to new threats automatically",
        "4. Reduce false positives over time",
    )
    for banner_line in banner_lines:
        print(banner_line)

    # To deploy:
    # deploy_continuous_monitor()

    # For testing with your existing model:
    # 1. Load your trained RF model and scaler
    # 2. Create some test normal/malware samples
    # 3. Initialize the continuous learning detector
    # 4. Simulate monitoring and adaptation

# Task
Create a Python application that uses a pre-trained SVM model and scaler located at "/content/drive/MyDrive/Colab Notebooks/ML_Models/3_SVM_WORKING/Model_V1/iot_malware_svm_subset_model.pkl" and "/content/drive/MyDrive/Colab Notebooks/ML_Models/3_SVM_WORKING/Model_V1/iot_malware_svm_subset_scaler.pkl" respectively, to detect IoT malware. The application should implement a combined detection approach that uses both the model's prediction and a baseline anomaly check to reduce false positives. Simulate a real-time data pipeline by reading data from designated folders: one for baseline data and another for live monitoring data. Develop a simple web interface using Flask that displays the system's status (active/loading), provides a button to initiate baseline training with user prompts based on malware detection in the baseline data, and shows real-time prediction results, potentially with basic visualizations.