# NoxSuite Security Platform - AI/ML Demo

This notebook demonstrates the AI/ML capabilities integrated into the NoxSuite Security Platform.

## Features Demonstrated:
- Security model training on log data
- Anomaly detection for login patterns
- Risk scoring for users and sessions
- Real-time threat assessment

## Requirements:
- Python 3.12+
- All ML dependencies from requirements.txt
- Sample security log data

In [None]:
# Import required libraries
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Put the parent directory on the import path so the `ml` package can be found
sys.path.append('..')

# Load the NoxSuite ML entry points used throughout this notebook.
# A failed import is reported rather than raised, so the cells below will hit
# NameError until the dependencies are installed and this cell is re-run.
try:
    from ml.model_training import SecurityModelTrainer, create_sample_security_data
    from ml.anomaly_detection import AnomalyDetector, create_sample_system_metrics
    from ml.predictive_engine import RiskScoreEngine, create_sample_training_data
except ImportError as import_error:
    print(f"❌ Error importing ML modules: {import_error}")
    print("Please ensure all dependencies are installed: pip install -r requirements.txt")
else:
    # Reached only when all three imports succeeded
    print("✅ NoxSuite ML modules imported successfully")

## 1. Security Model Training Demo

Demonstrate training ML models on security log data for threat detection.

In [None]:
# Set up the trainer, pointing its model_dir at ../models
trainer = SecurityModelTrainer(model_dir="../models")

# Ask the sample-data helper for 1000 log entries
print("Creating sample security log data...")
security_logs = create_sample_security_data(1000)

# Report how many entries came back, then the heading for the preview
print(
    f"Generated {len(security_logs)} security log entries",
    "\nSample data:",
    sep="\n",
)

# Last expression of the cell, so the notebook renders the first rows
security_logs.head()

In [None]:
# Draw four summary views of the sample security logs on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Top-left: number of log entries per login status
status_counts = security_logs['status'].value_counts()
status_counts.plot(kind='bar', ax=axes[0, 0], title='Login Status Distribution')

# Top-right: failed logins per hour of day.
# NOTE(review): the 'hour' column is written onto security_logs itself, so it
# is still present when the frame is handed to the trainer in later cells.
security_logs['hour'] = pd.to_datetime(security_logs['timestamp']).dt.hour
failed_mask = security_logs['status'] == 'failed'
failed_by_hour = security_logs[failed_mask].groupby('hour').size()
failed_by_hour.plot(kind='line', ax=axes[0, 1], title='Failed Logins by Hour')

# Bottom row: entry counts per user and per IP address
count_panels = (
    ('user_id', axes[1, 0], 'User Activity Distribution'),
    ('ip_address', axes[1, 1], 'IP Address Distribution'),
)
for column, axis, heading in count_panels:
    security_logs[column].value_counts().plot(kind='bar', ax=axis, title=heading)

plt.tight_layout()
plt.show()

In [None]:
# Fit the anomaly detector on the sample logs under the name "demo_anomaly_model"
print("Training anomaly detection model...")
anomaly_results = trainer.train_anomaly_detector(security_logs, "demo_anomaly_model")

# Print every entry of the returned results mapping, one per line
print("\nAnomaly Detection Results:")
for result_name, result_value in anomaly_results.items():
    print(f"{result_name}: {result_value}")

In [None]:
# Fit the threat classifier on the sample logs under the name "demo_threat_classifier"
print("Training threat classification model...")
classification_results = trainer.train_classification_model(security_logs, "demo_threat_classifier")

# Print every entry of the returned results mapping, one per line
print("\nThreat Classification Results:")
for result_name, result_value in classification_results.items():
    print(f"{result_name}: {result_value}")

## 2. Real-time Anomaly Detection Demo

Demonstrate real-time anomaly detection on simulated system metrics (user-behavior detection is not exercised in this notebook).

In [None]:
# Create the detector; later cells in this notebook reuse it
detector = AnomalyDetector()

print("Simulating real-time system monitoring...")
all_alerts = []              # every alert raised across the whole run
system_metrics_history = []  # one metrics sample per iteration, plotted later

# Take 20 samples, checking each one for anomalies as it arrives
for iteration in range(1, 21):
    metrics = create_sample_system_metrics()
    system_metrics_history.append(metrics)

    alerts = detector.detect_system_metric_anomalies(metrics)
    all_alerts.extend(alerts)

    # Iterations without alerts print nothing
    if not alerts:
        continue

    print(f"Iteration {iteration}: {len(alerts)} alerts detected")
    for alert in alerts:
        print(f"  - {alert.severity.upper()}: {alert.description}")

print(f"\nTotal alerts generated: {len(all_alerts)}")

In [None]:
# Turn the collected metric samples into a frame with one row per sample
metrics_df = pd.DataFrame(system_metrics_history)

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# (column, plot title, y-axis label) for each panel, listed in row-major order
# to match the order in which axes.flat walks the 2x2 grid
metric_panels = (
    ('cpu_usage', 'CPU Usage Over Time', 'CPU %'),
    ('memory_usage', 'Memory Usage Over Time', 'Memory %'),
    ('network_throughput', 'Network Throughput Over Time', 'Throughput'),
    ('active_connections', 'Active Connections Over Time', 'Connections'),
)
for axis, (column, heading, y_label) in zip(axes.flat, metric_panels):
    metrics_df[column].plot(ax=axis, title=heading)
    axis.set_ylabel(y_label)

plt.tight_layout()
plt.show()

In [None]:
# Ask the detector for its alert summary, passing a 24-hour window
alert_summary = detector.get_alert_summary(hours=24)

print("Alert Summary (Last 24 hours):")
print(f"Total alerts: {alert_summary['total_alerts']}")
print(f"By severity: {alert_summary['by_severity']}")
print(f"By type: {alert_summary['by_type']}")

# Mention the latest alert only when the summary holds a truthy value for it
if latest_alert := alert_summary['latest_alert']:
    print(f"Latest alert: {latest_alert}")

## 3. Risk Scoring Engine Demo

Demonstrate predictive risk scoring for users and sessions (IP-address scoring is not exercised in this notebook).

In [None]:
# Build the risk engine, pointing its model_dir at ../models
risk_engine = RiskScoreEngine(model_dir="../models")

# Ask the sample-data helper for 500 training samples
print("Creating sample training data for risk models...")
training_data = create_sample_training_data(500)

# Report how many samples came back, then the heading for the preview
print(
    f"Generated {len(training_data)} training samples",
    "\nSample training data:",
    sep="\n",
)

# Last expression of the cell, so the notebook renders the first rows
training_data.head()

In [None]:
# Visualize how risk_score is distributed and how it relates to three features
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Risk score distribution.
# Series.hist() forwards unrecognised keyword arguments to matplotlib's
# hist(), which rejects `title`, so the title is set on the axes instead.
training_data['risk_score'].hist(bins=30, ax=axes[0, 0])
axes[0, 0].set_title('Risk Score Distribution')
axes[0, 0].set_xlabel('Risk Score')
axes[0, 0].set_ylabel('Frequency')

# Remaining panels: risk_score scattered against one feature each.
# Each entry is (axes, feature column, x-axis label, panel title).
scatter_panels = (
    (axes[0, 1], 'failed_login_ratio', 'Failed Login Ratio',
     'Risk Score vs Failed Login Ratio'),
    (axes[1, 0], 'privilege_escalation_attempts', 'Privilege Escalation Attempts',
     'Risk Score vs Privilege Escalation'),
    (axes[1, 1], 'external_ip_usage', 'External IP Usage',
     'Risk Score vs External IP Usage'),
)
for axis, feature, x_label, heading in scatter_panels:
    axis.scatter(training_data[feature], training_data['risk_score'], alpha=0.6)
    axis.set_xlabel(x_label)
    axis.set_ylabel('Risk Score')
    axis.set_title(heading)

plt.tight_layout()
plt.show()

In [None]:
# Fit the risk models on the sample training data
print("Training risk scoring models...")
training_results = risk_engine.train_risk_models(training_data)

# Headline figures from the returned results
print("\nTraining Results:")
print(f"Best model: {training_results['best_model']}")
print(f"Total samples: {training_results['total_samples']}")
print(f"Feature count: {training_results['feature_count']}")

# Per-model scores, each formatted to four decimal places
print("\nModel Performance:")
for model_name, scores in training_results['model_results'].items():
    print(f"{model_name}:")
    print(f"  - MAE: {scores['mae']:.4f}")
    print(f"  - MSE: {scores['mse']:.4f}")
    print(f"  - R²: {scores['r2']:.4f}")

In [None]:
# Score a single user and print the resulting assessment
print("Testing user risk scoring...")

# Take this user's rows from the training data and add the extra columns the
# cell supplies for scoring: hourly timestamps, a fixed IP and a fixed status.
target_user = 'user_5'
sample_user_data = training_data[training_data['entity_id'] == target_user].copy()
row_count = len(sample_user_data)
# 'h' is the hourly alias; the upper-case 'H' spelling is deprecated since pandas 2.2
sample_user_data['timestamp'] = pd.date_range('2024-01-01', periods=row_count, freq='h')
sample_user_data['ip_address'] = ['192.168.1.10'] * row_count
sample_user_data['status'] = ['success'] * row_count

# Calculate risk score
user_risk = risk_engine.calculate_user_risk_score(target_user, sample_user_data)

print(f"\nUser Risk Assessment for {user_risk.entity_id}:")
print(f"Risk Score: {user_risk.risk_score:.3f}")
print(f"Risk Level: {user_risk.risk_level.value.upper()}")
print(f"Confidence: {user_risk.confidence:.3f}")
print(f"Prediction Horizon: {user_risk.prediction_horizon}")

# Each factor value is formatted as a float with three decimals
print("\nRisk Factors:")
for factor, value in user_risk.factors.items():
    print(f"  - {factor}: {value:.3f}")

print("\nRecommendations:")
for rec in user_risk.recommendations:
    print(f"  - {rec}")

In [None]:
# Score two hand-written sessions and print each assessment
print("Testing session risk scoring...")

# Two contrasting sessions; every field value is fixed by hand in this cell
sample_sessions = [
    dict(
        session_id='session_normal',
        duration_minutes=30,
        pages_accessed=5,
        failed_attempts=0,
        ip_address='192.168.1.100',
        new_device=False,
        user_agent='Mozilla/5.0...',
    ),
    dict(
        session_id='session_suspicious',
        duration_minutes=120,
        pages_accessed=50,
        failed_attempts=5,
        ip_address='203.0.113.1',
        new_device=True,
        user_agent='bot/1.0',
    ),
]

for session_data in sample_sessions:
    # pop() removes the id from the dict in place, so only the remaining
    # fields are passed on as the session data
    session_id = session_data.pop('session_id')
    session_risk = risk_engine.calculate_session_risk_score(session_id, session_data)

    print(f"\nSession Risk Assessment for {session_risk.entity_id}:")
    print(f"Risk Score: {session_risk.risk_score:.3f}")
    print(f"Risk Level: {session_risk.risk_level.value.upper()}")
    print(f"Recommendations: {', '.join(session_risk.recommendations)}")

## 4. Integration Demo

Demonstrate how all ML components work together for comprehensive security monitoring.

In [None]:
# Comprehensive security monitoring simulation: 24 hourly passes that check
# system metrics every hour and assess one user every fourth hour.
print("Running comprehensive security monitoring simulation...")

# Results gathered over the simulated day. 'model_predictions' is created here
# but nothing in this cell writes to it.
monitoring_results = {
    'anomaly_alerts': [],
    'risk_assessments': [],
    'model_predictions': []
}

for hour in range(24):
    print(f"\nHour {hour:02d}:00 - Security Monitoring")

    # 1. System metrics monitoring
    metrics = create_sample_system_metrics()
    anomaly_alerts = detector.detect_system_metric_anomalies(metrics)

    if anomaly_alerts:
        monitoring_results['anomaly_alerts'].extend(anomaly_alerts)
        print(f"  🚨 {len(anomaly_alerts)} anomalies detected")

    # 2. User risk assessment, run only on every fourth hour
    if hour % 4 != 0:
        continue

    user_id = f"user_{hour // 4}"
    # Score each user on that user's own rows. Previously every assessment was
    # computed from the rows of 'user_1' regardless of the id being reported.
    user_data = training_data[training_data['entity_id'] == user_id].head(10).copy()
    if user_data.empty:
        # Nothing to score for this id; skip it rather than score an empty frame
        print(f"  ℹ️  No activity rows for {user_id}; skipping risk assessment")
        continue
    # 'h' is the hourly alias; the upper-case 'H' spelling is deprecated since pandas 2.2
    user_data['timestamp'] = pd.date_range('2024-01-01', periods=len(user_data), freq='h')

    user_risk = risk_engine.calculate_user_risk_score(user_id, user_data)
    monitoring_results['risk_assessments'].append(user_risk)

    if user_risk.risk_level.value in ['high', 'critical']:
        print(f"  ⚠️  High risk user detected: {user_id} (Risk: {user_risk.risk_score:.3f})")

print("\n📊 Monitoring Summary:")
print(f"Total anomaly alerts: {len(monitoring_results['anomaly_alerts'])}")
print(f"Total risk assessments: {len(monitoring_results['risk_assessments'])}")

# Alert severity breakdown: number of alerts per severity value
severity_counts = {}
for alert in monitoring_results['anomaly_alerts']:
    severity_counts[alert.severity] = severity_counts.get(alert.severity, 0) + 1

print(f"Alert severity breakdown: {severity_counts}")

# Risk level breakdown: number of assessments per risk level value
risk_counts = {}
for assessment in monitoring_results['risk_assessments']:
    level = assessment.risk_level.value
    risk_counts[level] = risk_counts.get(level, 0) + 1

print(f"Risk level breakdown: {risk_counts}")

In [None]:
# Final 2x2 dashboard summarising the monitoring run
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Top row: one bar chart per tally, drawn only when the tally is non-empty
bar_panels = (
    (severity_counts, axes[0, 0], 'Anomaly Alerts by Severity'),
    (risk_counts, axes[0, 1], 'Risk Assessments by Level'),
)
for tally, axis, heading in bar_panels:
    if not tally:
        continue
    pd.Series(tally).plot(kind='bar', ax=axis, title=heading)
    axis.set_ylabel('Count')
    axis.tick_params(axis='x', rotation=45)

# Bottom-left: risk scores in the order the assessments were recorded
recorded_assessments = monitoring_results['risk_assessments']
if recorded_assessments:
    risk_scores = [item.risk_score for item in recorded_assessments]
    pd.Series(risk_scores).plot(ax=axes[1, 0], title='Risk Scores Over Time')
    axes[1, 0].set_ylabel('Risk Score')
    axes[1, 0].set_xlabel('Assessment #')

# Bottom-right: each alert's score plotted against its position in the list
recorded_alerts = monitoring_results['anomaly_alerts']
if recorded_alerts:
    alert_times = list(range(len(recorded_alerts)))
    alert_scores = [item.score for item in recorded_alerts]
    axes[1, 1].scatter(alert_times, alert_scores, alpha=0.7)
    axes[1, 1].set_title('Anomaly Alert Scores Timeline')
    axes[1, 1].set_xlabel('Alert #')
    axes[1, 1].set_ylabel('Anomaly Score')

plt.tight_layout()
plt.show()

# Closing banner followed by the capability list, one line each
print("\n✅ AI/ML Integration Demo Complete!")
print("\nThe NoxSuite Security Platform now includes:")
for capability_line in (
    "- Advanced ML-based anomaly detection",
    "- Predictive risk scoring for users and sessions",
    "- Real-time threat assessment capabilities",
    "- Comprehensive security model training",
):
    print(capability_line)