# SOC Alert Analysis and Visualization

This notebook provides visualizations and analysis of security alerts from our SOC environment, including:
1. Alert trends over time
2. Severity distribution
3. XAI explanations
4. Geographic distribution
5. Attack type patterns

## Setup and Requirements
First, let's import the required libraries and set up our connections.

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import json
import requests
from pathlib import Path

# Read the shared JSON settings file that sits one directory above the notebook
config_path = Path('../config.json')
with config_path.open() as f:
    config = json.load(f)

# Sentinel credentials come from the "sentinel" section of the config
sentinel_settings = config['sentinel']
workspace_id = sentinel_settings['workspaceId']
workspace_key = sentinel_settings['primaryKey']

## Data Collection
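Let's fetch Wazuh alerts (via Elasticsearch) for the past 24 hours. Sentinel credentials are loaded during setup, but the cell below only queries Elasticsearch; no Sentinel query is implemented here.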

In [None]:
def _alerts_to_frame(alerts):
    """Turn raw alert documents into a DataFrame with dotted columns for nested fields.

    Every top-level key stays a column holding its original value, so a nested
    object such as ``xai_analysis`` remains one dict per row. In addition,
    nested objects are flattened into dotted columns (``rule`` becomes
    ``rule.level``, ``rule.description``, ...), which is the naming the
    analysis cells of this notebook index by.
    """
    frame = pd.DataFrame(alerts)
    if frame.empty:
        return frame

    flattened = pd.json_normalize(alerts)
    # Only add columns that the plain frame does not already have; documents
    # that were indexed with literal dotted keys therefore pass through as-is.
    dotted_only = [col for col in flattened.columns if col not in frame.columns]
    if not dotted_only:
        return frame
    return pd.concat([frame, flattened[dotted_only]], axis=1)


def fetch_elasticsearch_alerts(hours=24, timeout=30, max_results=10000):
    """Fetch recent alerts from the ``wazuh-alerts-*`` Elasticsearch indices.

    Connection settings (host, port, username, password) are read from the
    ``elasticsearch`` section of the module-level ``config`` dict.

    Args:
        hours: Size of the look-back window, counted back from now (UTC).
        timeout: Seconds to wait for Elasticsearch before ``requests`` raises,
            so an unreachable cluster cannot hang the kernel indefinitely.
        max_results: Maximum number of hits requested (the query ``size``).

    Returns:
        A DataFrame with one row per alert, newest first. Nested fields are
        available both as their original dict column and as dotted columns.
        An empty DataFrame is returned when the HTTP status is not 200.
    """
    # Local import: the notebook's import cell only brings in datetime/timedelta.
    from datetime import timezone

    es_settings = config['elasticsearch']
    es_host = es_settings['host']
    es_port = es_settings['port']
    es_user = es_settings['username']
    es_pass = es_settings['password']

    # Timezone-aware UTC window; datetime.utcnow() is deprecated and returns a
    # naive value whose ISO string carries no offset.
    end_time = datetime.now(timezone.utc)
    start_time = end_time - timedelta(hours=hours)

    query = {
        "query": {
            "range": {
                "@timestamp": {
                    "gte": start_time.isoformat(),
                    "lte": end_time.isoformat()
                }
            }
        },
        "sort": [{"@timestamp": "desc"}],
        "size": max_results
    }

    # NOTE(review): plain HTTP sends the basic-auth credentials unencrypted;
    # confirm this endpoint is only reachable on a trusted network.
    url = f"http://{es_host}:{es_port}/wazuh-alerts-*/_search"
    response = requests.get(url, json=query, auth=(es_user, es_pass), timeout=timeout)

    if response.status_code != 200:
        print(f"Error fetching alerts: {response.status_code}")
        return pd.DataFrame()

    hits = response.json()['hits']['hits']
    if len(hits) >= max_results:
        # The window holds at least as many alerts as we asked for, so older
        # ones were probably cut off; say so instead of truncating silently.
        print(f"Warning: result limit of {max_results} reached; older alerts in the window may be missing")

    alerts = [hit['_source'] for hit in hits]
    return _alerts_to_frame(alerts)

# Pull the default look-back window of alerts and report how many came back
df_alerts = fetch_elasticsearch_alerts()
alert_count = len(df_alerts)
print(f"Retrieved {alert_count} alerts")

## Alert Trends
Let's visualize the alert trends over time, including:
1. Alert frequency by hour
2. Severity distribution
3. Top alert rules

In [None]:
# Index the alerts by event time. The guard keeps this cell safe to re-run:
# after the first run '@timestamp' is the index, not a column, so an
# unconditional set_index would raise KeyError. Rebinding (instead of
# inplace=True) also avoids mutating the frame returned by the fetch cell.
if '@timestamp' in df_alerts.columns:
    df_alerts = (
        df_alerts
        .assign(**{'@timestamp': pd.to_datetime(df_alerts['@timestamp'])})
        .set_index('@timestamp')
    )

if df_alerts.empty:
    # A failed fetch yields an empty frame; there is nothing to plot.
    print("No alerts available to plot")
else:
    # Alert frequency by hour. A Timedelta is used instead of the 'H' alias,
    # which newer pandas releases deprecate.
    hourly_alerts = df_alerts.resample(pd.Timedelta(hours=1)).size()

    # Explicit columns so the axis labels below actually match the plotted data
    hourly_counts = hourly_alerts.rename('alerts').rename_axis('time').reset_index()

    # Create time series plot
    fig = px.line(hourly_counts,
                  x='time',
                  y='alerts',
                  title='Alert Frequency Over Time',
                  labels={'time': 'Time', 'alerts': 'Number of Alerts'})
    fig.show()

    # Severity distribution
    fig_severity = px.histogram(df_alerts,
                                x='rule.level',
                                title='Alert Severity Distribution',
                                labels={'rule.level': 'Severity Level', 'count': 'Number of Alerts'},
                                nbins=20)
    fig_severity.show()

    # Top alert rules. value_counts() names its index/values differently across
    # pandas versions, so give both columns fixed names before plotting.
    top_rules = df_alerts['rule.description'].value_counts().head(10)
    top_rules_table = top_rules.rename_axis('rule').reset_index(name='count')
    fig_rules = px.bar(top_rules_table,
                       x='rule',
                       y='count',
                       title='Top 10 Alert Rules',
                       labels={'rule': 'Rule Description', 'count': 'Count'})
    fig_rules.update_layout(xaxis_tickangle=45)
    fig_rules.show()

## XAI Analysis
Now let's examine the explainable-AI (XAI) explanations attached to high-severity alerts (rule level above 10) to understand the model's decision-making process.

In [None]:
# Filter high-severity alerts (level > 10)
high_severity = df_alerts[df_alerts['rule.level'] > 10].copy()

# Extract XAI features and explanations
if 'xai_analysis' in high_severity.columns:
    # Each cell of the column is the XAI payload itself. Alerts without one
    # hold NaN, so anything that is not a dict is treated as "no payload".
    xai_payloads = [
        value if isinstance(value, dict) else {}
        for value in high_severity['xai_analysis']
    ]

    # One row per alert that actually carries feature-importance data.
    # Assumes each 'feature_importance' is a mapping of feature -> score
    # (or a fixed-order list of scores) — TODO confirm with the XAI producer.
    importance_rows = [
        payload['feature_importance']
        for payload in xai_payloads
        if payload.get('feature_importance')
    ]

    if importance_rows:
        feature_importance = pd.DataFrame(importance_rows)

        # Average feature importance across all high-severity alerts
        avg_importance = feature_importance.mean()

        # Plot feature importance
        fig_xai = px.bar(avg_importance,
                         title='Average Feature Importance for High-Severity Alerts',
                         labels={'index': 'Feature', 'value': 'Importance Score'})
        fig_xai.update_layout(xaxis_tickangle=45)
        fig_xai.show()
    else:
        print("No feature importance data found for high-severity alerts")

    # Show sample explanations
    print("\nSample XAI Explanations for High-Severity Alerts:")
    for idx, row in high_severity.head().iterrows():
        payload = row['xai_analysis'] if isinstance(row['xai_analysis'], dict) else {}
        print(f"\nAlert at {idx}:")
        print(f"Rule: {row['rule.description']}")
        print(f"XAI Explanation: {payload.get('explanation_text', 'No explanation available')}")
else:
    print("No XAI analysis found in the alerts")

## Geographic Distribution
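Let's visualize the geographic distribution of alerts using source IP addresses. Note that the IP-to-country resolution is still a placeholder, so the map stays empty until geolocation data is supplied.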

In [None]:
# Geographic view of alert sources; skipped entirely when alerts carry no source IP
if 'source_ip' not in df_alerts.columns:
    print("No source IP information found in alerts")
else:
    # How many alerts each source IP produced
    ip_counts = df_alerts['source_ip'].value_counts()

    # Note: This assumes we have a way to map IPs to countries
    # You might want to use a geolocation service or database
    # The trace below is given no locations or values yet, only display options.
    country_trace = go.Choropleth(
        # Add your geolocation data here
        locationmode='country names',
        colorbar_title='Number of Alerts',
    )
    fig_geo = go.Figure(data=country_trace)

    # Map styling: no frame, coastlines drawn, flat equirectangular projection
    geo_options = dict(showframe=False, showcoastlines=True, projection_type='equirectangular')
    fig_geo.update_layout(
        title='Geographic Distribution of Alert Sources',
        geo=geo_options,
    )
    fig_geo.show()

    # Header for the per-country ranking
    print("\nTop Source Countries:")
    # Add your country resolution logic here

## Summary and Recommendations
Based on the analysis above, let's generate some security recommendations and insights.

In [None]:
# Collect the headline numbers for the report
is_high_severity = df_alerts['rule.level'] > 10
rule_frequencies = df_alerts['rule.description'].value_counts()

if 'source_ip' in df_alerts.columns:
    distinct_sources = df_alerts['source_ip'].nunique()
else:
    distinct_sources = 0

summary = {
    'total_alerts': len(df_alerts),
    'high_severity': len(df_alerts.loc[is_high_severity]),
    'unique_sources': distinct_sources,
    'top_rules': rule_frequencies.head(5).to_dict()
}

# Print the summary header lines, then one bullet per top rule
for summary_line in (
    "Alert Analysis Summary:",
    f"Total Alerts: {summary['total_alerts']}",
    f"High Severity Alerts: {summary['high_severity']}",
    f"Unique Source IPs: {summary['unique_sources']}",
    "\nTop Alert Rules:",
):
    print(summary_line)

for rule, count in summary['top_rules'].items():
    print(f"- {rule}: {count}")

# Build the recommendation list from what the data contains, then print it
recommendations = []
if summary['high_severity'] > 0:
    recommendations.extend([
        "- High priority: Review and address high-severity alerts",
        "- Consider adjusting detection thresholds for frequently triggering rules",
        "- Investigate patterns in source IPs for potential targeted attacks",
    ])

if 'xai_analysis' in df_alerts.columns:
    recommendations.extend([
        "- Review XAI explanations for insight into alert patterns",
        "- Use feature importance data to tune detection models",
    ])

print("\nRecommendations:")
for recommendation in recommendations:
    print(recommendation)