In [1]:
# Notebook 04: Detection Simulation.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the raw healthcare access log that every detection rule below works on.
df = pd.read_csv("../data/healthcare_logs.csv")

# --- Step 1: Rebuild detection features (robust standalone) ---

# Two-letter country codes that the non-EU rule treats as "inside the EU".
# Any other value in the "Country" column raises the non-EU flag.
eu_countries = [
    "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "ES",
    "FI", "FR", "GR", "HR", "HU", "IE", "IT", "LT", "LU",
    "LV", "MT", "NL", "PL", "PT", "RO", "SE", "SI", "SK",
]

# Flag 1: the access comes from a country that is not in the list above.
inside_eu = df["Country"].isin(eu_countries)
df["non_eu_flag"] = ~inside_eu

# Flag 2: the record's ConsentStatus compares equal to False.
df["consent_violation_flag"] = df["ConsentStatus"].eq(False)

# Parse the raw timestamp; unparseable values become NaT (errors="coerce"),
# which yields a missing hour and therefore never satisfies the hour tests.
df["timestamp"] = pd.to_datetime(df["Timestamp"], errors="coerce")
df["hour"] = df["timestamp"].dt.hour

# Flag 3: an "admin" role acting when the hour is before 6 or after 22.
is_admin = df["Role"].eq("admin")
outside_hours = df["hour"].lt(6) | df["hour"].gt(22)
df["admin_access_outside_hours"] = is_admin & outside_hours

# --- Step 2: Risk Scoring ---

# The score is the number of detection flags raised for the row (0 to 3).
flag_columns = [
    "non_eu_flag",
    "consent_violation_flag",
    "admin_access_outside_hours",
]
df["risk_score"] = sum(df[flag].astype(int) for flag in flag_columns)

# Two or more flags -> "High", exactly one -> "Medium", none -> "Low".
df["risk_level"] = np.select(
    [df["risk_score"] >= 2, df["risk_score"] == 1],
    ["High", "Medium"],
    default="Low",
)

# --- Step 3: Simulated Alerts Table ---

def _build_alert_message(frame):
    """Return one summary string per row: the risk level followed by each flag value."""
    message = "Risk=" + frame["risk_level"]
    flag_parts = (
        (" | Consent=", "consent_violation_flag"),
        (" | Non-EU=", "non_eu_flag"),
        (" | AdminNight=", "admin_access_outside_hours"),
    )
    for prefix, flag_column in flag_parts:
        # Booleans are rendered as the text "True" / "False".
        message = message + prefix + frame[flag_column].astype(str)
    return message


# Every row with at least one raised flag becomes an alert; copy so that the
# new column is added to an independent frame rather than a slice of df.
alerts = df.loc[df["risk_score"] >= 1].copy()
alerts["alert_message"] = _build_alert_message(alerts)

# Display sample alerts
preview_columns = [
    "UserID", "Timestamp", "Role", "Country",
    "risk_score", "risk_level", "alert_message",
]
alerts[preview_columns].head()


Unnamed: 0,UserID,Timestamp,Role,Country,risk_score,risk_level,alert_message
2,U003,2024-02-12T10:00:00Z,clinician,FR,1,Medium,Risk=Medium | Consent=True | Non-EU=False | Ad...
5,U006,2024-02-14T07:00:00Z,external_vendor,FR,1,Medium,Risk=Medium | Consent=True | Non-EU=False | Ad...
7,U008,2024-01-13T04:00:00Z,external_vendor,CN,2,High,Risk=High | Consent=True | Non-EU=True | Admin...
8,U009,2024-02-21T18:00:00Z,clinician,CA,1,Medium,Risk=Medium | Consent=False | Non-EU=True | Ad...
10,U011,2024-02-20T04:00:00Z,admin,CN,3,High,Risk=High | Consent=True | Non-EU=True | Admin...


In [2]:
# Export alerts to CSV
# The destination lives in one variable so the confirmation message always
# reports the path that was actually written (the old message printed
# "/data/..." while the file went to "../data/...").
alerts_csv_path = "../data/alerts_simulated.csv"
alerts.to_csv(alerts_csv_path, index=False)

print(f"Export successful – {len(alerts)} alerts written to {alerts_csv_path}")


Export successful – 65 alerts written to /data/alerts_simulated.csv


In [6]:
from datetime import datetime, timezone
import uuid

# Table for SIEM
alerts_export = alerts.copy()


def _unique_short_ids(count, length=8):
    """Return `count` distinct random IDs, each the first `length` hex chars of a UUID4.

    Truncating a UUID keeps only `4 * length` random bits, so two alerts can
    receive the same ID. Candidates that were already issued are discarded and
    redrawn, which keeps the short format while guaranteeing uniqueness within
    one export.

    Raises:
        ValueError: if more IDs are requested than `length` hex characters
            can represent.
    """
    if count > 16 ** length:
        raise ValueError(
            f"cannot generate {count} distinct IDs of {length} hex characters"
        )
    issued = set()
    ids = []
    while len(ids) < count:
        candidate = uuid.uuid4().hex[:length]
        if candidate not in issued:
            issued.add(candidate)
            ids.append(candidate)
    return ids


# Generate unique alert ID (UUID truncated to 8 chars, collisions redrawn)
alerts_export["alert_id"] = _unique_short_ids(len(alerts_export))

# Rule matching (based on flags)
def map_rule_id(row):
    """Return the rule ID of the first detection flag that is set on `row`.

    Flags are checked in a fixed priority order: non-EU access ("UC-01"),
    consent violation ("UC-03"), then admin access outside hours ("UC-05").
    Only the first match is reported, even when several flags are set.

    Args:
        row: mapping-like object (e.g. a DataFrame row) indexable by flag name.

    Returns:
        The matching rule ID, or "UC-00" when no flag is truthy.
    """
    rule_priority = (
        ("non_eu_flag", "UC-01"),
        ("consent_violation_flag", "UC-03"),
        ("admin_access_outside_hours", "UC-05"),
    )
    for flag_name, rule_id in rule_priority:
        if row[flag_name]:
            return rule_id
    return "UC-00"  # fallback (none matched)

# Resolve the rule ID record by record. A list comprehension is used instead of
# DataFrame.apply(axis=1) because apply on an empty frame can hand back a
# DataFrame rather than a Series, which breaks the column assignment when no
# alerts were raised; an empty list assigns cleanly.
rule_flag_columns = ["non_eu_flag", "consent_violation_flag", "admin_access_outside_hours"]
alerts_export["rule_id"] = [
    map_rule_id(record)
    for record in alerts_export[rule_flag_columns].to_dict("records")
]

# Add detection timestamp (UTC, timezone-aware); the same value is stamped on
# every row of this export.
alerts_export["detection_timestamp"] = datetime.now(timezone.utc).isoformat()

# Reorder columns for SIEM readiness (helper/flag columns are dropped here)
siem_columns = [
    "alert_id", "rule_id", "detection_timestamp", "UserID", "Timestamp", "Country", "Role",
    "risk_score", "risk_level", "alert_message",
]
alerts_export = alerts_export[siem_columns]

# Export as CSV
# One variable holds the destination so the confirmation message reports the
# path that was actually written (the old message printed "/data/..." while
# the file went to "../data/...").
siem_csv_path = "../data/alerts_siem_ready.csv"
alerts_export.to_csv(siem_csv_path, index=False)

print(f"SIEM export successful – {len(alerts_export)} alerts written to {siem_csv_path}")


SIEM export successful – 65 alerts written to /data/alerts_siem_ready.csv
