# 🧠 NEXUS AI - Deep Learning & Graph Neural Networks

**Objective:** Apply advanced deep learning — Graph Neural Networks (GNNs), LSTMs, and Transformers — to anti-money-laundering (AML) detection.

**Why Deep Learning:**
- ✅ Captures complex non-linear patterns
- ✅ Learns hierarchical representations
- ✅ GNNs excel at network-based detection
- ✅ LSTMs capture temporal sequences
- ✅ Attention mechanisms highlight important features

**Contents:**
1. Setup & Data Preparation
2. Graph Neural Network (GNN) for Network Analysis
3. LSTM for Transaction Sequences
4. Transformer-based Anomaly Detection
5. Model Comparison
6. Deployment

In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

# Deep Learning
# PyTorch is treated as optional: a failed import is reported and recorded in
# TORCH_AVAILABLE, which the later cells check before running.
try:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import Dataset, DataLoader
    print('✅ PyTorch loaded')
    TORCH_AVAILABLE = True
except ImportError:
    print('❌ PyTorch not available - install: pip install torch')
    TORCH_AVAILABLE = False

# PyTorch Geometric is optional as well; the GNN cells additionally check
# TORCH_GEO_AVAILABLE.
try:
    import torch_geometric
    from torch_geometric.nn import GCNConv, GATConv, SAGEConv
    from torch_geometric.data import Data
    print('✅ PyTorch Geometric loaded')
    TORCH_GEO_AVAILABLE = True
except ImportError:
    print('❌ PyTorch Geometric not available - install: pip install torch-geometric')
    TORCH_GEO_AVAILABLE = False

# Global plot defaults: seaborn dark grid and a 14x8 default figure size.
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (14, 8)
# Seed NumPy (and PyTorch, when it imported) with the same fixed value.
np.random.seed(42)
if TORCH_AVAILABLE:
    torch.manual_seed(42)

print('\n🧠 Deep Learning Module Initialized')

## 1️⃣ Data Preparation

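Generate a synthetic transaction network for GNN training: nodes are customers (each with a feature vector), edges are transactions, and 5% of the nodes are labelled suspicious.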

In [None]:
# Synthetic transaction network
np.random.seed(42)
n_nodes = 200
n_edges = 500

# Draw (source, destination) pairs one at a time, source first. Self-loops
# are discarded, so the final edge count can be smaller than n_edges.
drawn_pairs = (
    (np.random.randint(0, n_nodes), np.random.randint(0, n_nodes))
    for _ in range(n_edges)
)
# Transposed so row 0 holds the sources and row 1 the destinations.
edges = np.array([[src, dst] for src, dst in drawn_pairs if src != dst]).T

# One 16-dimensional feature vector (customer profile) per node
node_features = np.random.randn(n_nodes, 16)

# Mark 5% of the nodes as suspicious (label 1); every other node stays 0
susp_nodes = np.random.choice(n_nodes, int(n_nodes * 0.05), replace=False)
node_labels = np.zeros(n_nodes)
node_labels[susp_nodes] = 1

# Shift the suspicious nodes' features so they are more extreme
node_features[susp_nodes] += 2

summary_lines = [
    '🕸️ Network Data:',
    f'   Nodes: {n_nodes}',
    f'   Edges: {edges.shape[1]}',
    f'   Suspicious: {node_labels.sum():.0f} ({node_labels.mean()*100:.1f}%)',
    f'   Features per node: {node_features.shape[1]}',
]
print('\n'.join(summary_lines))

## 2️⃣ Graph Neural Network (GNN)

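A GNN propagates information along the edges of the transaction network, so each node's prediction also draws on its neighbours — this helps it detect clusters of suspicious nodes.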

In [None]:
if TORCH_AVAILABLE and TORCH_GEO_AVAILABLE:
    class GNNDetector(nn.Module):
        """Node classifier: three GCN layers followed by a linear head.

        Args:
            in_features: Width of each node's input feature vector.
            hidden_dim: Output width of the first two GCN layers; the third
                layer narrows to ``hidden_dim // 2``.
            num_classes: Number of output classes per node.
        """

        def __init__(self, in_features, hidden_dim=64, num_classes=2):
            super().__init__()
            narrow_dim = hidden_dim // 2
            self.conv1 = GCNConv(in_features, hidden_dim)
            self.conv2 = GCNConv(hidden_dim, hidden_dim)
            self.conv3 = GCNConv(hidden_dim, narrow_dim)
            self.fc = nn.Linear(narrow_dim, num_classes)
            self.dropout = nn.Dropout(0.3)

        def forward(self, x, edge_index):
            """Return per-node log-probabilities (log-softmax over dim 1).

            Assumes ``x`` is (num_nodes, in_features) and ``edge_index`` is
            (2, num_edges), as built in this cell.
            """
            # First two graph convolutions: ReLU, then dropout.
            for conv in (self.conv1, self.conv2):
                x = self.dropout(F.relu(conv(x, edge_index)))
            # Third convolution: ReLU only, no dropout before the head.
            x = F.relu(self.conv3(x, edge_index))
            return F.log_softmax(self.fc(x), dim=1)

    # Wrap the NumPy arrays from the data cell in a PyG Data object
    data = Data(
        x=torch.FloatTensor(node_features),
        edge_index=torch.LongTensor(edges),
        y=torch.LongTensor(node_labels),
    )

    # Random 70% / 15% / remainder split over a single node permutation
    n_train = int(n_nodes * 0.7)
    n_val = int(n_nodes * 0.15)
    indices = torch.randperm(n_nodes)

    split_slices = (
        slice(None, n_train),
        slice(n_train, n_train + n_val),
        slice(n_train + n_val, None),
    )
    train_mask, val_mask, test_mask = (
        torch.zeros(n_nodes, dtype=torch.bool) for _ in split_slices
    )
    for split_mask, part in zip((train_mask, val_mask, test_mask), split_slices):
        split_mask[indices[part]] = True

    data.train_mask, data.val_mask, data.test_mask = train_mask, val_mask, test_mask

    print('✅ GNN data prepared')
    print(f'   Train: {train_mask.sum()}, Val: {val_mask.sum()}, Test: {test_mask.sum()}')
else:
    print('⚠️  PyTorch Geometric not available')

### 2.1 Train GNN Model

In [None]:
if TORCH_AVAILABLE and TORCH_GEO_AVAILABLE:
    # Training configuration. The epoch count is defined once so the progress
    # message and the loop cannot drift apart.
    n_gnn_epochs = 50
    gnn_log_every = 10

    # Initialize. The input width is read from the data instead of repeating
    # the literal 16, so changing the feature count in the data cell cannot
    # silently break the model.
    model_gnn = GNNDetector(in_features=data.x.size(1), hidden_dim=64)
    optimizer = torch.optim.Adam(model_gnn.parameters(), lr=0.01, weight_decay=5e-4)
    # The model emits log-probabilities, hence NLLLoss rather than CrossEntropyLoss.
    criterion = nn.NLLLoss()

    def _gnn_accuracy(pred, mask):
        """Fraction of nodes selected by `mask` whose prediction matches data.y."""
        return (pred[mask] == data.y[mask]).float().mean()

    # Training (full-batch: every step runs the whole graph through the model,
    # but only the training nodes contribute to the loss)
    print(f'🚀 Training GNN ({n_gnn_epochs} epochs)...\n')
    model_gnn.train()

    for epoch in range(n_gnn_epochs):
        optimizer.zero_grad()
        out = model_gnn(data.x, data.edge_index)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        if (epoch + 1) % gnn_log_every == 0:
            # Switch to eval mode so dropout is disabled while measuring accuracy.
            model_gnn.eval()
            with torch.no_grad():
                pred = model_gnn(data.x, data.edge_index).argmax(dim=1)
                train_acc = _gnn_accuracy(pred, data.train_mask)
                val_acc = _gnn_accuracy(pred, data.val_mask)
            model_gnn.train()
            # The loss shown is the training-mode loss of this step.
            print(f'Epoch {epoch+1:3d} | Loss: {loss.item():.4f} | Train: {train_acc:.4f} | Val: {val_acc:.4f}')

    # Final evaluation on the held-out test nodes. test_acc stays a 0-dim
    # tensor because the comparison cell calls test_acc.item().
    model_gnn.eval()
    with torch.no_grad():
        pred = model_gnn(data.x, data.edge_index).argmax(dim=1)
        test_acc = _gnn_accuracy(pred, data.test_mask)

    print(f'\n✅ GNN Training Complete!')
    print(f'   Test Accuracy: {test_acc:.4f}')

### 2.2 Visualize GNN Embeddings

In [None]:
if TORCH_AVAILABLE and TORCH_GEO_AVAILABLE:
    from sklearn.decomposition import PCA

    # Node embeddings = output of the second GCN layer (before its ReLU),
    # computed in eval mode without gradient tracking.
    model_gnn.eval()
    with torch.no_grad():
        x = model_gnn.conv1(data.x, data.edge_index)
        x = F.relu(x)
        x = model_gnn.conv2(x, data.edge_index)
        embeddings = x.numpy()

    # Project the embeddings to 2D for plotting.
    pca = PCA(n_components=2)
    emb_2d = pca.fit_transform(embeddings)

    plt.figure(figsize=(14, 10))
    node_classes = data.y.numpy()
    # One scatter call per class so that each class owns a legend entry.
    # A single scatter with a per-point colour list yields one legend handle,
    # so the 'Suspicious' label would never be shown.
    for is_normal, color, class_name in (
        (True, 'blue', 'Normal'),
        (False, 'red', 'Suspicious'),
    ):
        selected = (node_classes == 0) == is_normal
        plt.scatter(emb_2d[selected, 0], emb_2d[selected, 1],
                    c=color, alpha=0.6, s=100, label=class_name)
    plt.title('🧠 GNN Node Embeddings (2D PCA)', fontsize=16, fontweight='bold')
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.legend(loc='best')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## 3️⃣ LSTM for Transaction Sequences

LSTMs capture temporal patterns.

In [None]:
if TORCH_AVAILABLE:
    n_customers, seq_length, n_seq_features = 500, 20, 10

    # Random sequences, one per customer; 5% of customers are labelled suspicious
    sequences = np.random.randn(n_customers, seq_length, n_seq_features)
    susp_cust = np.random.choice(n_customers, int(n_customers * 0.05), replace=False)
    seq_labels = np.zeros(n_customers)
    seq_labels[susp_cust] = 1

    # Inject the suspicious signature for all flagged customers at once:
    # feature 0 becomes a linear ramp from 0 to 3, and feature 1 is raised
    # by 3 from time step 15 onwards.
    sequences[susp_cust, :, 0] = np.linspace(0, 3, seq_length)
    sequences[susp_cust, 15:, 1] += 3

    class SequenceDataset(Dataset):
        """Pairs each sequence with its label as (float tensor, long tensor)."""

        def __init__(self, sequences, labels):
            self.sequences = torch.FloatTensor(sequences)
            self.labels = torch.LongTensor(labels)

        def __len__(self):
            return self.sequences.shape[0]

        def __getitem__(self, idx):
            return self.sequences[idx], self.labels[idx]

    # Contiguous 70% / 15% / remainder split in the generated order
    train_size = int(n_customers * 0.7)
    val_size = int(n_customers * 0.15)
    val_stop = train_size + val_size

    train_data, val_data, test_data = (
        SequenceDataset(sequences[part], seq_labels[part])
        for part in (slice(None, train_size), slice(train_size, val_stop), slice(val_stop, None))
    )

    # Only the training loader shuffles
    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=32)
    test_loader = DataLoader(test_data, batch_size=32)

    print(f'📊 Sequences: {n_customers}, Length: {seq_length}, Features: {n_seq_features}')
    print(f'🚨 Suspicious: {seq_labels.sum():.0f}')

### 3.1 LSTM Model

In [None]:
if TORCH_AVAILABLE:
    class LSTMDetector(nn.Module):
        """LSTM sequence classifier with attention pooling over time steps.

        Args:
            input_dim: Number of features per time step.
            hidden_dim: LSTM hidden size.
            num_layers: Number of stacked LSTM layers.
            num_classes: Number of output classes.
            dropout: Dropout rate between stacked LSTM layers and inside the
                classification head. Defaults to the previous fixed 0.3.
        """

        def __init__(self, input_dim, hidden_dim=128, num_layers=2, num_classes=2, dropout=0.3):
            super().__init__()
            # nn.LSTM applies dropout only between stacked layers. With a
            # single layer a non-zero rate does nothing and triggers a
            # warning, so it is switched off in that case.
            lstm_dropout = dropout if num_layers > 1 else 0.0
            self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=lstm_dropout)
            # Scores each time step with a single scalar.
            self.attention = nn.Linear(hidden_dim, 1)
            self.fc = nn.Sequential(
                nn.Linear(hidden_dim, 64),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(64, num_classes)
            )

        def forward(self, x):
            """Return log-probabilities per sequence.

            Assumes ``x`` is (batch, seq_len, input_dim), matching
            ``batch_first=True``.
            """
            lstm_out, _ = self.lstm(x)
            # Softmax over dim 1 (time) so each sequence's weights sum to 1.
            attn_weights = torch.softmax(self.attention(lstm_out), dim=1)
            # Attention-weighted sum of the per-step LSTM outputs.
            context = torch.sum(attn_weights * lstm_out, dim=1)
            out = self.fc(context)
            return F.log_softmax(out, dim=1)

    model_lstm = LSTMDetector(input_dim=n_seq_features)
    print('✅ LSTM with attention defined')

### 3.2 Train LSTM

In [None]:
if TORCH_AVAILABLE:
    optimizer_lstm = torch.optim.Adam(model_lstm.parameters(), lr=0.001)
    criterion_lstm = nn.NLLLoss()

    def _score_loader(loader, criterion=None):
        """Run model_lstm over `loader` without gradient tracking.

        Returns (sum of per-batch losses, correct predictions, samples seen).
        The loss sum stays 0 when no criterion is given. The caller is
        responsible for putting the model in eval mode first.
        """
        loss_sum = n_correct = n_seen = 0
        with torch.no_grad():
            for batch_x, batch_y in loader:
                log_probs = model_lstm(batch_x)
                if criterion is not None:
                    loss_sum += criterion(log_probs, batch_y).item()
                n_correct += (log_probs.argmax(dim=1) == batch_y).sum().item()
                n_seen += batch_y.size(0)
        return loss_sum, n_correct, n_seen

    print('🚀 Training LSTM (30 epochs)...\n')
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

    for epoch in range(30):
        # --- one optimisation pass over the training batches ---
        model_lstm.train()
        epoch_loss = epoch_correct = epoch_seen = 0
        for batch_x, batch_y in train_loader:
            optimizer_lstm.zero_grad()
            log_probs = model_lstm(batch_x)
            batch_loss = criterion_lstm(log_probs, batch_y)
            batch_loss.backward()
            optimizer_lstm.step()
            epoch_loss += batch_loss.item()
            epoch_correct += (log_probs.argmax(dim=1) == batch_y).sum().item()
            epoch_seen += batch_y.size(0)

        # --- validation pass ---
        model_lstm.eval()
        val_loss_sum, val_hits, val_seen = _score_loader(val_loader, criterion_lstm)

        # Losses are averaged over batches, accuracies over samples.
        epoch_metrics = {
            'train_loss': epoch_loss / len(train_loader),
            'val_loss': val_loss_sum / len(val_loader),
            'train_acc': epoch_correct / epoch_seen,
            'val_acc': val_hits / val_seen,
        }
        for metric_name, metric_value in epoch_metrics.items():
            history[metric_name].append(metric_value)

        if (epoch + 1) % 5 == 0:
            print(f'Epoch {epoch+1:2d} | Loss: {history["train_loss"][-1]:.4f} | Acc: {history["train_acc"][-1]:.4f}')

    # Final accuracy on the held-out test loader
    model_lstm.eval()
    _, test_hits, test_seen = _score_loader(test_loader)
    lstm_test_acc = test_hits / test_seen
    print(f'\n✅ LSTM Test Accuracy: {lstm_test_acc:.4f}')

### 3.3 Training History

In [None]:
if TORCH_AVAILABLE:
    # Two side-by-side panels: loss on the left, accuracy on the right,
    # each showing the train and validation curves from `history`.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
    panels = ((ax1, 'loss', '📉 Loss'), (ax2, 'acc', '📈 Accuracy'))
    for axis, metric, panel_title in panels:
        for split, curve_label in (('train', 'Train'), ('val', 'Val')):
            axis.plot(history[f'{split}_{metric}'], label=curve_label, linewidth=2)
        axis.set_title(panel_title, fontsize=14, fontweight='bold')
        axis.legend()
        axis.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## 4️⃣ Transformer Detector

In [None]:
if TORCH_AVAILABLE:
    import math

    class TransformerDetector(nn.Module):
        """Transformer-encoder sequence classifier with mean pooling.

        Args:
            input_dim: Number of features per time step.
            d_model: Width of the encoder's token representation.
            nhead: Number of attention heads.
            num_layers: Number of stacked encoder layers.
            num_classes: Number of output classes (default 2, the previous
                fixed value; mirrors the other detectors in this notebook).
        """

        def __init__(self, input_dim, d_model=64, nhead=4, num_layers=3, num_classes=2):
            super().__init__()
            # Plain int attribute: not a parameter or buffer, so the saved
            # state_dict keys are unchanged.
            self.d_model = d_model
            self.input_proj = nn.Linear(input_dim, d_model)
            encoder_layer = nn.TransformerEncoderLayer(
                d_model, nhead, dim_feedforward=256, dropout=0.1, batch_first=True
            )
            self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
            self.fc = nn.Sequential(nn.Linear(d_model, 32), nn.ReLU(), nn.Dropout(0.2), nn.Linear(32, num_classes))

        def _positional_encoding(self, seq_len, like):
            """Return the (seq_len, d_model) sinusoidal position table.

            Built on the fly with the dtype and device of `like`, so any
            sequence length works and no extra state is stored.
            """
            position = torch.arange(seq_len, dtype=like.dtype, device=like.device).unsqueeze(1)
            inv_freq = torch.exp(
                torch.arange(0, self.d_model, 2, dtype=like.dtype, device=like.device)
                * (-math.log(10000.0) / self.d_model)
            )
            angles = position * inv_freq
            table = torch.zeros(seq_len, self.d_model, dtype=like.dtype, device=like.device)
            table[:, 0::2] = torch.sin(angles)
            # For odd d_model there is one fewer odd column than even column.
            table[:, 1::2] = torch.cos(angles[:, : self.d_model // 2])
            return table

        def forward(self, x):
            """Return log-probabilities per sequence.

            Assumes ``x`` is (batch, seq_len, input_dim), matching
            ``batch_first=True``.
            """
            x = self.input_proj(x)
            # Self-attention alone ignores token order, and the mean pooling
            # below discards it as well. Without this term the model could
            # not tell a sequence from a shuffled copy of itself.
            x = x + self._positional_encoding(x.size(1), x)
            x = self.transformer(x)
            x = x.mean(dim=1)
            return F.log_softmax(self.fc(x), dim=1)

    model_trans = TransformerDetector(n_seq_features)
    optimizer_trans = torch.optim.Adam(model_trans.parameters(), lr=0.001)
    # Built once instead of once per batch.
    criterion_trans = nn.NLLLoss()
    n_trans_epochs = 20

    print(f'🚀 Training Transformer ({n_trans_epochs} epochs)...')
    for epoch in range(n_trans_epochs):
        model_trans.train()
        for seqs, labels in train_loader:
            optimizer_trans.zero_grad()
            loss = criterion_trans(model_trans(seqs), labels)
            loss.backward()
            optimizer_trans.step()
        if (epoch + 1) % 5 == 0:
            print(f'Epoch {epoch+1}')

    # Accuracy on the held-out test loader
    model_trans.eval()
    trans_correct = trans_total = 0
    with torch.no_grad():
        for seqs, labels in test_loader:
            trans_correct += (model_trans(seqs).argmax(dim=1) == labels).sum().item()
            trans_total += labels.size(0)
    trans_test_acc = trans_correct / trans_total
    print(f'\n✅ Transformer Test Accuracy: {trans_test_acc:.4f}')

## 5️⃣ Model Comparison

In [None]:
if TORCH_AVAILABLE:
    # The LSTM and Transformer only need PyTorch, so they are compared even
    # when PyTorch Geometric is missing. The GNN row is added only when it
    # was actually trained, mirroring the guard in the deployment cell.
    # NOTE: the GNN is scored on the node-classification data while the
    # other two are scored on the sequence data, so the accuracies come from
    # different test sets.
    comparison_rows = []
    if TORCH_GEO_AVAILABLE:
        comparison_rows.append(('GNN', test_acc.item(), 'Networks', '#3498db'))
    comparison_rows.append(('LSTM', lstm_test_acc, 'Sequences', '#e74c3c'))
    comparison_rows.append(('Transformer', trans_test_acc, 'Long-range', '#2ecc71'))

    results = pd.DataFrame({
        'Model': [row[0] for row in comparison_rows],
        'Accuracy': [row[1] for row in comparison_rows],
        'Best For': [row[2] for row in comparison_rows]
    })
    print('\n' + '='*70)
    print('🏆 MODEL COMPARISON')
    print('='*70)
    print(results.to_string(index=False))
    print('='*70)

    plt.figure(figsize=(12, 7))
    # One colour per row, so the list always matches the number of bars.
    colors = [row[3] for row in comparison_rows]
    bars = plt.bar(results['Model'], results['Accuracy'], color=colors, alpha=0.8)
    # Write each accuracy value just above its bar.
    for bar in bars:
        plt.text(bar.get_x() + bar.get_width()/2., bar.get_height(),
                f'{bar.get_height():.4f}', ha='center', va='bottom', fontsize=14, fontweight='bold')
    plt.title('🧠 Model Performance', fontsize=16, fontweight='bold')
    plt.ylabel('Test Accuracy', fontsize=14)
    plt.ylim(0, 1.1)
    plt.grid(True, alpha=0.3, axis='y')
    plt.tight_layout()
    plt.show()

## 6️⃣ Deployment

In [None]:
if TORCH_AVAILABLE:
    import os

    # Ensure the output directory exists before writing any checkpoint
    os.makedirs('../models', exist_ok=True)

    # (model, destination) pairs; the GNN is included only when PyG is present
    checkpoints = [
        (model_lstm, '../models/lstm_detector.pth'),
        (model_trans, '../models/transformer_detector.pth'),
    ]
    if TORCH_GEO_AVAILABLE:
        checkpoints.append((model_gnn, '../models/gnn_detector.pth'))

    # Only the state_dict (weights) is saved, not the model object itself
    for trained_model, checkpoint_path in checkpoints:
        torch.save(trained_model.state_dict(), checkpoint_path)
    print('💾 Models saved!')

    print('\n🚀 DEPLOYMENT CHECKLIST:')
    checklist = (
        '  1. ✅ Export to ONNX',
        '  2. ✅ Quantize models',
        '  3. ✅ Deploy with FastAPI',
        '  4. ✅ Monitor latency',
        '  5. ✅ A/B testing',
        '  6. ✅ Continuous retraining',
        '  7. ✅ Explain with SHAP',
    )
    for checklist_item in checklist:
        print(checklist_item)
    print('\n✅ NEXUS AI Deep Learning Complete! 🎉')