In [None]:
# by Alex Mosok - 
# My Jupyter code for creating a detailed AI stack architecture diagram, to be used for a large nationwide platform
# June 2025
# 1404/3/20
# AMK - SabaHoosh Homa

import collections
import os

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.colors import to_rgb

# Architecture data
# Each row is a 4-tuple of strings describing one line of the diagram:
#   [0] layer name  - used as the grouping key, so it must be one of the names
#                     in layer_order (an unknown name raises KeyError when the
#                     rows are grouped)
#   [1], [2], [3]   - the three text columns drawn for the row, left to right;
#                     judging by their content: component name, example
#                     technologies/products, and a short purpose note
# Rows of one layer are drawn in the order they appear here, bottom-up inside
# the layer's band.
data = [
    ("Application","AI Assistants","Chatbots, RAG Pipelines, Copilot Apps","LLM-powered user interaction"),
    ("Application","Analytics Applications","BI Dashboards, Predictive Analytics, AI Decision Support","Operational insights via AI"),
    ("Application","Computer Vision","OCR, Surveillance AI, Image Recognition","Industrial & national security use cases"),
    ("Application","Speech & NLP","Text-to-Speech, Speech-to-Text, Whisper","Multimodal natural language applications"),
    ("Application","Digital Twins","Simulations, Predictive Maintenance","Industrial AI for manufacturing, logistics"),
    ("Application","Internal Tools","Streamlit, Dash, Flask","For internal AI labs, rapid prototyping"),
    ("Operations","DevOps/MLOps","Jenkins, GitLab CI, ArgoCD, Kubeflow Pipelines","Model lifecycle & deployment automation"),
    ("Operations","Backup & Restore","Snapshots, Replication, Disaster Recovery","Multi-region backups, hot/cold standby"),
    ("Operations","Multi-Tenancy","Kubernetes Namespaces, VPC isolation, Tenant resource pools","Data, compute, and security isolation"),
    ("Observability","Monitoring","Prometheus, Grafana, NVIDIA DCGM","System health and resource usage"),
    ("Observability","Logging","ELK Stack, OpenTelemetry","Centralized log management"),
    ("Observability","Tracing","Jaeger, Zipkin","Distributed request tracing"),
    ("Observability","Alerting","PagerDuty, OpsGenie","On-call alerting and incident management"),
    ("Observability","Drift Detection","Data Drift, Concept Drift Monitors","Model stability monitoring"),
    ("Security","Identity & Access Mgmt","SSO (OIDC, SAML), RBAC, ABAC, IAM","User and service authentication"),
    ("Security","Encryption","AES-256, TLS 1.3, Confidential Computing","Encryption at rest, in transit, in use"),
    ("Security","Secrets Management","HashiCorp Vault, AWS KMS, Azure KeyVault","Centralized secrets storage"),
    ("Security","Model Security","Watermarking, Fingerprinting, Prompt Injection Defense","Model integrity and protection"),
    ("Security","Governance & Compliance","GDPR, HIPAA, NIST 800-53, EU AI Act","Compliance frameworks"),
    ("Security","Policy Engine","OPA (Open Policy Agent)","Enforcement of security and compliance rules"),
    ("Security","Audit & Explainability","SHAP, LIME, Model Cards","Explainability, bias & fairness tracking"),
    ("Integration/API","API Gateway","Kong, Apigee, Istio Ingress","Public and internal API management"),
    ("Integration/API","Service Mesh","Istio, Linkerd","Microservice-to-microservice communication"),
    ("Integration/API","Protocols","gRPC, REST APIs","Real-time and batch model access"),
    ("Integration/API","Enterprise Integration","ERP, CRM, SCADA, IoT Integrations","Legacy and industrial system integration"),
    ("AI Platform","Model Training","PyTorch, TensorFlow, Hugging Face","Foundation Model Pretraining"),
    ("AI Platform","Experiment Tracking","MLflow, Weights & Biases, Comet","Track model experiments & hyperparameter tuning"),
    ("AI Platform","Model Registry","MLflow Registry, SageMaker Registry","Version control for models"),
    ("AI Platform","Inference Serving","NVIDIA Triton, TorchServe, TensorFlow Serving","Real-time model deployment"),
    ("AI Platform","Model Optimization","Quantization, Pruning, Knowledge Distillation","Optimize model size, speed, efficiency"),
    ("AI Platform","LLM Orchestration","LangChain, LlamaIndex","RAG pipelines, retrieval-augmented generation"),
    ("AI Platform","Multi-Modal AI","CLIP, YOLO, Whisper, Sora","Support for vision, speech, video, time-series"),
    ("Data","Storage Systems","Lustre, BeeGFS, IBM Spectrum Scale, High Performance NAS","Parallel file systems for AI workloads"),
    ("Data","Object Storage","S3, MinIO, Ceph","Blob storage for AI data lakes"),
    ("Data","Cold Storage","Glacier, Tape Libraries","Archival data storage"),
    ("Data","Data Lake","Delta Lake, Apache Iceberg, Apache Hudi","Scalable data lake formats for AI data"),
    ("Data","Data Mesh","Data Products, Decentralized Ownership","Federated data ownership model"),
    ("Data","Databases","PostgreSQL, MySQL, Oracle, MongoDB, Cassandra, Vector DB: Pinecone, Weaviate, Milvus","Transactional & analytical databases"),
    ("Data","Streaming Platforms","Kafka, Apache Pulsar","Event streaming & real-time data ingestion"),
    ("Data","ETL Pipelines","Apache Airflow, dbt, Prefect","Ingestion, transformation & pipeline orchestration"),
    ("Data","Data Catalog & Lineage","Amundsen, DataHub, Collibra","Metadata, lineage, discovery"),
    ("Compute","CPU Compute","AMD EPYC, Intel Xeon, ARM","High-density compute nodes for general purpose workloads"),
    ("Compute","GPU Compute","NVIDIA H100, A100, L40S, AMD MI300","Accelerated compute for AI training & inference"),
    ("Compute","AI ASICs","Google TPU, Custom ASIC","Optional domain-specific accelerators"),
    ("Compute","Cluster Management","Kubernetes, OpenShift, EKS Anywhere, Tanzu","Cluster orchestration & resource scheduling"),
    ("Compute","Distributed Training","Hugging Face Accelerate, Horovod, DeepSpeed, Megatron","Distributed model training at scale"),
    ("Infrastructure","Data Center Facilities","Private Data Centers","National Tier III/IV certified data centers"),
    ("Infrastructure","Public/Private Cloud","OpenStack, VMware, AWS Outposts, Azure Stack Hub, Google Anthos","For hybrid/multi-cloud setups"),
    ("Infrastructure","Network Fabric","SD-WAN, Fiber Backbone, 100/400 Gbps Ethernet","Low latency, high bandwidth nationwide connectivity"),
]

# Define layer order and colors
# One table of (layer name, fill color) pairs, listed from the top of the
# diagram to the bottom. The ordered name list and the color lookup are both
# derived from it so the two can never drift apart.
_layer_palette = (
    ("Application", "#FFD700"),
    ("Operations", "#FFA500"),
    ("Observability", "#FF8C00"),
    ("Security", "#FF6347"),
    ("Integration/API", "#40E0D0"),
    ("AI Platform", "#00CED1"),
    ("Data", "#1E90FF"),
    ("Compute", "#6A5ACD"),
    ("Infrastructure", "#2F4F4F"),
)

# Layer names only, in drawing order (first entry ends up at the top).
layer_order = [layer_name for layer_name, _ in _layer_palette]

# Layer name -> hex fill color of that layer's band.
layer_colors = dict(_layer_palette)

# Group data by layer
# Every layer gets its (initially empty) bucket up front, in layer_order, so
# the mapping iterates in diagram order and a layer without rows still has an
# entry. A row whose first field is not a known layer raises KeyError.
layer_dict = collections.OrderedDict((name, []) for name in layer_order)
for row in data:
    layer_dict[row[0]].append(row)

# Compute diagram height dynamically
# One fixed-height slot per row, plus 2 units of slack shared between the
# margin below the first row and the space above the last one.
height_per_row = 0.6
total_rows = sum(map(len, layer_dict.values()))
total_height = height_per_row * total_rows + 2

# Plotting
def _label_color(background):
    """Return "white" or "black", whichever contrasts more with *background*.

    *background* is any color spec accepted by matplotlib's ``to_rgb``.
    The choice follows the WCAG definition of relative luminance and
    contrast ratio, so dark bands get white text and light bands black text.
    """
    def _linear(channel):
        # Undo the sRGB transfer curve to get a linear-light channel value.
        if channel <= 0.04045:
            return channel / 12.92
        return ((channel + 0.055) / 1.055) ** 2.4

    red, green, blue = (_linear(channel) for channel in to_rgb(background))
    luminance = 0.2126 * red + 0.7152 * green + 0.0722 * blue
    # Contrast vs white is 1.05 / (L + 0.05); vs black it is (L + 0.05) / 0.05.
    # White is the better choice exactly when the first ratio is larger.
    return "white" if (luminance + 0.05) ** 2 < 1.05 * 0.05 else "black"


# The figure height scales with the number of rows so that every row keeps
# the same physical height however many entries there are.
fig, ax = plt.subplots(figsize=(20, total_height/1.3))
y_pos = 1  # vertical center of the next row to draw; rows are stacked upward

# Draw each layer and its sublayers
# Layers are walked in reverse so that the first name in layer_order ends up
# at the top of the stack.
for layer in reversed(layer_order):
    entries = layer_dict[layer]
    if not entries:
        # A layer without rows has nothing to show; drawing it would produce a
        # zero-height band whose label overlaps the neighboring layer.
        continue
    color = layer_colors[layer]
    # Black text is unreadable on the darkest bands, so pick the text color
    # per band instead of relying on the default.
    text_color = _label_color(color)

    # The band starts half a row below the first row's center so that every
    # row sits centered in its own slot (tied to height_per_row rather than a
    # separate hard-coded offset).
    band_bottom = y_pos - height_per_row / 2
    band_height = len(entries) * height_per_row
    ax.add_patch(patches.Rectangle((0, band_bottom), 20, band_height, facecolor=color, edgecolor='black'))
    # Layer title, vertically centered across the whole band.
    ax.text(0.3, y_pos + (len(entries)-1)*height_per_row/2, layer, fontsize=14, fontweight='bold', va='center', ha='left', color=text_color)

    # NOTE(review): the longest entries in the middle column (e.g. the
    # "Databases" row) may run into the right-hand column at this font size -
    # confirm visually and shorten or wrap if needed.
    for sublayer in entries:
        ax.text(3, y_pos, sublayer[1], fontsize=11, va='center', color=text_color)
        ax.text(8, y_pos, sublayer[2], fontsize=10, va='center', color=text_color)
        ax.text(14, y_pos, sublayer[3], fontsize=10, va='center', color=text_color)
        y_pos += height_per_row

# Beautify
# Hide the axes decorations, then pin the view to the full 20-unit-wide
# drawing area and the precomputed height so nothing is auto-rescaled.
diagram_title = "Nationwide AI Platform Architecture — Detailed Layered Diagram - by Alex Mosok (Jun 2025)"
ax.axis("off")
ax.set_ylim(bottom=0, top=total_height)
ax.set_xlim(left=0, right=20)
plt.title(diagram_title, fontsize=18, pad=20)

# Save the diagram
detailed_diagram_path = "./diagram/Ai_platform_detailed_architecture_diagram_detail_0.png"
# savefig does not create missing folders; without this the first run in a
# fresh checkout fails with FileNotFoundError.
os.makedirs(os.path.dirname(detailed_diagram_path), exist_ok=True)
# bbox_inches="tight" crops the surrounding whitespace in the saved image.
plt.savefig(detailed_diagram_path, bbox_inches="tight")
# Save before show: once the figure has been shown and closed there is nothing
# left to write out.
plt.show()
plt.close()

# Bare expression: in a notebook this displays the output path as the cell result.
detailed_diagram_path