# Volume 1, Chapter 12: Ethics and Responsible AI

**Deploy AI Safely in Network Operations**

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eduardd76/AI_for_networking_and_security_engineers/blob/master/Volume-1-Foundations/Colab-Notebooks/Vol1_Ch12_Ethics.ipynb)

---

**What you'll learn:**
- 🔒 Sanitize sensitive data
- 📝 Audit AI decisions
- ✋ Human-in-the-loop approvals
- ⚖️ Detect and mitigate bias

In [None]:
!pip install -q anthropic

import os
from getpass import getpass
from datetime import datetime
import json
import re

# Prefer the Colab secret store for the API key. If that fails for any reason
# (not running in Colab, secret missing, access denied, ...) fall back to an
# already-set environment variable, and only then to an interactive prompt.
try:
    from google.colab import userdata
    os.environ['ANTHROPIC_API_KEY'] = userdata.get('ANTHROPIC_API_KEY')
except Exception:
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit (e.g. interrupting the cell) are not swallowed by the fallback.
    if 'ANTHROPIC_API_KEY' not in os.environ:
        os.environ['ANTHROPIC_API_KEY'] = getpass('Anthropic API key: ')

from anthropic import Anthropic
client = Anthropic()
print("✓ Ready!")

---
## 🔒 Example 1: Sanitize Sensitive Data

In [None]:
class DataSanitizer:
    """Redact credentials (and optionally IP addresses) from text before it is sent to an AI API.

    Each entry in ``PATTERNS`` is applied case-insensitively; every match is
    replaced by a ``[REDACTED_<NAME>]`` placeholder and recorded so that
    ``report()`` can list what was removed.

    Args:
        mask_ips: When true, IPv4-looking addresses are redacted as well.
            Defaults to ``False`` (addresses are left untouched).
    """

    # Applied in insertion order. The alternation in "api_key" is a
    # non-capturing group so the regex always yields the whole matched span.
    PATTERNS = {
        "password": r'password\s+\S+',
        "secret": r'secret\s+\d+\s+\S+',
        "snmp_community": r'snmp-server community \S+',
        "api_key": r'(?:api[_-]?key|token)[=:\s]+[\w-]+',
        "ip_address": r'\b(?:\d{1,3}\.){3}\d{1,3}\b',
    }

    def __init__(self, mask_ips=False):
        self.mask_ips = mask_ips
        # Redactions made by the most recent sanitize() call.
        self.redactions = []

    def sanitize(self, text):
        """Return ``text`` with every sensitive match replaced by a placeholder.

        The redaction record is reset on each call, so ``report()`` always
        describes the most recent input only.

        Args:
            text: Raw text (for example a device configuration).

        Returns:
            The sanitized text.
        """
        self.redactions = []
        sanitized = text

        for name, pattern in self.PATTERNS.items():
            if name == "ip_address" and not self.mask_ips:
                continue

            compiled = re.compile(pattern, re.IGNORECASE)
            placeholder = f"[REDACTED_{name.upper()}]"

            # Record the *entire* matched span (group 0). re.findall() would
            # return only a capture group's text when the pattern has one,
            # which left the actual secret value in the output.
            self.redactions.extend(
                {"type": name, "value": found.group(0)}
                for found in compiled.finditer(sanitized)
            )

            # Substitute at the match positions instead of str.replace() on
            # the matched value: replace() also rewrote longer values that
            # merely start with a shorter match (192.168.1.1 vs 192.168.1.10).
            sanitized = compiled.sub(lambda _found: placeholder, sanitized)

        return sanitized

    def report(self):
        """Return the redactions from the last ``sanitize()`` call.

        Returns:
            A list of ``{"type": <pattern name>, "value": <matched text>}``
            dicts. The values are the original sensitive strings, so treat the
            report itself as sensitive.
        """
        return self.redactions

# Test
# Sample device configuration containing an enable secret, a user password
# and two SNMP community strings.
config = """
hostname ROUTER-01
enable secret 5 $1$xyz$hashedvalue
username admin password cisco123
snmp-server community public RO
snmp-server community s3cr3tRW RW
interface GigabitEthernet0/0
 ip address 192.168.1.1 255.255.255.0
"""

# Default sanitizer: mask_ips is off, so the interface address stays visible.
sanitizer = DataSanitizer()
safe_config = sanitizer.sanitize(config)

print("🔒 DATA SANITIZATION")
print("=" * 60)
print("\n📋 Original config has sensitive data")
print("\n📋 Sanitized config (safe to send to AI):")
print(safe_config)
print(f"\n⚠️ Redacted {len(sanitizer.report())} items:")
# NOTE: this echoes the first 20 characters of each redacted value into the
# cell output. That is fine for this demo data; avoid it with real configs.
for r in sanitizer.report():
    print(f"   [{r['type']}] {r['value'][:20]}...")

---
## 📝 Example 2: Audit Logging

In [None]:
class AuditLogger:
    """In-memory audit trail of AI actions, kept for compliance review."""

    def __init__(self):
        # Entries are appended in call order and never removed by this class.
        self.logs = []

    @staticmethod
    def _summarize(text):
        """Return ``text`` cut to 100 characters plus "..." when it is longer."""
        if len(text) > 100:
            return text[:100] + "..."
        return text

    def log(self, action, input_data, output, user="system", metadata=None):
        """Append one audit entry and return it.

        Args:
            action: Label for what was done (used by ``get_logs`` filtering).
            input_data: Text sent to the AI; only a 100-character summary is kept.
            output: Text returned by the AI; only a 100-character summary is kept.
            user: Who triggered the action.
            metadata: Optional extra details; an empty dict is stored when falsy.

        Returns:
            The entry dict that was appended to ``self.logs``.
        """
        record = {
            "timestamp": datetime.now().isoformat(),
            "action": action,
            "user": user,
            "input_summary": self._summarize(input_data),
            "output_summary": self._summarize(output),
            "metadata": metadata if metadata else {},
        }
        self.logs.append(record)
        return record

    def get_logs(self, action=None, user=None):
        """Return entries matching the given action and/or user.

        A falsy filter is ignored. With no active filter the internal list
        itself is returned, not a copy.
        """
        if not action and not user:
            return self.logs
        return [
            rec for rec in self.logs
            if (not action or rec["action"] == action)
            and (not user or rec["user"] == user)
        ]

    def export(self):
        """Serialize every entry to a JSON string indented by two spaces."""
        serialized = json.dumps(self.logs, indent=2)
        return serialized

# Use with AI calls
audit = AuditLogger()

# Make an AI call and log it
prompt = "Analyze this config for security issues: interface Gi0/0"
response = client.messages.create(
    model="claude-haiku-4-5-20251001",
    max_tokens=200,
    temperature=0,
    messages=[{"role": "user", "content": prompt}]
)

# Record who asked what and what came back. The metadata keeps the model name
# and the input-token count reported by the API (output tokens are not recorded).
audit.log(
    action="config_analysis",
    input_data=prompt,
    output=response.content[0].text,
    user="network_admin",
    metadata={"model": "claude-haiku-4-5-20251001", "tokens": response.usage.input_tokens}
)

print("📝 AUDIT LOGGING")
print("=" * 60)
print("\nLogged entry:")
print(json.dumps(audit.logs[0], indent=2))

---
## ✋ Example 3: Human-in-the-Loop Approval

In [None]:
class ApprovalGate:
    """Require human approval before risky commands are acted on.

    Commands are classified by case-insensitive substring match against
    ``RISKY_PATTERNS``. Requests that need approval are stored in
    ``self.pending`` and stay there after approval, with their ``status``
    updated.
    """

    # Matched as plain substrings of the lower-cased command, so e.g.
    # "no shutdown" is flagged by "shutdown" as well.
    RISKY_PATTERNS = [
        "shutdown",
        "no interface",
        "write erase",
        "reload",
        "delete",
        "format"
    ]

    def __init__(self):
        # Every request that required approval, in creation order.
        self.pending = []

    def assess_risk(self, commands):
        """Classify one command or a collection of commands.

        Args:
            commands: A single command string, or any iterable of command
                strings (list, tuple, ...).

        Returns:
            ``{"level": "HIGH" | "LOW", "risks": [...], "approval_required": bool}``
            where each risk is ``{"command": ..., "reason": ...}``. A command
            matching several patterns yields one risk entry per pattern.
        """
        # Only a bare string is a single command; tuples and other iterables
        # are walked element by element rather than wrapped as one "command".
        command_list = [commands] if isinstance(commands, str) else list(commands)

        risks = [
            {"command": cmd, "reason": f"Contains '{pattern}'"}
            for cmd in command_list
            for pattern in self.RISKY_PATTERNS
            if pattern in cmd.lower()
        ]

        if risks:
            return {"level": "HIGH", "risks": risks, "approval_required": True}
        return {"level": "LOW", "risks": [], "approval_required": False}

    def request_approval(self, action, commands, justification):
        """Auto-approve low-risk commands or queue a pending approval request.

        Args:
            action: Label for the requested change.
            commands: A command string or an iterable of command strings.
            justification: Free-text reason supplied by the requester.

        Returns:
            ``{"status": "AUTO_APPROVED", "reason": "Low risk"}`` when nothing
            risky was found, otherwise
            ``{"status": "PENDING_APPROVAL", "request_id": "REQ-NNNN"}``.
        """
        # Snapshot non-string input so the stored request cannot change if the
        # caller later mutates the list that was assessed.
        snapshot = commands if isinstance(commands, str) else list(commands)
        assessment = self.assess_risk(snapshot)

        if not assessment["approval_required"]:
            return {"status": "AUTO_APPROVED", "reason": "Low risk"}

        request = {
            # Requests are never removed from self.pending, so the length
            # gives a unique, increasing sequence number.
            "id": f"REQ-{len(self.pending)+1:04d}",
            "action": action,
            "commands": snapshot,
            "justification": justification,
            "risk_assessment": assessment,
            "status": "PENDING",
            "created": datetime.now().isoformat()
        }
        self.pending.append(request)
        return {"status": "PENDING_APPROVAL", "request_id": request["id"]}

    def approve(self, request_id, approver):
        """Approve a request that is still pending.

        Args:
            request_id: The ``REQ-NNNN`` id returned by ``request_approval``.
            approver: Identifier of the person approving.

        Returns:
            ``True`` if a pending request with that id was approved; ``False``
            if the id is unknown or the request is no longer pending. An
            existing approval record is never overwritten.
        """
        for req in self.pending:
            if req["id"] != request_id:
                continue
            if req["status"] != "PENDING":
                # Already decided: keep the original approver and timestamp.
                return False
            req["status"] = "APPROVED"
            req["approver"] = approver
            req["approved_at"] = datetime.now().isoformat()
            return True
        return False

# Test
gate = ApprovalGate()

# Safe command - auto-approved
result1 = gate.request_approval(
    action="show_config",
    commands="show running-config",
    justification="Routine audit"
)

# Risky command - needs approval
result2 = gate.request_approval(
    action="interface_shutdown",
    commands=["interface Gi0/1", "shutdown"],
    justification="Decommissioning old link"
)

print("✋ HUMAN-IN-THE-LOOP APPROVAL")
print("=" * 60)
print(f"\nSafe command: {result1}")
print(f"\nRisky command: {result2}")

# Simulate approval. Only results for queued requests carry a "request_id";
# auto-approved results do not, so this branch is skipped for them.
if result2.get("request_id"):
    gate.approve(result2["request_id"], "senior_engineer")
    print(f"\n✅ Request {result2['request_id']} approved by senior_engineer")

---
## ⚖️ Example 4: Bias Detection

In [None]:
def check_vendor_bias(prompt):
    """Send ``prompt`` to the model and measure how vendor mentions are distributed.

    Whole-word mentions of each tracked vendor are counted in the lower-cased
    reply. The vendor with the most mentions is reported together with its
    share of all vendor mentions.

    Args:
        prompt: The user prompt to send.

    Returns:
        When no tracked vendor is mentioned:
        ``{"bias": "NONE", "vendors": ..., "response": <first 200 chars>}``.
        Otherwise a dict with ``bias`` ("HIGH" above 70%, "MODERATE" above
        50%, else "LOW"), ``dominant_vendor``, ``percentage``, ``vendors``
        and ``response`` (first 300 chars).
    """
    reply = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        temperature=0,
        messages=[{"role": "user", "content": prompt}]
    )

    full_text = reply.content[0].text
    lowered = full_text.lower()

    # Count whole-word mentions per vendor, keeping this fixed key order.
    vendors = {}
    for vendor_name in ("cisco", "juniper", "arista", "palo alto"):
        vendors[vendor_name] = len(re.findall(rf'\b{vendor_name}\b', lowered))

    total = sum(vendors.values())
    if total == 0:
        return {"bias": "NONE", "vendors": vendors, "response": full_text[:200]}

    # On a tie the vendor listed first wins.
    dominant, dominant_count = max(vendors.items(), key=lambda pair: pair[1])
    dominant_pct = dominant_count / total * 100

    if dominant_pct > 70:
        level = "HIGH"
    elif dominant_pct > 50:
        level = "MODERATE"
    else:
        level = "LOW"

    return {
        "bias": level,
        "dominant_vendor": dominant,
        "percentage": dominant_pct,
        "vendors": vendors,
        "response": full_text[:300]
    }

# Test with potentially biased prompt
result = check_vendor_bias("What's the best way to configure enterprise routing?")

print("⚖️ BIAS DETECTION")
print("=" * 60)
print(f"\nBias level: {result['bias']}")
# The "NONE" result carries no dominant vendor or percentage, hence the guard.
if 'dominant_vendor' in result:
    print(f"Dominant vendor: {result['dominant_vendor']} ({result['percentage']:.0f}%)")
print(f"\nVendor mentions: {result['vendors']}")
print(f"\nResponse preview: {result['response']}...")

---
## 🛡️ Example 5: Complete Responsible AI System

In [None]:
class ResponsibleAISystem:
    """Combine sanitization, audit logging and approval gating in one object."""

    def __init__(self):
        self.sanitizer = DataSanitizer()
        self.audit = AuditLogger()
        self.approval = ApprovalGate()

    def analyze(self, config, user, action_type="analysis"):
        """Sanitize ``config``, ask the model to review it, and audit the exchange.

        Args:
            config: Raw configuration text.
            user: Who requested the analysis (recorded in the audit entry).
            action_type: Audit action label; defaults to "analysis".

        Returns:
            ``{"analysis": <model text>, "redacted_items": <int>, "audit_logged": True}``.
        """
        # Only the sanitized text leaves this process.
        scrubbed = self.sanitizer.sanitize(config)
        redaction_count = len(self.sanitizer.report())

        user_message = {
            "role": "user",
            "content": f"Analyze for security issues:\n{scrubbed}"
        }
        reply = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=300,
            temperature=0,
            messages=[user_message]
        )
        findings = reply.content[0].text

        # The audit entry is built from the sanitized input, not the raw config.
        self.audit.log(
            action=action_type,
            input_data=scrubbed,
            output=findings,
            user=user,
            metadata={"redactions": redaction_count}
        )

        return {
            "analysis": findings,
            "redacted_items": redaction_count,
            "audit_logged": True
        }

    def apply_change(self, commands, user, justification):
        """Submit ``commands`` to the approval gate and audit the outcome.

        No command is executed here: the method only returns the gate's
        decision (auto-approved, or pending with a request id).

        Args:
            commands: A command string or list of command strings.
            user: Who requested the change (recorded in the audit entry).
            justification: Reason for the change, passed to the gate.

        Returns:
            The dict returned by ``ApprovalGate.request_approval``.
        """
        decision = self.approval.request_approval(
            action="config_change",
            commands=commands,
            justification=justification
        )

        # Both the request and the decision are stored in stringified form.
        self.audit.log(
            action="change_request",
            input_data=str(commands),
            output=str(decision),
            user=user
        )

        return decision

# Demo
system = ResponsibleAISystem()

# Sample configuration with a plaintext enable password to be redacted.
config = """
hostname R1
enable password test123
interface Gi0/0
 ip address 10.1.1.1 255.255.255.0
"""

print("🛡️ COMPLETE RESPONSIBLE AI SYSTEM")
print("=" * 60)

# Analyze
result = system.analyze(config, user="admin")
print("\n✅ Analysis complete")
print(f"   Redacted items: {result['redacted_items']}")
print(f"   Audit logged: {result['audit_logged']}")
print("\n📋 Analysis result:")
print(result['analysis'][:300])

# Try to apply risky change: "shutdown" is a risky pattern, so the gate
# returns a pending request rather than an auto-approval.
change_result = system.apply_change(
    commands=["interface Gi0/0", "shutdown"],
    user="admin",
    justification="Maintenance window"
)
print(f"\n\n🔒 Change request: {change_result}")

---
## 🎯 Key Takeaways

| Practice | Why It Matters |
|----------|----------------|
| Sanitize data | Protect credentials from exposure |
| Audit logging | Compliance and troubleshooting |
| Approval gates | Prevent unintended damage |
| Bias detection | Ensure fair recommendations |

**The responsible AI checklist:**
1. ✅ Never send raw credentials to AI APIs
2. ✅ Log all AI decisions with context
3. ✅ Require approval for destructive actions
4. ✅ Monitor for vendor/solution bias
5. ✅ Keep humans in the loop for production changes

---

## 🎉 Congratulations!

You've completed **Volume 1: Foundations**!

You now understand:
- How LLMs work (tokens, context, models)
- How to call APIs effectively
- How to write good prompts
- How to get structured outputs
- How to manage costs
- How to deploy responsibly

**Next:** [Volume 2: Practical Applications](../../../Volume-2-Practical-Applications/) - Build RAG systems, agents, and automation!