In [1]:
#!/usr/bin/env python3
"""
Fine-Grained Policy Generator for AccessBench Framework
Generates policies in the exact JSON format required by the simulator
"""

import json
import random
import statistics
from collections import Counter
from typing import Dict, List, Optional

def generate_fine_grained_policies() -> List[Dict]:
    """Return the hand-written baseline catalogue of 20 fine-grained policies.

    Every policy is a dict with the keys ``id``, ``group``, ``category``,
    ``rule``, ``signal_channel``, ``severity`` and ``rationale``, in that
    order.  The catalogue is organised into seven categories (Access,
    Privilege, Exfiltration, Reconnaissance, Workflow, Anomaly, Lifecycle)
    and is returned in that category order.

    Returns:
        A freshly built list of freshly built dicts; the literals below are
        re-evaluated on every call, so mutating the result does not affect
        later calls.
    """

    # ==================== ACCESS POLICIES ====================
    access_policies = [
        {
            "id": "P1.1",
            "group": "Logon Time Restrictions",
            "category": "Access",
            "rule": "No login between 22:00 and 06:00 without multi-factor approval",
            "signal_channel": "s_logon_time",
            "severity": 0.7,
            "rationale": "Detects reconnaissance outside business hours",
        },
        {
            "id": "P1.2",
            "group": "Logon Time Restrictions",
            "category": "Access",
            "rule": "No more than 2 consecutive after-hours logins within 7 days",
            "signal_channel": "s_logon_time",
            "severity": 0.6,
            "rationale": "Detects persistent off-hours activity patterns",
        },
        {
            "id": "P1.3",
            "group": "Workstation Access Patterns",
            "category": "Access",
            "rule": "No access to more than 5 unique workstations per day without department head approval",
            "signal_channel": "s_logon_breadth",
            "severity": 0.8,
            "rationale": "Detects horizontal movement and reconnaissance",
        },
        {
            "id": "P1.4",
            "group": "Workstation Access Patterns",
            "category": "Access",
            "rule": "No cross-department workstation access without business justification",
            "signal_channel": "s_logon_breadth",
            "severity": 0.75,
            "rationale": "Detects privilege boundary testing",
        },
    ]

    # ==================== PRIVILEGE POLICIES ====================
    privilege_policies = [
        {
            "id": "P2.1",
            "group": "Role Change Monitoring",
            "category": "Privilege",
            "rule": "All role changes require manager approval and security review",
            "signal_channel": "s_role_change",
            "severity": 0.85,
            "rationale": "Detects privilege escalation attempts",
        },
        {
            "id": "P2.2",
            "group": "Role Change Monitoring",
            "category": "Privilege",
            "rule": "No more than 2 role changes per quarter without CISO approval",
            "signal_channel": "s_role_change",
            "severity": 0.8,
            "rationale": "Detects rapid privilege accumulation",
        },
        {
            "id": "P2.3",
            "group": "Sensitive Role Access",
            "category": "Privilege",
            "rule": "Access to financial systems requires additional background check",
            "signal_channel": "s_role_sensitivity",
            "severity": 0.9,
            "rationale": "Protects high-value financial data",
        },
    ]

    # ==================== EXFILTRATION POLICIES ====================
    exfiltration_policies = [
        {
            "id": "P3.1",
            "group": "USB Device Controls",
            "category": "Exfiltration",
            "rule": "No USB storage devices allowed without encryption and registration",
            "signal_channel": "s_device_vol",
            "severity": 0.95,
            "rationale": "Prevents unencrypted data transfer",
        },
        {
            "id": "P3.2",
            "group": "USB Device Controls",
            "category": "Exfiltration",
            "rule": "No file transfers >100MB to removable media without approval",
            "signal_channel": "s_device_vol",
            "severity": 0.85,
            "rationale": "Detects large-scale data exfiltration",
        },
        {
            "id": "P3.3",
            "group": "Email Transfer Limits",
            "category": "Exfiltration",
            "rule": "No email attachments >5MB to external domains without DLP scan",
            "signal_channel": "s_email_vol",
            "severity": 0.8,
            "rationale": "Detects data exfiltration via email",
        },
        {
            "id": "P3.4",
            "group": "Cloud Upload Restrictions",
            "category": "Exfiltration",
            "rule": "No uploads to personal cloud storage services (Dropbox, Google Drive, etc.)",
            "signal_channel": "s_http_exfil",
            "severity": 0.9,
            "rationale": "Prevents data leakage to unauthorized cloud services",
        },
    ]

    # ==================== RECONNAISSANCE POLICIES ====================
    reconnaissance_policies = [
        {
            "id": "P4.1",
            "group": "Sensitive File Access",
            "category": "Reconnaissance",
            "rule": "No access to /etc/shadow, SAM databases, or password files",
            "signal_channel": "s_file_sensitive",
            "severity": 0.96,
            "rationale": "Prevents credential harvesting attempts",
        },
        {
            "id": "P4.2",
            "group": "Network Scanning Detection",
            "category": "Reconnaissance",
            "rule": "Port scanning and network enumeration activities are prohibited",
            "signal_channel": "s_net_scan",
            "severity": 0.94,
            "rationale": "Detects network reconnaissance and mapping",
        },
        {
            "id": "P4.3",
            "group": "Database Query Monitoring",
            "category": "Reconnaissance",
            "rule": "Unusual database query patterns trigger immediate review",
            "signal_channel": "s_db_query",
            "severity": 0.82,
            "rationale": "Detects data mining and reconnaissance",
        },
    ]

    # ==================== WORKFLOW POLICIES ====================
    workflow_policies = [
        {
            "id": "P5.1",
            "group": "Job Search Monitoring",
            "category": "Workflow",
            "rule": "Excessive access to job search sites during work hours triggers review",
            "signal_channel": "s_http_intent",
            "severity": 0.6,
            "rationale": "Detects potential departure intent",
        },
        {
            "id": "P5.2",
            "group": "Browsing Pattern Analysis",
            "category": "Workflow",
            "rule": "Significant changes in browsing entropy require behavior analysis",
            "signal_channel": "s_browsing_entropy",
            "severity": 0.55,
            "rationale": "Detects reconnaissance and research activities",
        },
    ]

    # ==================== ANOMALY POLICIES ====================
    anomaly_policies = [
        {
            "id": "P6.1",
            "group": "Behavioral Baseline Monitoring",
            "category": "Anomaly",
            "rule": "Significant deviations from established behavioral baselines require investigation",
            "signal_channel": "s_behavior_anomaly",
            "severity": 0.75,
            "rationale": "Detects subtle behavioral changes indicating risk",
        },
        {
            "id": "P6.2",
            "group": "Velocity-Based Detection",
            "category": "Anomaly",
            "rule": "Rapid acceleration in privilege changes or data access triggers review",
            "signal_channel": "s_velocity_anomaly",
            "severity": 0.8,
            "rationale": "Detects preparation phases of insider attacks",
        },
    ]

    # ==================== LIFECYCLE POLICIES ====================
    lifecycle_policies = [
        {
            "id": "P7.1",
            "group": "Termination Procedures",
            "category": "Lifecycle",
            "rule": "All access must be revoked within 24 hours of termination notification",
            "signal_channel": "s_term_access",
            "severity": 0.95,
            "rationale": "Prevents post-termination access and retaliation",
        },
        {
            "id": "P7.2",
            "group": "Access Review Cycles",
            "category": "Lifecycle",
            "rule": "All user access must be reviewed semi-annually by data owners",
            "signal_channel": "s_access_review",
            "severity": 0.75,
            "rationale": "Maintains principle of least privilege",
        },
    ]

    # Concatenate in category order; `+` builds a new list, so none of the
    # per-category lists is aliased by the return value.
    return (
        access_policies
        + privilege_policies
        + exfiltration_policies
        + reconnaissance_policies
        + workflow_policies
        + anomaly_policies
        + lifecycle_policies
    )

def generate_large_policy_dataset(
    num_policies: int = 300, seed: Optional[int] = None
) -> List[Dict]:
    """Return the baseline policies padded with random variations.

    The result starts with the list from ``generate_fine_grained_policies()``.
    When ``num_policies`` exceeds the baseline size, the shortfall is filled
    with generated policies: each picks a random template (group, category,
    signal channel, severity range), a random rule text for that channel, a
    severity drawn uniformly from the template's range and rounded to two
    decimals, and a random rationale.

    Generated IDs are zero-padded and sequential, continuing after the
    baseline count (``P021``, ``P022``, ... for a 20-policy baseline).

    Args:
        num_policies: Desired total number of policies.  If it does not
            exceed the baseline size, the baseline list is returned as-is
            (it is not truncated).
        seed: Optional seed for a private random generator, making the
            output reproducible.  When ``None`` (the default) the shared
            ``random`` module state is used, so a prior ``random.seed()``
            call by the caller still takes effect.

    Returns:
        A list of policy dicts; rule texts may repeat across generated
        entries because they are sampled with replacement.
    """
    policies = generate_fine_grained_policies()

    additional_needed = num_policies - len(policies)
    if additional_needed <= 0:
        return policies

    # The `random` module and `random.Random` expose the same choice /
    # randint / uniform API, so either can stand in as the generator.
    rng = random if seed is None else random.Random(seed)

    # Templates for generating additional policies
    policy_templates = [
        {
            "group": "Logon Time Restrictions",
            "category": "Access",
            "signal_channel": "s_logon_time",
            "severity_range": (0.5, 0.8),
        },
        {
            "group": "Workstation Access Patterns",
            "category": "Access",
            "signal_channel": "s_logon_breadth",
            "severity_range": (0.6, 0.9),
        },
        {
            "group": "Role Change Monitoring",
            "category": "Privilege",
            "signal_channel": "s_role_change",
            "severity_range": (0.7, 0.95),
        },
        {
            "group": "USB Device Controls",
            "category": "Exfiltration",
            "signal_channel": "s_device_vol",
            "severity_range": (0.8, 0.98),
        },
        {
            "group": "Email Transfer Limits",
            "category": "Exfiltration",
            "signal_channel": "s_email_vol",
            "severity_range": (0.7, 0.9),
        },
    ]

    # Rule texts per signal channel; {n}, {m}, {h}, {d} are filled in below.
    rule_variations = {
        "s_logon_time": [
            "No login during company holidays without VP approval",
            "Consecutive failed logins > {n} within {m} minutes triggers account lock",
            "Login from new geographic region requires additional verification",
        ],
        "s_logon_breadth": [
            "Access from more than {n} different networks in {h} hours requires review",
            "Concurrent sessions from multiple locations prohibited",
            "VPN access patterns must match physical location history",
        ],
        "s_role_change": [
            "Temporary role assignments expire after {d} days without extension",
            "Role inheritance across projects requires security assessment",
            "Emergency role grants must be reviewed within {h} hours",
        ],
        "s_device_vol": [
            "External device registration required before first use",
            "Data transfer to unregistered devices blocked automatically",
            "Encrypted device usage logs mandatory for compliance",
        ],
        "s_email_vol": [
            "External email with attachments requires keyword scanning",
            "Bulk email to external domains triggers DLP review",
            "Email forwarding to personal accounts prohibited",
        ],
    }

    rationale_options = [
        "Prevents unauthorized access attempts",
        "Detects potential security policy violations",
        "Ensures compliance with data protection regulations",
        "Monitors for insider threat indicators",
        "Protects sensitive organizational assets",
        "Maintains audit trail for security incidents",
    ]

    # Freeze the starting ID number before appending.  Reading len() of the
    # growing list inside the loop would advance the number twice per
    # iteration and produce gapped IDs (P021, P023, ...).
    first_generated_number = len(policies) + 1

    for offset in range(additional_needed):
        template = rng.choice(policy_templates)
        channel = template["signal_channel"]
        rule_template = rng.choice(rule_variations[channel])

        # All four values are drawn even when the chosen text has no
        # placeholder (str.format ignores unused keyword arguments); this
        # keeps the number of random draws per policy constant.
        rule = rule_template.format(
            n=rng.randint(3, 10),
            m=rng.randint(15, 60),
            h=rng.randint(1, 48),
            d=rng.randint(1, 30),
        )

        policies.append(
            {
                "id": f"P{first_generated_number + offset:03d}",
                "group": template["group"],
                "category": template["category"],
                "rule": rule,
                "signal_channel": channel,
                "severity": round(rng.uniform(*template["severity_range"]), 2),
                "rationale": rng.choice(rationale_options),
            }
        )

    return policies

def save_policies_to_file(policies: List[Dict], filename: str = "policies.json") -> None:
    """Write *policies* to *filename* as JSON and print a confirmation line.

    The file is created or overwritten and the JSON is indented by two
    spaces.

    Args:
        policies: JSON-serialisable list of policy dicts.
        filename: Destination path.
    """
    # Pin the encoding so the file does not depend on the platform's locale
    # default.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(policies, f, indent=2)
    print(f"✓ Generated {len(policies)} policies in {filename}")

def print_policy_statistics(policies: List[Dict]) -> None:
    """Print a summary of *policies* to stdout.

    The report lists the total count, the number of policies per
    ``category`` and per ``signal_channel`` (most frequent first, ties in
    first-seen order), and the mean, minimum, maximum and population
    standard deviation of ``severity``.

    Args:
        policies: Policy dicts; each must provide ``category``,
            ``signal_channel`` and ``severity``.  An empty list prints the
            headers and a placeholder instead of severity figures.
    """
    by_category = Counter(policy["category"] for policy in policies)
    by_channel = Counter(policy["signal_channel"] for policy in policies)
    severities = [policy["severity"] for policy in policies]

    divider = "=" * 60
    print("\n" + divider)
    print("POLICY DATASET STATISTICS")
    print(divider)

    print(f"\n📊 Total Policies: {len(policies)}")

    print("\n📂 By Category:")
    for category, count in by_category.most_common():
        print(f"   • {category}: {count} policies")

    print("\n🏷️  By Signal Channel:")
    for channel, count in by_channel.most_common():
        print(f"   • {channel}: {count} policies")

    print("\n⚖️  Severity Statistics:")
    if not severities:
        # min()/max()/mean are undefined for an empty dataset.
        print("   • No policies to summarize")
        return

    # Standard-library statistics keep this function usable when the module
    # is imported rather than run as a script.  pstdev is the population
    # standard deviation (divides by N).
    print(f"   • Average: {statistics.mean(severities):.3f}")
    print(f"   • Minimum: {min(severities):.3f}")
    print(f"   • Maximum: {max(severities):.3f}")
    print(f"   • Std Dev: {statistics.pstdev(severities):.3f}")

def main():
    """Generate a 300-policy dataset, save it, and print a report.

    Steps: build the dataset with ``generate_large_policy_dataset(300)``,
    write it to ``policies_300_v2.json`` via ``save_policies_to_file``, print
    aggregate statistics, then echo the first five policies.
    """
    # Single source of truth for the output path so the closing message
    # always names the file that was actually written.
    output_file = "policies_300_v2.json"
    divider = "=" * 60

    print(divider)
    print("FINE-GRAINED POLICY GENERATOR")
    print("For Bayesian-Temporal Correlated Equilibrium Framework")
    print(divider)

    # Generate policies
    print("\n▶ Generating fine-grained policies...")
    policies = generate_large_policy_dataset(300)

    # Save to file
    save_policies_to_file(policies, output_file)

    # Print statistics
    print_policy_statistics(policies)

    # Show sample of policies
    print("\n🔍 Sample Policies (first 5):")
    print(divider)
    for position, policy in enumerate(policies[:5], start=1):
        print(f"\n{position}. {policy['id']} - {policy['group']}")
        print(f"   Category: {policy['category']}")
        print(f"   Rule: {policy['rule']}")
        print(f"   Signal: {policy['signal_channel']}")
        print(f"   Severity: {policy['severity']}")
        print(f"   Rationale: {policy['rationale']}")

    print("\n✓ Policy generation complete!")
    print(f"✓ File ready for simulator: {output_file}")

if __name__ == "__main__":
    # Script entry point.  numpy is imported only on this path; the import
    # binds `np` as a module-level global before main() runs.
    # NOTE(review): nothing binds `np` when this file is imported as a module,
    # so any function that reads that global only works in script mode.
    import numpy as np
    main()

FINE-GRAINED POLICY GENERATOR
For Bayesian-Temporal Correlated Equilibrium Framework

▶ Generating fine-grained policies...
✓ Generated 300 policies in policies_300_v2.json

POLICY DATASET STATISTICS

📊 Total Policies: 300

📂 By Category:
   • Access: 122 policies
   • Exfiltration: 117 policies
   • Privilege: 52 policies
   • Reconnaissance: 3 policies
   • Workflow: 2 policies
   • Anomaly: 2 policies
   • Lifecycle: 2 policies

🏷️  By Signal Channel:
   • s_logon_time: 68 policies
   • s_email_vol: 61 policies
   • s_device_vol: 55 policies
   • s_logon_breadth: 54 policies
   • s_role_change: 51 policies
   • s_role_sensitivity: 1 policies
   • s_http_exfil: 1 policies
   • s_file_sensitive: 1 policies
   • s_net_scan: 1 policies
   • s_db_query: 1 policies
   • s_http_intent: 1 policies
   • s_browsing_entropy: 1 policies
   • s_behavior_anomaly: 1 policies
   • s_velocity_anomaly: 1 policies
   ‚Ä¢ s_term_access: 1 policie