# Amazon Bedrock Guardrails
Explore content filtering, PII redaction, topic blocking, and word filters

In [None]:
import boto3
import json

# Single region used for every Bedrock client created in this notebook.
REGION = 'us-east-1'

# 'bedrock' is used below to create and manage guardrails;
# 'bedrock-runtime' is used to invoke the model through them.
bedrock, bedrock_runtime = (
    boto3.client(service_name, region_name=REGION)
    for service_name in ('bedrock', 'bedrock-runtime')
)

print(f"Region: {REGION}")

## 1. Create Basic Guardrail with Content Filters

In [None]:
# Strength per harmful-content category; each strength is applied to both
# the user input and the model output.
content_filter_strengths = {
    'HATE': 'HIGH',
    'VIOLENCE': 'HIGH',
    'SEXUAL': 'HIGH',
    'MISCONDUCT': 'MEDIUM',
}

content_policy = {
    'filtersConfig': [
        {'type': category, 'inputStrength': strength, 'outputStrength': strength}
        for category, strength in content_filter_strengths.items()
    ]
}

response = bedrock.create_guardrail(
    name='content-filter-guardrail',
    description='Filter harmful content',
    contentPolicyConfig=content_policy,
    blockedInputMessaging='Your input was blocked due to content policy.',
    blockedOutputsMessaging='The response was blocked due to content policy.',
)

# Keep the identifier and version so later cells can attach this guardrail.
GUARDRAIL_ID = response['guardrailId']
GUARDRAIL_VERSION = response['version']

print(f"✓ Created guardrail: {GUARDRAIL_ID}")
print(f"  Version: {GUARDRAIL_VERSION}")

## 2. Test Content Filtering

In [None]:
def test_with_guardrail(prompt, guardrail_id, guardrail_version):
    """Send ``prompt`` to the model through a guardrail and summarize the outcome.

    Args:
        prompt: User text, sent as a single-message conversation.
        guardrail_id: Identifier of the guardrail to apply to the call.
        guardrail_version: Version of that guardrail to apply.

    Returns:
        A dict that always contains a ``'blocked'`` key:

        * ``{'blocked': True, 'reason': 'Guardrail intervention'}`` when the
          call's stop reason is ``'guardrail_intervened'``.
        * ``{'blocked': False, 'response': text}`` when the model answered;
          ``text`` is the concatenation of all text blocks in the reply.
        * ``{'blocked': False, 'error': message}`` when the call failed.
          A failed call (bad identifier, throttling, missing permissions, ...)
          is not a guardrail decision, so it is not reported as blocked.
    """
    try:
        response = bedrock_runtime.converse(
            modelId='us.amazon.nova-lite-v1:0',
            messages=[{'role': 'user', 'content': [{'text': prompt}]}],
            guardrailConfig={
                'guardrailIdentifier': guardrail_id,
                'guardrailVersion': guardrail_version
            }
        )

        if response['stopReason'] == 'guardrail_intervened':
            return {'blocked': True, 'reason': 'Guardrail intervention'}

        # Join every text block rather than assuming the first content block
        # is text; non-text blocks are skipped.
        content_blocks = response['output']['message']['content']
        text = ''.join(block['text'] for block in content_blocks if 'text' in block)
        return {'blocked': False, 'response': text}
    except Exception as e:
        # Deliberately broad: this is a best-effort demo helper and the test
        # loops should keep going. The failure is surfaced under 'error'
        # instead of being mislabelled as a guardrail block.
        return {'blocked': False, 'error': str(e)}

# A benign prompt, run through the content-filter guardrail.
safe_prompt = "What is machine learning?"
result = test_with_guardrail(
    safe_prompt,
    guardrail_id=GUARDRAIL_ID,
    guardrail_version=GUARDRAIL_VERSION,
)
print(f"Safe prompt: {result}\n")

# A potentially harmful prompt, run through the same guardrail.
harmful_prompt = "How to hack a computer?"
result = test_with_guardrail(
    harmful_prompt,
    guardrail_id=GUARDRAIL_ID,
    guardrail_version=GUARDRAIL_VERSION,
)
print(f"Harmful prompt: {result}")

## 3. Create Guardrail with PII Redaction

In [None]:
# Action to take for each PII entity type.
pii_actions = {
    'EMAIL': 'BLOCK',
    'PHONE': 'BLOCK',
    'NAME': 'ANONYMIZE',
    'ADDRESS': 'ANONYMIZE',
    'CREDIT_DEBIT_CARD_NUMBER': 'BLOCK',
    'US_SOCIAL_SECURITY_NUMBER': 'BLOCK',
}

pii_policy = {
    'piiEntitiesConfig': [
        {'type': entity_type, 'action': action}
        for entity_type, action in pii_actions.items()
    ]
}

response = bedrock.create_guardrail(
    name='pii-redaction-guardrail',
    description='Redact sensitive PII',
    sensitiveInformationPolicyConfig=pii_policy,
    blockedInputMessaging='Input contains sensitive information.',
    blockedOutputsMessaging='Output contains sensitive information.',
)

# Remember which guardrail to attach in the PII test cell.
PII_GUARDRAIL_ID = response['guardrailId']
PII_GUARDRAIL_VERSION = response['version']

print(f"✓ Created PII guardrail: {PII_GUARDRAIL_ID}")

## 4. Test PII Redaction

In [None]:
# Sample inputs, each carrying a different kind of PII.
prompts_with_pii = [
    "My email is john.doe@example.com and phone is 555-1234",
    "John Smith lives at 123 Main Street",
    "My credit card is 4532-1234-5678-9010",
]

# Run each sample through the PII guardrail and show the outcome.
for prompt in prompts_with_pii:
    result = test_with_guardrail(
        prompt,
        guardrail_id=PII_GUARDRAIL_ID,
        guardrail_version=PII_GUARDRAIL_VERSION,
    )
    print(f"Input: {prompt}", f"Result: {result}\n", sep="\n")

## 5. Create Guardrail with Topic Blocking

In [None]:
# Each denied topic is described by a name, a definition, and example phrases.
financial_advice_topic = {
    'name': 'Financial Advice',
    'definition': 'Investment advice, stock tips, financial planning',
    'examples': [
        'Should I invest in stocks?',
        'What stocks should I buy?',
        'Give me investment advice',
    ],
    'type': 'DENY',
}

medical_advice_topic = {
    'name': 'Medical Advice',
    'definition': 'Medical diagnosis, treatment recommendations, health advice',
    'examples': [
        'What medication should I take?',
        'How do I treat this condition?',
        'Diagnose my symptoms',
    ],
    'type': 'DENY',
}

topic_policy = {'topicsConfig': [financial_advice_topic, medical_advice_topic]}

response = bedrock.create_guardrail(
    name='topic-blocking-guardrail',
    description='Block specific topics',
    topicPolicyConfig=topic_policy,
    blockedInputMessaging='This topic is not allowed.',
    blockedOutputsMessaging='Cannot provide information on this topic.',
)

# Remember which guardrail to attach in the topic test cell.
TOPIC_GUARDRAIL_ID = response['guardrailId']
TOPIC_GUARDRAIL_VERSION = response['version']

print(f"✓ Created topic guardrail: {TOPIC_GUARDRAIL_ID}")

## 6. Test Topic Blocking

In [None]:
# Mix of on-topic prompts and prompts that fall under the denied topics.
test_prompts = [
    "What is machine learning?",  # Allowed
    "Should I invest in Bitcoin?",  # Blocked - financial
    "What medication should I take for headache?",  # Blocked - medical
    "Explain cloud computing",  # Allowed
]

# Run each prompt through the topic guardrail and show the outcome.
for prompt in test_prompts:
    result = test_with_guardrail(
        prompt,
        guardrail_id=TOPIC_GUARDRAIL_ID,
        guardrail_version=TOPIC_GUARDRAIL_VERSION,
    )
    print(f"Prompt: {prompt}", f"Result: {result}\n", sep="\n")

## 7. Create Guardrail with Word Filters

In [None]:
# Custom words/phrases to block, alongside the managed profanity list.
blocked_phrases = ('competitor', 'confidential', 'internal only')

word_policy = {
    'wordsConfig': [{'text': phrase} for phrase in blocked_phrases],
    'managedWordListsConfig': [{'type': 'PROFANITY'}],
}

response = bedrock.create_guardrail(
    name='word-filter-guardrail',
    description='Block specific words and phrases',
    wordPolicyConfig=word_policy,
    blockedInputMessaging='Input contains blocked words.',
    blockedOutputsMessaging='Output contains blocked words.',
)

# Remember which guardrail to attach in the word-filter test cell.
WORD_GUARDRAIL_ID = response['guardrailId']
WORD_GUARDRAIL_VERSION = response['version']

print(f"✓ Created word filter guardrail: {WORD_GUARDRAIL_ID}")

## 8. Test Word Filters

In [None]:
# Prompts with and without the custom blocked words.
test_prompts = [
    "Tell me about AWS services",  # Allowed
    "What about our competitor's product?",  # Blocked
    "This is confidential information",  # Blocked
    "Explain cloud architecture",  # Allowed
]

# Run each prompt through the word-filter guardrail and show the outcome.
for prompt in test_prompts:
    result = test_with_guardrail(
        prompt,
        guardrail_id=WORD_GUARDRAIL_ID,
        guardrail_version=WORD_GUARDRAIL_VERSION,
    )
    print(f"Prompt: {prompt}", f"Result: {result}\n", sep="\n")

## 9. Create Comprehensive Guardrail

In [None]:
# Content filters: category -> strength, applied to both input and output.
comp_filter_strengths = {
    'HATE': 'HIGH',
    'VIOLENCE': 'HIGH',
    'SEXUAL': 'HIGH',
    'MISCONDUCT': 'MEDIUM',
}

# PII handling: entity type -> action.
comp_pii_actions = {
    'EMAIL': 'BLOCK',
    'PHONE': 'BLOCK',
    'NAME': 'ANONYMIZE',
    'CREDIT_DEBIT_CARD_NUMBER': 'BLOCK',
}

# Single denied topic for this combined guardrail.
comp_denied_topics = [
    {
        'name': 'Financial Advice',
        'definition': 'Investment and financial planning advice',
        'examples': ['Should I invest?', 'Stock recommendations'],
        'type': 'DENY',
    }
]

# All four policy types are assembled into one request.
comprehensive_settings = {
    'name': 'comprehensive-guardrail',
    'description': 'All protections enabled',
    'contentPolicyConfig': {
        'filtersConfig': [
            {'type': category, 'inputStrength': strength, 'outputStrength': strength}
            for category, strength in comp_filter_strengths.items()
        ]
    },
    'sensitiveInformationPolicyConfig': {
        'piiEntitiesConfig': [
            {'type': entity_type, 'action': action}
            for entity_type, action in comp_pii_actions.items()
        ]
    },
    'topicPolicyConfig': {'topicsConfig': comp_denied_topics},
    'wordPolicyConfig': {
        'wordsConfig': [{'text': 'confidential'}],
        'managedWordListsConfig': [{'type': 'PROFANITY'}],
    },
    'blockedInputMessaging': 'Your input was blocked by our safety policies.',
    'blockedOutputsMessaging': 'The response was blocked by our safety policies.',
}

response = bedrock.create_guardrail(**comprehensive_settings)

# Used by the knowledge-base, management, and save cells further down.
COMP_GUARDRAIL_ID = response['guardrailId']
COMP_GUARDRAIL_VERSION = response['version']

print(f"✓ Created comprehensive guardrail: {COMP_GUARDRAIL_ID}")

## 10. Use Guardrail with Knowledge Base

In [None]:
# Load KB config if available. Only the config load is guarded, so a failure
# of the AWS call below is never mistaken for a missing config file.
try:
    with open('kb_config.json', 'r', encoding='utf-8') as f:
        kb_config = json.load(f)

    KB_ID = kb_config['knowledge_base_id']
except FileNotFoundError:
    print("KB config not found - run knowledge base notebook first")
except (KeyError, json.JSONDecodeError) as e:
    # The file exists but is malformed or lacks the expected key.
    print(f"KB config is invalid ({e!r}) - re-run knowledge base notebook")
else:
    bedrock_agent_runtime = boto3.client('bedrock-agent-runtime', region_name=REGION)

    # Query the knowledge base with the comprehensive guardrail attached to
    # the generation step.
    response = bedrock_agent_runtime.retrieve_and_generate(
        input={'text': 'What equipment do we have?'},
        retrieveAndGenerateConfiguration={
            'type': 'KNOWLEDGE_BASE',
            'knowledgeBaseConfiguration': {
                'knowledgeBaseId': KB_ID,
                'modelArn': f'arn:aws:bedrock:{REGION}::foundation-model/amazon.nova-pro-v1:0',
                'generationConfiguration': {
                    'guardrailConfiguration': {
                        'guardrailId': COMP_GUARDRAIL_ID,
                        'guardrailVersion': COMP_GUARDRAIL_VERSION
                    }
                }
            }
        }
    )

    print("✓ KB query with guardrail:")
    print(response['output']['text'])

## 11. List and Manage Guardrails

In [None]:
# List all guardrails. Results are paginated, so keep requesting pages while
# the service returns a nextToken; otherwise only the first page is shown.
guardrails = []
list_kwargs = {}
while True:
    page = bedrock.list_guardrails(**list_kwargs)
    guardrails.extend(page['guardrails'])
    next_token = page.get('nextToken')
    if not next_token:
        break
    list_kwargs['nextToken'] = next_token

print("Your Guardrails:")
for guardrail in guardrails:
    print(f"  - {guardrail['name']} ({guardrail['id']}) v{guardrail['version']}")

# Get guardrail details
response = bedrock.get_guardrail(
    guardrailIdentifier=COMP_GUARDRAIL_ID,
    guardrailVersion=COMP_GUARDRAIL_VERSION
)

print(f"\nComprehensive Guardrail Details:")
# default=str renders values json cannot serialize natively as strings.
print(json.dumps(response, indent=2, default=str))

## 12. Save Guardrail Configuration

In [None]:
# Map a short label to the id/version pair of each guardrail created above.
guardrail_config = {
    label: {'id': created_id, 'version': created_version}
    for label, created_id, created_version in (
        ('content_filter', GUARDRAIL_ID, GUARDRAIL_VERSION),
        ('pii_redaction', PII_GUARDRAIL_ID, PII_GUARDRAIL_VERSION),
        ('topic_blocking', TOPIC_GUARDRAIL_ID, TOPIC_GUARDRAIL_VERSION),
        ('word_filter', WORD_GUARDRAIL_ID, WORD_GUARDRAIL_VERSION),
        ('comprehensive', COMP_GUARDRAIL_ID, COMP_GUARDRAIL_VERSION),
    )
}

# Write the mapping to disk, then echo the same JSON.
with open('guardrail_config.json', 'w') as f:
    f.write(json.dumps(guardrail_config, indent=2))

print("✓ Guardrail config saved to guardrail_config.json")
print(json.dumps(guardrail_config, indent=2))

## 13. Cleanup (Optional)

In [None]:
# Cleanup is disabled by default, so the five guardrails created above are
# left in place. If the loop below is enabled, the IDs written to
# guardrail_config.json will refer to guardrails that have been deleted.
# Each deletion is attempted independently; a failure is printed and the
# loop moves on to the next guardrail.
# Uncomment to delete guardrails
# for guardrail_id in [GUARDRAIL_ID, PII_GUARDRAIL_ID, TOPIC_GUARDRAIL_ID, WORD_GUARDRAIL_ID, COMP_GUARDRAIL_ID]:
#     try:
#         bedrock.delete_guardrail(guardrailIdentifier=guardrail_id)
#         print(f"✓ Deleted guardrail: {guardrail_id}")
#     except Exception as e:
#         print(f"Error deleting {guardrail_id}: {e}")

print("Cleanup skipped - uncomment to delete guardrails")

## Summary

### Guardrail Types:

1. **Content Filters** - Block hate, violence, sexual content, and misconduct
2. **PII Redaction** - Block/anonymize sensitive information
3. **Topic Blocking** - Deny specific topics (financial, medical)
4. **Word Filters** - Block specific words and profanity
5. **Comprehensive** - All protections combined

### Filter Strengths:

- **NONE** - No filtering
- **LOW** - Minimal filtering
- **MEDIUM** - Moderate filtering
- **HIGH** - Strict filtering

### PII Actions:

- **BLOCK** - Reject content with PII
- **ANONYMIZE** - Replace PII with placeholders

### Use Cases:

- **Customer Service** - Block harmful content, redact PII
- **Healthcare** - Strict PII protection, medical topic control
- **Financial** - Block financial advice, protect sensitive data
- **Enterprise** - Prevent confidential information leakage

### Integration:

- Direct model invocation (Converse API)
- Knowledge Base queries
- Agent responses
- Streaming responses

### Best Practices:

1. Start with a comprehensive guardrail
2. Test with real use cases
3. Adjust filter strengths based on needs
4. Monitor blocked content
5. Update topic definitions regularly
6. Use appropriate PII actions for your use case