# CloudTrail Log Analysis using Amazon Bedrock

This notebook demonstrates how to analyze CloudTrail logs using Amazon Bedrock for security event analysis.
We'll perform the following steps:
1. Query CloudTrail logs using Athena
2. Analyze security events using Amazon Bedrock
3. Visualize the results
4. Generate Additional Analysis Suggestions Using Amazon Bedrock

First, let's import required libraries and set up our environment.

In [None]:
import boto3
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import json
import time

# Initialize AWS clients.
# Both clients are created without explicit region or credential arguments.
# Athena client: used by run_athena_query to start queries and fetch results.
athena = boto3.client('athena')
# Bedrock runtime client: used by invoke_bedrock_model to call invoke_model.
bedrock = boto3.client('bedrock-runtime')

## Query CloudTrail Logs
Let's query the CloudTrail logs for the last hour and keep the security-relevant events: API calls that returned an error, plus any delete, update, create, or modify operations.

In [None]:
# Function to run Athena query and get results
def run_athena_query(query, database,
                     output_location='s3://{UPDATE_WITH_YOUR_UNIQUE_NAME}/athena_results/',
                     client=None):
    """Run an Athena query, wait for it to finish, and return all result rows.

    Args:
        query: SQL text to execute.
        database: Name of the Athena database used as the execution context.
        output_location: S3 URI where Athena writes the query results. The
            default is a placeholder and must be replaced with a real bucket.
        client: Athena client to use. Defaults to the module-level ``athena``
            client.

    Returns:
        The first ``get_query_results`` response, with the rows of every
        following page appended to ``['ResultSet']['Rows']``. Row 0 is the
        header row, exactly as in a single-page response.

    Raises:
        RuntimeError: If the query ends in the FAILED or CANCELLED state.
    """
    if client is None:
        client = athena

    response = client.start_query_execution(
        QueryString=query,
        QueryExecutionContext={'Database': database},
        ResultConfiguration={'OutputLocation': output_location},
    )
    query_execution_id = response['QueryExecutionId']

    # Poll until the query reaches a terminal state, doubling the delay
    # between polls up to max_wait seconds.
    wait_time = 1
    max_wait = 10
    while True:
        response = client.get_query_execution(QueryExecutionId=query_execution_id)
        status = response['QueryExecution']['Status']
        state = status['State']
        if state in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
            break
        time.sleep(wait_time)
        wait_time = min(wait_time * 2, max_wait)

    if state != 'SUCCEEDED':
        error_details = status.get('StateChangeReason', 'No error details available')
        raise RuntimeError(f"Query failed with state: {state}. Error details: {error_details}")

    # A single get_query_results call returns only one page, and the header
    # row counts toward that page, so follow NextToken until it is exhausted.
    results = client.get_query_results(QueryExecutionId=query_execution_id)
    rows = results['ResultSet']['Rows']
    next_token = results.get('NextToken')
    while next_token:
        page = client.get_query_results(
            QueryExecutionId=query_execution_id,
            NextToken=next_token,
        )
        rows.extend(page['ResultSet']['Rows'])
        next_token = page.get('NextToken')

    # The merged response is complete; a leftover token would be misleading.
    results.pop('NextToken', None)
    return results

# Query to get security-relevant events from the last hour.
# The time filter compares eventtime with a formatted string, so the format
# must produce exactly the ISO-8601 shape (e.g. 2024-01-01T12:00:00Z).
# date_format uses MySQL-style specifiers: characters without a leading % are
# copied as-is, so T and Z need no quoting (doubled single quotes would put
# literal quote characters into the threshold and break the comparison).
# NOTE(review): assumes eventtime is stored as an ISO-8601 UTC string and that
# current_timestamp is evaluated in UTC -- confirm against the table definition.
query = """
SELECT
    eventtime,
    eventsource,
    eventname,
    sourceipaddress,
    errorcode,
    errormessage,
    useridentity.userName as username,
    useridentity.type as identity_type
FROM cloudtrail_logs
WHERE
    eventtime >= date_format(date_add('hour', -1, current_timestamp), '%Y-%m-%dT%H:%i:%SZ')
    AND (
        errorcode IS NOT NULL
        OR lower(eventname) LIKE 'delete%'
        OR lower(eventname) LIKE 'update%'
        OR lower(eventname) LIKE 'create%'
        OR lower(eventname) LIKE 'modify%'
    )
ORDER BY eventtime DESC
LIMIT 1000
"""

# Column names produced by the SELECT list of the query. They give the
# fallback DataFrame a usable schema so later cells can still index by column
# when the query fails.
EXPECTED_COLUMNS = [
    'eventtime', 'eventsource', 'eventname', 'sourceipaddress',
    'errorcode', 'errormessage', 'username', 'identity_type',
]


def athena_results_to_dataframe(results):
    """Convert an Athena get_query_results response into a DataFrame.

    Args:
        results: Response dict whose ``['ResultSet']['Rows']`` list starts
            with a header row followed by the data rows.

    Returns:
        A DataFrame with one column per header cell. Cells that carry no
        ``VarCharValue`` become ``None`` so that ``isna()``/``notna()`` can
        tell missing values apart from real ones. If there are no rows at
        all, an empty DataFrame with EXPECTED_COLUMNS is returned.
    """
    rows = results['ResultSet']['Rows']
    if not rows:
        return pd.DataFrame(columns=EXPECTED_COLUMNS)

    header, *records = rows
    columns = [field['VarCharValue'] for field in header['Data']]
    data = [
        [field.get('VarCharValue') for field in record['Data']]
        for record in records
    ]
    return pd.DataFrame(data, columns=columns)


try:
    results = run_athena_query(query, 'cloudtrail_logs_db')
    df = athena_results_to_dataframe(results)
except Exception as e:
    # Best-effort for interactive use: report the problem and continue with an
    # empty, correctly-shaped DataFrame instead of stopping the notebook.
    print(f"Error executing Athena query: {str(e)}")
    df = pd.DataFrame(columns=EXPECTED_COLUMNS)

df

## Analyze Events using Amazon Bedrock
Now let's use Bedrock's Claude model to analyze the security events

In [None]:
# Function to invoke Bedrock model
def invoke_bedrock_model(prompt,
                         model_id='anthropic.claude-3-sonnet-20240229-v1:0',
                         max_tokens=2000,
                         temperature=0.5,
                         client=None):
    """Send a single-turn user prompt to a Claude model on Bedrock.

    Args:
        prompt: Text sent as the content of one ``user`` message.
        model_id: Bedrock model identifier passed as ``modelId``.
        max_tokens: Value of ``max_tokens`` in the request body.
        temperature: Value of ``temperature`` in the request body.
        client: Bedrock runtime client to use. Defaults to the module-level
            ``bedrock`` client.

    Returns:
        The response body decoded from JSON. Other cells read the generated
        text from ``['content'][0]['text']``.
    """
    if client is None:
        client = bedrock

    request_body = {
        "anthropic_version": "bedrock-2023-05-31",
        "messages": [
            {
                "role": "user",
                "content": prompt,
            }
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    response = client.invoke_model(
        modelId=model_id,
        body=json.dumps(request_body),
    )

    # The body is a stream-like object; read it once and decode the JSON.
    return json.loads(response['body'].read())

# Function to analyze events using Bedrock
def analyze_events_with_bedrock(events_description):
    """Ask the model for a JSON security assessment of the given events.

    Args:
        events_description: Text rendering of the CloudTrail events to analyze.

    Returns:
        Whatever invoke_bedrock_model returns for the assembled prompt.
    """
    # Instruction block that opens the prompt.
    instructions = (
        "\n    You are a security analyst. Analyze the following CloudTrail events "
        "and identify any potential security concerns or patterns in JSON format only. "
        "Do not include any text outside of the JSON structure:"
    )
    # Skeleton of the JSON document the model is asked to fill in.
    response_skeleton = (
        '{"key_findings": [\n'
        '    "List your key security findings here"\n'
        '],\n'
        '"suspicious_patterns": [\n'
        '    "List any suspicious patterns detected"  \n'
        '],\n'
        '"recommendations": [\n'
        '    "List your security recommendations"\n'
        ']}'
    )
    prompt = f"{instructions}\n\n{events_description}\n\n{response_skeleton}"

    return invoke_bedrock_model(prompt)

# Describe the first 50 rows of df as text for the model. The query orders by
# eventtime DESC, so these are the most recent events it returned.
events_description = df.iloc[:50].to_string()
analysis_results = analyze_events_with_bedrock(events_description)
analysis_results

## Visualize the Results
Let's create some visualizations to better understand the security events

In [None]:
# Create visualizations for security-focused analysis
def plot_top_values(position, values, title, xlabel, ylabel, empty_message):
    """Draw a bar chart of the 10 most frequent values in one cell of a 2x2 grid.

    Args:
        position: Subplot index (1-4) within the 2x2 grid.
        values: Series whose value counts are plotted.
        title: Chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        empty_message: Text shown in place of the chart when values is empty.
    """
    plt.subplot(2, 2, position)
    if values.empty:
        plt.text(0.5, 0.5, empty_message, horizontalalignment='center')
        return
    values.value_counts().head(10).plot(kind='bar')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)


if df.empty:
    # Without rows there is nothing to plot, and some of the calls below
    # would raise on an empty frame.
    print("No CloudTrail events available to visualize")
else:
    plt.figure(figsize=(15, 12))

    # A missing errorcode may be None/NaN or an empty string depending on how
    # the DataFrame was built; normalise so both count as "no error".
    error_codes = df['errorcode'].fillna('')

    # Plot 1: Failed API calls by service
    plot_top_values(
        1,
        df.loc[error_codes != '', 'eventsource'],
        'Top 10 Services with Failed API Calls',
        'AWS Service',
        'Number of Failures',
        'No failed API calls found',
    )

    # Plot 2: Unauthorized access attempts.
    # Substring match so that service-specific variants of the error code
    # (e.g. Client.UnauthorizedOperation, AccessDeniedException) are included.
    is_unauthorized = error_codes.str.contains('AccessDenied|Unauthorized', na=False)
    plot_top_values(
        2,
        df.loc[is_unauthorized, 'eventname'],
        'Top 10 Unauthorized Access Attempts by API Action',
        'API Action',
        'Number of Attempts',
        'No unauthorized access attempts found',
    )

    # Plot 3: Event frequency over time.
    # The query covers roughly one hour, so hourly buckets would collapse the
    # data into one or two points; use 5-minute buckets instead. The converted
    # timestamps live on a copy so df itself is left unchanged.
    plt.subplot(2, 2, 3)
    timed_events = df.assign(eventtime=pd.to_datetime(df['eventtime']))
    timed_events.set_index('eventtime').resample('5min').size().plot()
    plt.title('Event Frequency Over Time')
    plt.xlabel('Time')
    plt.ylabel('Number of Events')

    # Plot 4: Security configuration changes
    is_change = df['eventname'].str.contains(
        '|'.join(['Create', 'Delete', 'Update', 'Put', 'Modify']), na=False
    )
    is_security_service = df['eventsource'].str.contains(
        '|'.join(['iam', 'kms', 'cloudtrail', 'config', 'guardduty']), na=False
    )
    plot_top_values(
        4,
        df.loc[is_change & is_security_service, 'eventname'],
        'Top 10 Security Configuration Changes',
        'API Action',
        'Number of Changes',
        'No security configuration changes found',
    )

    plt.tight_layout()
    plt.show()

## Display Analysis Results

In [None]:
# Extract the content text from the response
content_text = analysis_results['content'][0]['text']

# Parse the JSON object out of the reply. The prompt asks for JSON only, but
# in case the model still adds text or a code fence around it, parse just the
# span from the first '{' to the last '}'. strict=False tolerates raw control
# characters (such as newlines) inside JSON strings.
json_start = content_text.find('{')
json_end = content_text.rfind('}')
if json_start == -1 or json_end < json_start:
    raise ValueError("Bedrock response does not contain a JSON object")
findings_dict = json.loads(content_text[json_start:json_end + 1], strict=False)

# Print each section; a section the model left out is treated as empty (and
# stored as an empty list) instead of raising KeyError.
sections = [
    ("Key Findings:", 'key_findings'),
    ("\nSuspicious Patterns:", 'suspicious_patterns'),
    ("\nRecommendations:", 'recommendations'),
]
for heading, key in sections:
    print(heading)
    for item in findings_dict.setdefault(key, []):
        print(f"- {item}")


## Generate Additional Analysis Suggestions Using Amazon Bedrock

This code section uses Amazon Bedrock's Claude model to generate intelligent suggestions for further analysis of the CloudTrail logs. It:

1. Takes the findings from our initial analysis
2. Constructs a prompt asking Claude to suggest:
   - Specific Athena SQL queries to investigate suspicious patterns
   - Additional visualization ideas for better understanding security events  
   - Relevant CloudTrail fields that should be analyzed for more context
3. Returns structured suggestions in JSON format for easy parsing and implementation

The suggestions help guide deeper investigation of potential security issues identified in the logs.

First, define a function that builds this prompt from the findings and sends it to Claude, then call it:

In [None]:
# Create a prompt for Claude to generate additional analysis queries
# NOTE: this definition reuses the name analyze_events_with_bedrock from the
# earlier analysis cell. Once this cell has run, the name refers to this
# version, which expects the parsed findings dict rather than an events string.
def analyze_events_with_bedrock(findings_dict):
    """Ask the model for follow-up queries, visualizations and fields to inspect.

    Args:
        findings_dict: Parsed findings; its 'key_findings' and
            'suspicious_patterns' lists are embedded in the prompt.

    Returns:
        Whatever invoke_bedrock_model returns for the assembled prompt.
    """
    # Render each list as "- item" bullets, one per line.
    key_findings_text = '\n'.join(
        f'- {finding}' for finding in findings_dict['key_findings']
    )
    suspicious_patterns_text = '\n'.join(
        f'- {pattern}' for pattern in findings_dict['suspicious_patterns']
    )

    prompt = f"""Based on these findings from the CloudTrail logs analysis:

    Key Findings:
    {key_findings_text}

    Suspicious Patterns:
    {suspicious_patterns_text}

    Please suggest:
    1. Specific Athena SQL queries to investigate these patterns in more detail
    2. Additional visualizations that would help understand the security events better
    3. What other CloudTrail fields we should analyze to get more context

    Return your response in this exact JSON format:
    {{
        "suggested_queries": [
            {{
                "description": "Description of what the query analyzes",
                "query": "The SQL query"
            }}
        ],
        "visualization_ideas": [
            "Description of visualization idea"
        ],
        "fields_to_analyze": [
            {{
                "field": "CloudTrail field name",
                "reason": "Reason to analyze this field"
            }}
        ]
    }}"""

    return invoke_bedrock_model(prompt)

# Request follow-up suggestions from Claude based on the parsed findings.
# The bare expression on the last line shows the raw response as the cell
# output; the response text is parsed and printed in the next cell.
analysis_suggestions = analyze_events_with_bedrock(findings_dict)
analysis_suggestions

In [None]:
print("\nSuggested Queries for Further Analysis:")

# Parse the JSON object out of the reply. In case the model adds text or a
# code fence around the JSON, parse just the span from the first '{' to the
# last '}'. strict=False tolerates raw newlines inside JSON strings, which
# multi-line SQL in the "query" values may contain.
suggestions_text = analysis_suggestions['content'][0]['text']
json_start = suggestions_text.find('{')
json_end = suggestions_text.rfind('}')
if json_start == -1 or json_end < json_start:
    raise ValueError("Bedrock response does not contain a JSON object")
suggested_contents = json.loads(suggestions_text[json_start:json_end + 1], strict=False)

# The loop variable is deliberately not called `query`, so the module-level
# SQL string of that name is not overwritten. Missing sections are treated
# as empty.
for suggestion in suggested_contents.get('suggested_queries', []):
    print(f"\nQuery: {suggestion['description']}")
    print(f"SQL: {suggestion['query']}")

print("\nVisualization Suggestions:")
for viz in suggested_contents.get('visualization_ideas', []):
    print(f"- {viz}")

print("\nAdditional Fields to Analyze:")
for field in suggested_contents.get('fields_to_analyze', []):
    print(f"- {field['field']}: {field['reason']}")
