In [None]:
!pip3 install boto3

In [None]:
#Imports
import requests
import json
import time
import http.client
try:
    import boto3
    from botocore.exceptions import NoCredentialsError
except ImportError as e:
    boto3 = None
    print("⚠️ boto3 failed to import, possibly due to SSL issues or environment problems:", e)
from datetime import datetime, timedelta

# === Configuration ===

# Base URL of the Cortex XSIAM tenant; the alerts endpoint path is appended to it.
XSIAM_BASE_URL = "https://api-cribldev.xdr.us.paloaltonetworks.com"
# Target S3 bucket; alerts are stored under its 'alerts/' prefix.
S3_BUCKET_NAME = "gbcortex"

# Hardcoded for the demo only. In production, load these secrets from
# environment variables (or a secrets manager) instead of the source file.
API_KEY = "KEY HAS BEEN TAKING OUT" # Sent as the Authorization header. Replace with your actual API key
XDR_AUTH_ID = "ID HAS BEEN TAKING OOUT" # API key ID for x-xdr-auth-id header. Replace with your actual ID

AWS_REGION = "us-east-2"  # Region passed to the boto3 S3 client
# Hardcoded for the demo only. In production, load these secrets from
# environment variables (or a secrets manager) instead of the source file.
AWS_ACCESS_KEY_ID = "KEY HAS BEEN TAKING OUT" # Replace with your actual key
AWS_SECRET_ACCESS_KEY = "KEY HAS BEEN TAKING OUT" # Replace with your actual key

#------ This section establishes the connection to the S3 bucket
# Start from "no client": s3_client stays None when boto3 could not be imported
# or when creating the client fails, and the S3 helpers skip their work then.
s3_client = None
if boto3:
    try:
        # Region and credentials are passed explicitly from the configuration
        # constants rather than being discovered from the environment.
        s3_client = boto3.client(
            's3',
            region_name=AWS_REGION,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        )
        print(f"S3 Client is set for bucket: '{S3_BUCKET_NAME}'")
    except Exception as e:
        print("❌ Failed to initialize boto3 S3 client. Ensure AWS credentials and region are configured.", e)
        # Discard any partially configured client so callers see a clean None.
        s3_client = None

def get_headers(api_key: str, auth_id: str = None) -> dict:
    """Build the HTTP headers for a Cortex XSIAM API call.

    Args:
        api_key: Value placed verbatim in the ``Authorization`` header.
        auth_id: Optional API key ID. When truthy it is added as the
            ``x-xdr-auth-id`` header; otherwise that header is omitted.

    Returns:
        A new dict with ``Authorization`` and ``Content-Type`` (JSON), plus
        ``x-xdr-auth-id`` when ``auth_id`` was supplied.
    """
    base_headers = {"Authorization": api_key, "Content-Type": "application/json"}
    if not auth_id:
        return base_headers
    return {**base_headers, 'x-xdr-auth-id': auth_id}

# Print generated headers for verification. The Authorization value is the raw
# API key, so it is masked to keep the secret out of notebook output and logs.
print("Alerts API Headers:", {**get_headers(API_KEY, XDR_AUTH_ID), "Authorization": "***REDACTED***"})

#Date object conversion for later fetching the alerts within certain time frame
def to_millis(date_obj: datetime) -> int:
    """Return ``date_obj`` as whole milliseconds since the Unix epoch.

    The POSIX timestamp (float seconds) is scaled to milliseconds and
    truncated to an int.
    """
    epoch_seconds = date_obj.timestamp()
    return int(epoch_seconds * 1000)
    
#--------This section clears json objects within targeted S3 bucket before loading the latest alerts into it
def clear_s3_alerts_folder(bucket_name: str, s3_client):
    """Delete every object under the 'alerts/' prefix of an S3 bucket.

    Args:
        bucket_name: Name of the bucket to clean.
        s3_client: boto3 S3 client. When falsy, nothing is done beyond
            printing a notice.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions
        raised by the S3 calls are caught and printed, not propagated.
    """
    if not s3_client:
        print("❌ S3 client not initialized. Skipping S3 folder clearing.")
        return

    print(f"\n--- Attempting to clear alerts folder in s3://{bucket_name}/alerts/ ---")
    try:
        # Collect every key under the prefix; pages without 'Contents' are empty.
        paginator = s3_client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=bucket_name, Prefix='alerts/')
        objects_to_delete = [
            {'Key': obj['Key']}
            for page in pages
            for obj in page.get('Contents', [])
        ]

        if objects_to_delete:
            print(f"Found {len(objects_to_delete)} objects to delete in s3://{bucket_name}/alerts/")
            # delete_objects accepts at most 1000 keys per request, so delete in batches.
            for i in range(0, len(objects_to_delete), 1000):
                batch = objects_to_delete[i:i+1000]
                print(f"  Deleting batch {i//1000 + 1} of {len(batch)} objects...")
                response = s3_client.delete_objects(
                    Bucket=bucket_name,
                    Delete={'Objects': batch}
                )
                if 'Errors' in response:
                    print(f"❌ Errors during batch delete: {response['Errors']}")
                else:
                    print(f"  Batch delete successful. Deleted {len(batch)} objects.")
            print("✅ Finished clearing alerts folder.")
        else:
            # f-string prefix added: the bucket name was previously printed
            # as the literal text "{bucket_name}".
            print(f"No objects found in s3://{bucket_name}/alerts/ to clear.")

    except NoCredentialsError:
        print("❌ AWS credentials not found. S3 folder clearing failed.")
    except Exception as e:
        print(f"❌ An error occurred during S3 folder clearing: {e}")
    print("--- S3 folder clearing process finished ---")

# --- Section for fetching alerts from Cortex XSIAM ---
def fetch_alerts_page(api_key: str, next_page_token: str = None, timeout: float = 30.0) -> dict:
    """Fetch a single page of alerts from the Cortex XSIAM Incident Management API.

    Args:
        api_key: API key sent in the ``Authorization`` header.
        next_page_token: Pagination token from the previous page's reply, or
            ``None`` to request the first page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within ``timeout``.
        requests.exceptions.HTTPError: On a 4xx or 5xx response.
        requests.exceptions.RequestException: On any other transport failure.
        ValueError: If the response body is not valid JSON.
    """

    # API endpoint for retrieving alerts, per the Cortex XSIAM Incident Management doc:
    # https://docs-cortex.paloaltonetworks.com/r/Cortex-XDR-REST-API/Get-Alerts-Multi-Events-v2
    url = f"{XSIAM_BASE_URL}/public_api/v2/alerts/get_alerts_multi_events/"

    # Setting up payload request
    payload_request_data = {
        "use_page_token": True, # Use page token for pagination
        "search_to": 100, # Request maximum 100 results per page
        "sort": {"field": "creation_time", "keyword": "desc"},
        # Filters are temporarily removed for debugging the 500 error.
        # Re-add filters later if the base call with pagination works.
        # "filters": [{
        #     "field": "creation_time",
        #     "operator": "in_range",
        #     "value": {
        #          "start": to_millis(datetime.now() - timedelta(days=30)),
        #          "end": to_millis(datetime.now())
        #     }
        # }]
    }

    # Adding next_page_token to payload if provided (for subsequent pages)
    if next_page_token:
        payload_request_data["next_page_token"] = next_page_token

    payload = {"request_data": payload_request_data}

    print(f"Fetching alerts with payload: {json.dumps(payload)}")

    # Use headers with x-xdr-auth-id for v2 alerts endpoint
    headers = get_headers(api_key, XDR_AUTH_ID)
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)

    # Raise an HTTPError for bad responses (4xx or 5xx)
    response.raise_for_status()

    # Return the full response data
    return response.json()

def upload_alert_to_s3(alert: dict, bucket_name: str, s3_client):
    """Upload a single alert dictionary as a JSON object to an S3 bucket.

    The object is written to ``alerts/<alert_id>.json``, so re-running the
    script overwrites the previous copy of the same alert. When the alert has
    no usable ``alert_id`` (key missing, ``None`` or empty string), a
    timestamp-based ``unknown_alert_...`` name is used instead so that such
    alerts do not all collapse onto a single ``alerts/None.json`` object.

    Args:
        alert: Alert payload; must be JSON-serializable.
        bucket_name: Destination bucket.
        s3_client: boto3 S3 client. When falsy, the upload is skipped.

    Returns:
        None. Serialization and upload failures are printed, not raised.
    """
    if not s3_client:
        print("❌ S3 client not initialized. Skipping S3 upload.")
        return

    # Fall back to a unique timestamped name when the id is absent or null.
    alert_id = alert.get('alert_id')
    if alert_id is None or alert_id == "":
        alert_id = f"unknown_alert_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
    object_key = f"alerts/{alert_id}.json"

    # Convert the alert dictionary to a JSON string
    try:
        alert_json_data = json.dumps(alert, indent=2)
    except (TypeError, ValueError) as e:
        # TypeError: unserializable value; ValueError: circular reference.
        print(f"❌ Failed to serialize alert {alert_id} to JSON: {e}")
        print(f"Alert data: {alert}") # Log the problematic alert data
        return

    # Upload the JSON data to S3
    try:
        print(f"⬆️ Attempting to upload alert {alert_id} to s3://{bucket_name}/{object_key}")
        s3_client.put_object(
            Bucket=bucket_name,
            Key=object_key,
            Body=alert_json_data,
            ContentType='application/json'
        )
        print(f"✅ Successfully uploaded alert {alert_id} to S3.")
    except NoCredentialsError:
        print(f"❌ AWS credentials not found for alert {alert_id}. S3 upload failed.")
    except Exception as e:
        print(f"❌ Failed to upload alert {alert_id} to S3: {e}")

def fetch_all_alerts(api_key: str):
    """Fetch all alerts page by page and upload each one to S3.

    Pages are requested with ``fetch_alerts_page`` until the reply carries no
    ``next_page_token`` or a page contains no alerts. The loop also stops on
    the first request error, unexpected error, or malformed response.

    Args:
        api_key: API key forwarded to ``fetch_alerts_page``.

    Returns:
        None. Progress is reported with ``print``. The final total counts
        alerts handed to ``upload_alert_to_s3``; that helper reports its own
        failures by printing rather than raising, so the total reflects
        attempted uploads.
    """
    next_page_token = None
    page_count = 0
    total_uploaded_count = 0

    print("\n--- Starting alert fetching and S3 upload process ---")

    while True:
        print(f"Fetching page: {page_count}...")
        try:
            response_data = fetch_alerts_page(api_key, next_page_token=next_page_token)

            # Validate the structure explicitly. The previous chained test
            # ("'alerts' in reply is not None") never checked the alerts value,
            # so a null 'alerts' crashed on len() and was reported as a fetch error.
            reply = response_data.get('reply') if isinstance(response_data, dict) else None
            alerts_on_page = reply.get('alerts') if isinstance(reply, dict) else None
            if alerts_on_page is None:
                print(f"❌ Unexpected response structure or no 'reply'/'alerts' in response on page {page_count}: {response_data}")
                break # Exit loop on unexpected response

            page_alert_count = len(alerts_on_page)
            print(f"Fetched {page_alert_count} alerts on page {page_count}.")

            if alerts_on_page:
                print(f"--- Starting S3 upload for {page_alert_count} alerts on page {page_count} ---")
                for position, alert in enumerate(alerts_on_page, start=1):
                    print(f"  Processing alert {position}/{page_alert_count} on page {page_count}...")
                    upload_alert_to_s3(alert, S3_BUCKET_NAME, s3_client)
                    total_uploaded_count += 1
                print(f"--- Finished S3 upload for alerts on page {page_count} ---")

            # Get the next page token from the response
            next_page_token = reply.get('next_page_token')
            print(f"Next page token for page {page_count}: {next_page_token}")

            # Stop when there is no token, or when a page came back empty
            # (which can happen on the last page).
            if not next_page_token or page_alert_count == 0:
                print("No more pages to fetch or no alerts on the last page.")
                break

            page_count += 1
            # Small delay between requests to avoid rate limiting
            time.sleep(1)

        except requests.exceptions.RequestException as e:
            print(f"❌ Request failed while fetching page {page_count}: {e}")
            break # Exit loop on request error
        except Exception as e:
            print(f"❌ An unexpected error occurred while fetching page {page_count}: {e}")
            break # Exit loop on other errors

    print(f"--- Finished alert fetching and S3 upload. Total alerts uploaded: {total_uploaded_count} ---")

def verify_alerts_in_s3(bucket_name: str, s3_client):
    """Count the objects under the 'alerts/' prefix of a bucket and print the total.

    Args:
        bucket_name: Bucket to inspect.
        s3_client: boto3 S3 client. When falsy, verification is skipped.

    Returns:
        None. The count, or any error raised by the listing, is printed.
    """
    if not s3_client:
        print("❌ S3 client not initialized. Skipping S3 verification.")
        return

    print(f"\n--- Verifying alerts in S3 bucket '{bucket_name}' ---")
    try:
        listing = s3_client.get_paginator('list_objects_v2').paginate(
            Bucket=bucket_name,
            Prefix='alerts/',
        )
        # Pages that hold no objects carry no 'Contents' entry and add nothing.
        object_count = sum(
            len(page['Contents']) for page in listing if 'Contents' in page
        )
        print(f"✅ Found {object_count} alert objects in s3://{bucket_name}/alerts/")
    except NoCredentialsError:
        print(f"❌ AWS credentials not found. S3 verification failed.")
    except Exception as e:
        print(f"❌ An error occurred during S3 verification: {e}")

    print("--- S3 verification complete ---")

# === Main Execution ===
# The three steps below run in order at import/cell-execution time.

# Step 1: empty the 'alerts/' prefix in S3 so the bucket only holds the
# alerts from this run (done for demo purposes).
clear_s3_alerts_folder(S3_BUCKET_NAME, s3_client)

# Step 2: fetch all alerts page by page and upload each one to S3.
fetch_all_alerts(API_KEY)

# Step 3: list the 'alerts/' prefix and print how many objects it now holds.
verify_alerts_in_s3(S3_BUCKET_NAME, s3_client)