# Multi-Cloud Public Outage Collector

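This notebook collects **public outage data** from the status pages of the cloud providers below, then asks an OpenAI model for a short summary and a recommendation: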
- AWS
- Azure
- GCP

In [None]:
!uv pip install feedparser requests dotenv beautifulsoup4 lxml


In [None]:
from datetime import datetime, timezone
import os
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI


In [None]:
# Load variables from a local .env file (values in the file override ones
# already present in the process environment), then read the OpenAI key.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key

# The whitespace test runs before the prefix test: a key with leading
# whitespace also fails `startswith`, and would otherwise be reported as
# having the wrong prefix instead of pointing at the real problem.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

In [None]:
def now_iso():
    """Return the current UTC time formatted as an ISO-8601 string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()

In [None]:
import json
from bs4 import BeautifulSoup
from datetime import datetime, timezone
import re
import requests

In [None]:
def _aws_history_unavailable():
    """Build the one-record placeholder returned when AWS history cannot be read."""
    return [{
        "cloud": "aws",
        "service": "all",
        "region": "global",
        "title": "AWS does not expose public historical outage data via JSON APIs",
        "status": "unsupported",
        "start_time": None,
        "end_time": None,
        "details": (
            "AWS Health historical events require either "
            "1) AWS Health API with Business/Enterprise support, or "
            "2) JavaScript execution in a browser context."
        ),
        "source_url": "https://health.aws.amazon.com/health/status"
    }]


def fetch_aws_incidents():
    """Fetch AWS incidents from the public ``history.json`` endpoint.

    Returns:
        list[dict]: One record per event with the keys ``cloud``, ``service``,
        ``region``, ``title``, ``status``, ``start_time``, ``end_time``,
        ``details`` and ``source_url``. When the endpoint cannot be reached,
        answers with an HTTP error, returns something that is not JSON, or
        returns JSON that is not a mapping, a single placeholder record with
        ``status == "unsupported"`` is returned instead, so callers always
        receive a list and never an exception for those cases.
    """
    url = "https://status.aws.amazon.com/history.json"

    try:
        resp = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=20
        )
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError):
        # AWS no longer exposes public history JSON. RequestException covers
        # transport and HTTP-status failures; ValueError covers a body that is
        # not valid JSON.
        return _aws_history_unavailable()

    # The loop below needs a mapping of date -> list of events; any other
    # JSON shape is treated the same as "no usable history".
    if not isinstance(data, dict):
        return _aws_history_unavailable()

    incidents = []

    for date, events in data.items():
        if not isinstance(events, list):
            continue
        for e in events:
            if not isinstance(e, dict):
                # Skip malformed entries instead of failing the whole fetch.
                continue
            incidents.append({
                "cloud": "aws",
                "service": e.get("service"),
                "region": e.get("region") or "global",
                "title": e.get("summary"),
                "status": e.get("status"),
                "start_time": date,
                "end_time": None,
                "details": e.get("description"),
                "source_url": "https://health.aws.amazon.com/health/status"
            })

    return incidents


In [None]:
# Heading phrases that open a post-incident-review section, in match-priority
# order, paired with the key under which that section's text is stored.
_PIR_SECTION_MARKERS = (
    ("what happened", "what_happened"),
    ("what went wrong", "what_went_wrong"),
    ("how did we respond", "how_did_we_respond"),
    ("how are we making", "mitigation"),
    ("how can customers", "customer_guidance"),
)

# Tags whose text is inspected when splitting an incident body into sections.
_PIR_TEXT_TAGS = ["strong", "p", "li"]


def _normalize_heading(text):
    """Lower-case ``text`` and collapse every whitespace run to one space."""
    return re.sub(r"\s+", " ", text.lower())


def _extract_pir_sections(body_el):
    """Split an incident body element into its recognised review sections.

    Returns a dict mapping section key to the newline-joined text found
    between that section's heading and the next recognised heading. Text that
    appears before the first recognised heading is ignored.
    """
    collected = {}
    current = None
    captured_ids = set()  # id() of every element whose text was already stored

    for el in body_el.find_all(_PIR_TEXT_TAGS):
        # find_all also yields tags nested inside tags it already returned
        # (a <strong> inside a <p>, a <p> inside an <li>). Once an element's
        # full text has been stored, its descendants would only repeat part
        # of it, so they are skipped.
        if any(id(ancestor) in captured_ids for ancestor in el.parents):
            continue

        text = el.get_text(strip=True)
        key = _normalize_heading(text)
        section = next(
            (name for marker, name in _PIR_SECTION_MARKERS if marker in key),
            None,
        )

        if section is not None:
            # A heading (re)starts its section with an empty buffer.
            current = section
            collected[current] = []
        elif current:
            collected[current].append(text)
            captured_ids.add(id(el))

    return {name: "\n".join(lines) for name, lines in collected.items()}


def fetch_azure_incidents():
    """Scrape incident write-ups from the Azure status history page.

    Returns:
        list[dict]: One record per incident row with the keys ``cloud``,
        ``month``, ``date`` (ISO date, or ``None`` when the month/day text
        cannot be parsed), ``tracking_id``, ``title``, ``text``, ``sections``,
        ``raw_html`` and ``source_url``. Rows missing any of the day, title,
        tracking-id or body elements are skipped.

    Raises:
        requests.RequestException: If the page cannot be fetched or the server
            answers with an HTTP error status, so an error page is never
            parsed as if it were the history page.
    """
    url = "https://azure.status.microsoft/en-us/status/history/"
    resp = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=20
    )
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "lxml")
    results = []

    # Each month block
    for month_header in soup.select("div.month-title-container h2"):
        month_text = month_header.get_text(strip=True)

        wrapper = (
            month_header
            .find_parent("div")
            .find_next_sibling("div", class_="month-incident-container-wrapper")
        )
        if not wrapper:
            continue

        # Each incident row
        for row in wrapper.select("div.row"):
            day_el = row.select_one(".incident-history-day")
            title_el = row.select_one(".incident-history-title")
            tracking_el = row.select_one(".incident-history-tracking-id")
            body_el = row.select_one(".incident-history-collapse .card-body")

            if not all([day_el, title_el, tracking_el, body_el]):
                continue

            # Normalize date: the month header supplies "<Month> <Year>" and
            # the row supplies the day of the month.
            day = day_el.get_text(strip=True)
            try:
                iso_date = datetime.strptime(
                    f"{month_text} {day}", "%B %Y %d"
                ).date().isoformat()
            except ValueError:
                iso_date = None

            tracking_id = (
                tracking_el.get_text(strip=True)
                .replace("Tracking ID:", "")
                .strip()
            )

            results.append({
                "cloud": "azure",
                "month": month_text,
                "date": iso_date,
                "tracking_id": tracking_id,
                "title": title_el.get_text(strip=True),
                "text": body_el.get_text("\n", strip=True),
                "sections": _extract_pir_sections(body_el),
                "raw_html": str(body_el),
                "source_url": url
            })

    return results

In [None]:
def _join_names(items):
    """Join names with ", ", accepting plain strings or objects with a title.

    Each entry may be a string or a dict; for a dict its ``"title"`` (falling
    back to ``"id"``) is used. Empty or missing entries are dropped, and
    ``None`` or an empty list yields ``""``.
    """
    if isinstance(items, str):
        items = [items]

    names = []
    for item in items or []:
        if isinstance(item, dict):
            item = item.get("title") or item.get("id")
        if item:
            names.append(str(item))
    return ", ".join(names)


def fetch_gcp_incidents():
    """Fetch incidents from the Google Cloud status ``incidents.json`` feed.

    Returns:
        list[dict]: One record per incident with the keys ``cloud``,
        ``service``, ``region``, ``title``, ``status``, ``start_time``,
        ``end_time`` and ``url``. Entries in the feed that are not JSON
        objects are skipped.

    Raises:
        requests.RequestException: If the feed cannot be fetched or the server
            answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://status.cloud.google.com/incidents.json"
    resp = requests.get(url, timeout=15)
    resp.raise_for_status()
    data = resp.json()

    incidents = []
    for inc in data:
        if not isinstance(inc, dict):
            continue

        # NOTE(review): the fallback field names below (affected_products,
        # service_name, previously_affected_locations, most_recent_update)
        # follow Google's published incident feed as I understand it -
        # confirm against the current schema. All lookups use .get, so an
        # absent field simply falls through to the next option.
        service = (
            _join_names(inc.get("services") or inc.get("affected_products"))
            or inc.get("service_name")
            or ""
        )

        # Location entries may be objects rather than plain strings, which a
        # bare ", ".join() cannot handle. An incident with no *currently*
        # affected location falls back to previously affected ones before
        # being labelled "global".
        region = (
            _join_names(inc.get("currently_affected_locations"))
            or _join_names(inc.get("previously_affected_locations"))
            or "global"
        )

        latest_update = inc.get("most_recent_update")
        status = inc.get("status") or (
            latest_update.get("status") if isinstance(latest_update, dict) else None
        )

        incidents.append({
            "cloud": "gcp",
            "service": service,
            "region": region,
            "title": inc.get("external_desc"),
            "status": status,
            "start_time": inc.get("begin"),
            "end_time": inc.get("end"),
            "url": inc.get("uri")
        })

    return incidents

In [None]:
def get_cloud_outages_json():
    """Collect incidents from all three providers and serialise them as JSON.

    The AWS, Azure and GCP fetchers are called in that order and their records
    concatenated. The result is a compact JSON string (no whitespace after
    separators, non-ASCII characters kept as-is) holding the generation
    timestamp in UTC, a fixed source label, the record count and the records.
    """
    collectors = (fetch_aws_incidents, fetch_azure_incidents, fetch_gcp_incidents)
    records = [record for collect in collectors for record in collect()]

    generated_at = datetime.now(tz=timezone.utc).isoformat()
    envelope = {
        "generated_at": generated_at,
        "source": "public-status-pages",
        "incident_count": len(records),
        "incidents": records,
    }

    return json.dumps(envelope, ensure_ascii=False, separators=(",", ":"))

In [None]:
# Run all three collectors once and keep the serialised result; this performs
# live HTTP requests, so the summary cells below reuse `outage_json`.
outage_json = get_cloud_outages_json()

In [None]:
# System message for the summarisation call: sets the persona and the output
# format (plain markdown, no surrounding code fence).
system_prompt = """
You are a snarky assistant that analyzes the outages of 3 different cloud providers and provides suggestions on which cloud provider I should choose based on the outages.
Provide a short, snarky, humorous summary, ignoring text that might be navigation related.
Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.
"""

In [None]:
# Text placed in front of the collected outage JSON in the user message.
# NOTE(review): the prompt states a 90-day window, but no date filter is
# visible in this notebook's collectors - confirm the feeds really cover
# that span, or filter the records before summarising.
user_prompt_prefix = """
Here is the outage data for the last 90 days from AWS, Azure and GCP. If a cloud service provider is not giving enough outage details, consider that as well.
In some cases the cloud provider (e.g. AWS) does publish details, but the data is loaded dynamically from the server and hence cannot be scraped from the web page. That data can still be viewed by opening the status page in a browser and clicking through the individual events. No payment is needed.
Provide a short summary of the outages and your recommendation on which service provider to choose and why.
If the data includes major and minor outages and outage durations, summarize these too.
"""

In [None]:
def messages_for(outage_data):
    """Build the chat message list for one summarisation request.

    The system message carries ``system_prompt``; the user message carries
    ``user_prompt_prefix`` immediately followed by ``outage_data`` (a string).
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_prefix + outage_data}
    return [system_message, user_message]

In [None]:
def summarize(outage_data, model="gpt-4.1-mini"):
    """Ask an OpenAI chat model to summarise the collected outage data.

    Args:
        outage_data: The outage payload as a string; it is embedded in the
            user message built by ``messages_for``.
        model: Name of the chat model to call. Defaults to the model this
            notebook was written against.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    # Named `client` rather than `openai` so the local does not shadow the
    # name of the package the class is imported from.
    client = OpenAI()
    response = client.chat.completions.create(
        model=model,
        messages=messages_for(outage_data)
    )
    return response.choices[0].message.content

In [None]:
def display_summary(outage_data):
    """Summarise ``outage_data`` and render the result as Markdown output."""
    display(Markdown(summarize(outage_data)))

In [None]:
# Send the collected outage JSON to the model and render its markdown reply.
display_summary(outage_json)