In [None]:
import requests
import json
import zipfile
import os
from google.colab import files

# ===============================
# 1. FIRECRAWL API KEYS
# ===============================
# Placeholder keys; replace them with real Firecrawl keys before running.
# The fetch loop picks one key per URL, cycling through this list in order.
FIRECRAWL_KEYS = [
    "fc_key_1_here",
    "fc_key_2_here",
    "fc_key_3_here",
]

# ====================
# 2. URLS TO SCRAPE
# ====================
# Each entry is fetched once; the saved file is numbered by its position here.
URLS = [
    "https://example.com",
    "https://openai.com",
    "https://wikipedia.org",
]

# ======================================
# 3. REQUEST FUNCTION TO GET RAW HTML
# ======================================
def get_raw_html(url, api_key, timeout=60):
    """
    Sends a Firecrawl scrape request and returns raw HTML.

    Args:
        url: Address of the page to scrape.
        api_key: Firecrawl API key, sent as a Bearer token.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The raw HTML string from the Firecrawl response, or None when the
        request fails, the status is not 200, the body is not JSON, or no
        rawHtml field is present. Every failure is reported with print().
    """
    api_url = "https://api.firecrawl.dev/v1/scrape"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "url": url,
        "formats": ["rawHtml"]
    }

    # A timeout keeps one stalled connection from hanging the whole run, and
    # network errors are reported the same way as every other failure here.
    try:
        response = requests.post(
            api_url, headers=headers, json=payload, timeout=timeout
        )
    except requests.RequestException as exc:
        print(f"❌ Request failed for {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"❌ Error {response.status_code} for {url}")
        return None

    try:
        body = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        print(f"⚠️ Non-JSON response for {url}")
        return None

    # The v1 scrape endpoint nests its results under "data"; the top-level
    # lookup is kept as a fallback for responses that are not wrapped.
    raw_html = None
    if isinstance(body, dict):
        data = body.get("data")
        if isinstance(data, dict):
            raw_html = data.get("rawHtml")
        if raw_html is None:
            raw_html = body.get("rawHtml")

    if raw_html is None:
        print("⚠️ No rawHtml returned:", body)
        return None

    return raw_html


# ================================
# 4. FETCH + SAVE HTML FILES
# ================================
# Output directory for the scraped pages; reused if it already exists.
os.makedirs("html_raw", exist_ok=True)

# Fail with a clear message instead of the ZeroDivisionError that the modulo
# below would raise when there are URLs to fetch but no keys to rotate.
if URLS and not FIRECRAWL_KEYS:
    raise ValueError("FIRECRAWL_KEYS is empty; add at least one Firecrawl API key.")

for i, url in enumerate(URLS):
    key_index = i % len(FIRECRAWL_KEYS)  # rotate API key
    api_key = FIRECRAWL_KEYS[key_index]

    # Identify the key by its position in FIRECRAWL_KEYS rather than echoing
    # part of the secret into output that is saved with the notebook.
    print(f"Fetching: {url} using key #{key_index + 1}...")

    html_content = get_raw_html(url, api_key)

    # None and empty HTML are both treated as a failed fetch.
    if not html_content:
        print(f"❌ Failed to fetch: {url}")
        continue

    # Files are numbered by the URL's position, so a failed URL leaves a gap.
    filename = f"html_raw/page_{i+1}.html"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(html_content)
    print(f"✔ Saved: {filename}")


# ======================
# 5. ZIP HTML FILES
# ======================
zip_filename = "html-raw.zip"

# zipfile stores members uncompressed by default; DEFLATE is requested
# explicitly so the HTML text is actually compressed.
with zipfile.ZipFile(zip_filename, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
    # Sorted so the member order does not depend on the arbitrary order in
    # which os.listdir returns directory entries.
    for file in sorted(os.listdir("html_raw")):
        filepath = os.path.join("html_raw", file)
        # arcname drops the "html_raw/" prefix so files sit at the archive root.
        zipf.write(filepath, arcname=file)

print("✔ Zipped into:", zip_filename)


# =========================
# 6. DOWNLOAD ZIP FILE
# =========================
# Hands the archive to google.colab's files.download helper.
files.download(zip_filename)
print("⬇️ Download should begin automatically.")