In [2]:
# SpaceX Data Collection via REST API

This notebook retrieves historical launch data from the public SpaceX REST API  
and prepares a raw dataset for downstream wrangling and analysis.

**Outputs**: `spacex_api_raw.csv`

Collecting dash
  Using cached dash-3.2.0-py3-none-any.whl.metadata (10 kB)
ERROR: Could not find a version that satisfies the requirement sqlite3 (from versions: none)
ERROR: No matching distribution found for sqlite3
Note: you may need to restart the kernel to use updated packages.


In [None]:
import requests, pandas as pd, numpy as np

# Helper function to fetch data
def fetch_collection(url, timeout=60):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address of the endpoint to request.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            argument. Defaults to 60, the value previously hard-coded here.

    Returns:
        Whatever ``response.json()`` decodes from the response body.

    Raises:
        requests.HTTPError: If ``raise_for_status()`` rejects the response
            status code.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error body.
    response.raise_for_status()
    return response.json()

# SpaceX v4 API: one endpoint per resource collection
LAUNCHES_URL = "https://api.spacexdata.com/v4/launches"
ROCKETS_URL = "https://api.spacexdata.com/v4/rockets"
PADS_URL = "https://api.spacexdata.com/v4/launchpads"
PAYLOADS_URL = "https://api.spacexdata.com/v4/payloads"

# Launches are kept exactly as returned; the other collections are indexed by
# their "id" field so each launch's references can be resolved by lookup.
launches = fetch_collection(LAUNCHES_URL)
rockets = dict((doc["id"], doc) for doc in fetch_collection(ROCKETS_URL))
pads = dict((doc["id"], doc) for doc in fetch_collection(PADS_URL))
payloads = dict((doc["id"], doc) for doc in fetch_collection(PAYLOADS_URL))

# Quick sanity check: number of records fetched per collection
(len(launches), len(rockets), len(pads), len(payloads))


In [None]:
# Normalize launches into a DataFrame


def _total_payload_mass(payload_docs):
    """Sum the known ``mass_kg`` values of ``payload_docs``.

    Returns the total as a float, or None when no payload has a recorded mass.
    """
    # Drop missing masses before summing: np.nansum only ignores NaN, so a
    # None entry would raise TypeError instead of being skipped.
    known = [doc.get("mass_kg") for doc in payload_docs]
    known = [float(mass) for mass in known if mass is not None]
    return float(np.nansum(known)) if known else None


def _first_core_landing(cores):
    """Describe the landing of the first entry in ``cores``.

    Returns ``(landing_outcome, cls)``: the outcome label (None when there are
    no cores) and 1 only for an attempted, successful landing, otherwise 0.
    """
    if not cores:
        return None, 0
    first_core = cores[0]
    if not first_core.get("landing_attempt"):
        return "No attempt", 0
    landing_type = first_core.get("landing_type")
    if first_core.get("landing_success"):
        return f"Success ({landing_type})", 1
    return f"Failure ({landing_type})", 0


rows = []
for launch in launches:
    # Launches without a timestamp are left out of the dataset.
    if not launch.get("date_utc"):
        continue

    rocket = rockets.get(launch.get("rocket"), {})
    pad = pads.get(launch.get("launchpad"), {})

    # Resolve payload references, ignoring ids missing from the lookup table.
    payload_ids = launch.get("payloads") or []
    payload_docs = [payloads[pid] for pid in payload_ids if payloads.get(pid)]
    # Orbit comes from the first listed payload; an unknown id yields None
    # rather than a KeyError.
    first_payload = (payloads.get(payload_ids[0]) or {}) if payload_ids else {}

    landing_outcome, cls = _first_core_landing(launch.get("cores"))

    rows.append({
        "FlightNumber": launch.get("flight_number"),
        "DateUTC": pd.to_datetime(launch["date_utc"], errors="coerce"),
        "BoosterVersion": rocket.get("name"),
        "PayloadMass": _total_payload_mass(payload_docs),
        "Orbit": first_payload.get("orbit"),
        "Site": pad.get("name"),
        "LandingOutcome": landing_outcome,
        "Class": cls,
    })

# Naming the columns explicitly keeps the frame well-formed (and sortable)
# even when no launch produced a row.
df_api = (
    pd.DataFrame(
        rows,
        columns=[
            "FlightNumber",
            "DateUTC",
            "BoosterVersion",
            "PayloadMass",
            "Orbit",
            "Site",
            "LandingOutcome",
            "Class",
        ],
    )
    .sort_values("DateUTC")
    .reset_index(drop=True)
)
df_api.head()


In [None]:
# Write the raw API dataset to disk (positional index omitted), then report its size
df_api.to_csv(path_or_buf="spacex_api_raw.csv", index=False)
print(f"Saved {len(df_api)} rows to spacex_api_raw.csv")
