In [None]:
import json, time, urllib.parse, requests
import pandas as pd
from tqdm import tqdm

# JSON search endpoint requested by fetch_page(); the search criteria are sent
# as a URL-encoded JSON document in the `criteria` query parameter.
BASE = "https://www.unoosa.org/oosa/osoindex/waxs-search.json"

In [None]:
def build_criteria(filters=None, start_at=0, sortings=None):
    """Assemble the search-criteria dict for one page request.

    Args:
        filters: Filter definitions; any falsy value becomes an empty list.
        start_at: Offset of the first record of the page; coerced with int().
        sortings: Sort definitions; any falsy value falls back to ordering by
            "object.launch.dateOfLaunch_s1" descending.

    Returns:
        A dict with the keys "filters", "sortings" and "startAt", in that order.
    """
    if not filters:
        filters = []
    if not sortings:
        sortings = [{"fieldName":"object.launch.dateOfLaunch_s1","dir":"desc"}]

    # Insertion order is kept deliberately: the dict is later serialized to JSON.
    criteria = {"filters": filters}
    criteria["sortings"] = sortings
    criteria["startAt"] = int(start_at)
    return criteria

In [None]:
def fetch_page(session: requests.Session, criteria: dict, cookies=None, timeout=30):
    """Request one page of search results and return ``(found, results)``.

    Args:
        session: Session used to issue the GET request.
        criteria: Search criteria; serialized to compact JSON and sent
            percent-encoded in the ``criteria`` query parameter of ``BASE``.
        cookies: Optional cookie mapping forwarded with the request.
        timeout: Request timeout in seconds, passed through to ``session.get``.

    Returns:
        A tuple ``(found, results)``: ``found`` is the reported total as an int
        (0 when absent) and ``results`` is the list of records for this page
        (empty when absent).

    Raises:
        requests.HTTPError: If the response status is an error
            (via ``raise_for_status``).
    """
    crit = json.dumps(criteria, separators=(",", ":"))
    # safe='' so that every reserved character of the JSON text is escaped.
    url = f"{BASE}?criteria={urllib.parse.quote(crit, safe='')}"
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Referer": "https://www.unoosa.org/oosa/osoindex/search-ng.jspx?lf_id=",
        "User-Agent": "Mozilla/5.0",
    }
    r = session.get(url, headers=headers, cookies=cookies or {}, timeout=timeout)
    r.raise_for_status()
    data = r.json()

    # The fallback container is looked up eagerly, so it must tolerate
    # `"responseData": null` (or any non-dict) instead of raising
    # AttributeError while the top-level keys are perfectly usable.
    nested = data.get("responseData")
    if not isinstance(nested, dict):
        nested = {}

    # Values are read from the top level first, then from "responseData".
    found = data.get("found", nested.get("found"))
    results = data.get("results", nested.get("results", []))
    return int(found or 0), results or []

In [None]:
def flatten(d, parent="", sep="."):
    """Collapse a nested mapping into a single-level dict with joined keys.

    Nested dicts are expanded recursively, their keys prefixed with the parent
    key and ``sep``. Lists are stored as JSON text (non-ASCII characters kept
    as-is). Every other value is copied unchanged. A falsy ``d`` yields ``{}``.

    Args:
        d: Mapping to flatten (or a falsy value).
        parent: Key prefix for this level; when falsy, keys are used as-is.
        sep: Separator placed between the prefix and each key.

    Returns:
        A new flat dict, keys in the order they were encountered.
    """
    flat = {}
    source = d or {}
    for key, value in source.items():
        if parent:
            path = f"{parent}{sep}{key}"
        else:
            path = key

        if isinstance(value, list):
            flat[path] = json.dumps(value, ensure_ascii=False)
        elif isinstance(value, dict):
            for sub_path, sub_value in flatten(value, path, sep).items():
                flat[sub_path] = sub_value
        else:
            flat[path] = value
    return flat

In [None]:
def fetch_all(filters=None, sortings=None, cookies=None, sleep_s=0.5, limit=None):
    """Download all result pages for a query and return them as a DataFrame.

    The first page is requested at offset 0 and its length is taken as the
    page size. Further pages are requested until the offset reaches the
    reported total, a page comes back empty, or ``limit`` rows were collected.
    Each record is flattened with ``flatten`` before being stored.

    Args:
        filters: Filter definitions forwarded to ``build_criteria``.
        sortings: Sort definitions forwarded to ``build_criteria``.
        cookies: Optional cookie mapping forwarded to ``fetch_page``.
        sleep_s: Seconds to sleep before each follow-up page request.
        limit: Maximum number of rows to return; ``None`` means no cap.

    Returns:
        pandas.DataFrame with one row per flattened record
        (at most ``limit`` rows when a limit is given).

    Raises:
        requests.HTTPError: Propagated from ``fetch_page`` on an error status.
    """
    rows = []

    # Context managers release the HTTP session and the progress bar even if
    # a request fails part-way through a long download.
    with requests.Session() as session:
        found, results = fetch_page(session, build_criteria(filters, 0, sortings), cookies=cookies)
        page_size = len(results)
        rows.extend(flatten(r) for r in results)

        # Cap the progress total to the limit if one was provided.
        target = min(found, limit) if limit is not None else found
        with tqdm(total=target, desc="Download UNOOSA", unit="rows") as pbar:
            pbar.update(min(len(results), target))

            start_at = page_size
            while (
                start_at < found
                and page_size > 0
                and (limit is None or len(rows) < limit)
            ):
                time.sleep(sleep_s)
                found2, results = fetch_page(
                    session, build_criteria(filters, start_at, sortings), cookies=cookies
                )

                # The reported total may grow between pages; never shrink it,
                # and keep the progress bar's total in step with it.
                new_found = max(found, found2 or found)
                if new_found != found:
                    found = new_found
                    pbar.total = min(found, limit) if limit is not None else found
                    pbar.refresh()

                if not results:
                    break

                rows.extend(flatten(r) for r in results)
                page_size = len(results)
                start_at += page_size

                if limit is None:
                    pbar.update(len(results))
                else:
                    # Never advance the bar past the requested limit.
                    remaining = max(0, limit - pbar.n)
                    pbar.update(min(len(results), remaining))

    return pd.DataFrame(rows if limit is None else rows[:limit])

In [18]:
# Session cookies are optional. If the request needs them, supply a mapping such as
#   {"JSESSIONID": "...", "_ga": "...", "UNOOSA-NSLB": "..."}
# and redact the values before sharing the notebook.
cookies = None

# Unfiltered download of the whole index, newest launches first.
# The recorded run of this cell took about 24 minutes for 21289 rows.
df_unoosa = fetch_all(
    filters=[],
    sortings=[{"fieldName": "object.launch.dateOfLaunch_s1", "dir": "desc"}],
    cookies=cookies,
)
df_unoosa.head()

Download UNOOSA: 100%|██████████| 21289/21289 [24:28<00:00, 14.49rows/s]


Unnamed: 0,id,uri,values.object.internationalDesignator_s1,values.object.internationalDesignator@official_s1,values.object.nationalDesignator_s1,values.object.nameOfSpaceObjectIno_s1,values.object.nameOfSpaceObjectO_s1,values.object.launch.stateOfRegistry_s1,values.object.launch.stateOfRegistry@official_s1,values.object.launch.dateOfLaunch_s1,...,values.object.launch.dateOfLaunch@official_s1,values.object.status.dateOfDecay@official_s1,values.object.functionOfSpaceObject_s1,values.object.remark_s1,values.object.status.webSite_s1,values.object.unRegistration.registrationDocuments.document@uri_s,values.object.unRegistration.registrationDocuments.document..document.symbol_s,values.object.status.gsoLocation@official_s1,values.object.unRegistration.decayDocuments.document@uri_s,values.object.unRegistration.decayDocuments.document..document.symbol_s
0,"102,en,/osoindex/data/objects/2025/2025-085q_2...",/osoindex/data/objects/2025/2025-085q_24495.html,2025-085Q,False,,STARLINK 33861,,USA,False,2025-04-28,...,False,False,------,Not registered with the United Nations. Date o...,,,,,,
1,"102,en,/osoindex/data/objects/2025/2025-085s_2...",/osoindex/data/objects/2025/2025-085s_24497.html,2025-085S,False,,STARLINK 33887,,USA,False,2025-04-28,...,False,False,------,Not registered with the United Nations. Date o...,,,,,,
2,"102,en,/osoindex/data/objects/2025/2025-085t_2...",/osoindex/data/objects/2025/2025-085t_24498.html,2025-085T,False,,STARLINK 33886,,USA,False,2025-04-28,...,False,False,------,Not registered with the United Nations. Date o...,,,,,,
3,"102,en,/osoindex/data/objects/2025/2025-085u_2...",/osoindex/data/objects/2025/2025-085u_24499.html,2025-085U,False,,STARLINK 33840,,USA,False,2025-04-28,...,False,False,------,Not registered with the United Nations. Date o...,,,,,,
4,"102,en,/osoindex/data/objects/2025/2025-085v_2...",/osoindex/data/objects/2025/2025-085v_24500.html,2025-085V,False,,STARLINK 33851,,USA,False,2025-04-28,...,False,False,------,Not registered with the United Nations. Date o...,,,,,,


In [20]:
from pathlib import Path

# Export location for the downloaded index.
# NOTE(review): this is an absolute, machine-specific path; adjust it (or make
# it configurable) before running the notebook on another machine.
output_dir = Path("/Users/aaeush/Desktop/Drive/Drive/Academics/Py Project/MyCode/OrbitIQ/exports")
output_dir.mkdir(exist_ok=True, parents=True)  # create missing folders, tolerate an existing one

# Write the frame without the pandas index column, encoded as UTF-8.
out_path = output_dir.joinpath("unoosa_index_of_objects_launched_into_space.csv")
df_unoosa.to_csv(out_path, encoding="utf-8", index=False)
print(f"Saved CSV to: {out_path}")

Saved CSV to: /Users/aaeush/Desktop/Drive/Drive/Academics/Py Project/MyCode/OrbitIQ/exports/unoosa_index_of_objects_launched_into_space.csv
