In [2]:
pip install requests pandas geopandas shapely

You should consider upgrading via the '/usr/local/bin/python3 -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


In [6]:
"""
Fetch Carbon Mapper CH4 plumes by bbox, clip to Myanmar ADM1 (HDX),
apply a tolerance buffer (2–5 km), and output filtered results.

Inputs:
    - CARBONMAPPER_TOKEN  : environment variable with your Carbon Mapper API token
    - HDX_ADM1_PATH       : path to HDX Myanmar Admin Level-1 (e.g., GeoJSON/Shapefile)

Outputs:
    - myanmar_ch4_plumes.csv : CH4 plumes inside the buffered Myanmar boundary, de-duplicated by plume_id when that column is present
"""

import os
import sys
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from io import StringIO
import logging

In [7]:

API_BASE = "https://api.carbonmapper.org/api/v1"

# Carbon Mapper API token (JWT). It is read from the CARBONMAPPER_TOKEN
# environment variable so the secret is never stored in the notebook or its
# version history; export the variable before starting the kernel.
# An unset variable yields "", which build_session() rejects with an error.
# NOTE(review): a token used to be hard-coded on this line — treat that token
# as compromised and revoke/rotate it.
API_TOKEN = os.environ.get("CARBONMAPPER_TOKEN", "").strip()

# Geographic bounds (Myanmar approximate bbox: [min_lon, min_lat, max_lon, max_lat])
MYANMAR_BBOX = [92.15, 9.5, 101.15, 28.6]

# HDX Myanmar Admin-1 boundary file path
HDX_ADM1_PATH = "./files/myanmar_admin_boundaries.json"

# Spatial tolerance buffer around Myanmar boundary (in kilometres)
BUFFER_KM = 3.0

# API pagination settings: rows requested per page and per-request timeout
# (seconds, passed to requests as `timeout`)
LIMIT = 1000
TIMEOUT = 60

# Safety cap for pagination (prevents runaway loops)
MAX_RECORDS = 200000

# Output file
OUT_CSV = "myanmar_ch4_plumes.csv"

# Timestamped INFO-level logging for every step of the pipeline
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)
logger = logging.getLogger("carbonmapper_mmr")


def build_session() -> requests.Session:
    """Create a requests session pre-configured for the Carbon Mapper CSV API.

    The token is taken from the module-level ``API_TOKEN``. When that is empty
    (or only whitespace) the ``CARBONMAPPER_TOKEN`` environment variable is
    used instead, so the secret does not have to be written into the notebook.
    Surrounding whitespace is stripped so that a stray newline from a
    copy/paste or a shell export does not end up in the Authorization header.

    Returns:
        A ``requests.Session`` whose default headers carry
        ``Authorization: Bearer <token>`` and ``Accept: text/csv``.

    Exits:
        Logs an error and calls ``sys.exit(1)`` when no token is available.
    """
    token = (API_TOKEN or "").strip() or os.environ.get("CARBONMAPPER_TOKEN", "").strip()
    if not token:
        logger.error(
            "No Carbon Mapper token found: set the CARBONMAPPER_TOKEN environment "
            "variable (or API_TOKEN) to your full Carbon Mapper JWT."
        )
        sys.exit(1)

    session = requests.Session()
    session.headers.update({
        "Authorization": f"Bearer {token}",
        "Accept": "text/csv",
    })
    return session

def fetch_chunk(session: requests.Session, offset: int) -> pd.DataFrame:
    """Download one page of CH4 plume records as a DataFrame.

    Sends a GET to ``{API_BASE}/catalog/plume-csv`` with the module-level
    bounding box, ``plume_gas="CH4"``, the page size ``LIMIT`` and the given
    ``offset``, then parses the CSV response body.

    Args:
        session: Session used to issue the request.
        offset: Row offset of the page to request.

    Returns:
        The parsed CSV page, or an empty DataFrame when the response body is
        blank after stripping whitespace.

    Exits:
        On any ``requests.RequestException`` (including HTTP error statuses
        raised by ``raise_for_status``) the error is logged and the process
        exits with status 1.
    """
    url = f"{API_BASE}/catalog/plume-csv"
    query = dict(bbox=MYANMAR_BBOX, plume_gas="CH4", limit=LIMIT, offset=offset)

    try:
        response = session.get(url, params=query, timeout=TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        # The exception may or may not carry a response; include at most the
        # first 300 characters of its body to keep the log line readable.
        failed_response = getattr(exc, "response", None)
        body = getattr(failed_response, "text", "")
        detail = f" | Response: {body[:300]}" if body else ""
        logger.error(
            "API request failed at offset %s: %s%s",
            offset, str(exc), detail
        )
        sys.exit(1)

    payload = response.text.strip()
    if payload:
        return pd.read_csv(StringIO(payload))
    return pd.DataFrame()


def load_admin_polygon(path: str, buffer_km: float) -> gpd.GeoDataFrame:
    """Load the admin boundary, merge it into one shape and buffer it.

    The file is read with geopandas, normalised to EPSG:4326 (a missing CRS is
    assumed to already be WGS84), repaired with a zero-width buffer, dissolved
    into a single geometry and finally expanded by ``buffer_km``.

    The buffer is computed in the UTM zone that geopandas estimates for the
    boundary rather than in Web Mercator (EPSG:3857): Web Mercator stretches
    distances by roughly 1/cos(latitude), so a buffer expressed in its
    "metres" is noticeably narrower on the ground than requested.

    Args:
        path: Path to the boundary file (any format ``gpd.read_file`` accepts).
        buffer_km: Buffer distance in kilometres.

    Returns:
        A GeoDataFrame in EPSG:4326 with a single ``geometry`` column holding
        the dissolved, buffered boundary.

    Raises:
        FileNotFoundError: If ``path`` is empty or does not exist.
    """
    if not path or not os.path.exists(path):
        raise FileNotFoundError(f"Admin boundary file not found: {path!r}")

    admin = gpd.read_file(path)

    # Normalise CRS to WGS84
    if admin.crs is None:
        admin = admin.set_crs("EPSG:4326")
    else:
        admin = admin.to_crs("EPSG:4326")

    # Fix invalid geometries (common in real-world GIS data)
    admin["geometry"] = admin.buffer(0)

    # Dissolve to a single polygon: a constant key puts every row in one group
    admin["__all__"] = 1
    adm1_single = admin.dissolve(by="__all__", as_index=False)[["geometry"]]

    # Buffer in true metres via a locally accurate projected CRS, then return
    # to WGS84.
    # NOTE(review): estimate_utm_crs() needs a non-empty boundary with a CRS;
    # an empty boundary file is expected to fail here — confirm if relevant.
    metric_crs = adm1_single.estimate_utm_crs()
    adm1_metric = adm1_single.to_crs(metric_crs)
    adm1_metric["geometry"] = adm1_metric.buffer(buffer_km * 1000.0)
    return adm1_metric.to_crs(4326)


def build_points_gdf(df: pd.DataFrame) -> gpd.GeoDataFrame:
    """Turn a table of plume records into a point GeoDataFrame (EPSG:4326).

    The latitude column is the first of ``plume_latitude``, ``latitude``,
    ``lat`` present in ``df``; the longitude column is the first of
    ``plume_longitude``, ``longitude``, ``lon``. Both are coerced to numeric
    on a copy of ``df`` and rows where either value is missing or not
    numeric are dropped.

    Args:
        df: Records containing one latitude and one longitude column.

    Returns:
        A GeoDataFrame holding the remaining rows with a point geometry built
        from (longitude, latitude).

    Raises:
        ValueError: If no latitude or no longitude column can be found.
    """
    def first_present(names):
        # Return the first candidate that is an actual column, else None.
        for name in names:
            if name in df.columns:
                return name
        return None

    lat_col = first_present(("plume_latitude", "latitude", "lat"))
    lon_col = first_present(("plume_longitude", "longitude", "lon"))

    if lat_col is None or lon_col is None:
        raise ValueError(
            f"Could not find latitude/longitude columns in API data. "
            f"Available columns: {df.columns.tolist()}"
        )

    # Work on a copy so the caller's frame is left untouched.
    coords = df.copy()
    for col in (lat_col, lon_col):
        coords[col] = pd.to_numeric(coords[col], errors="coerce")
    coords = coords.dropna(subset=[lat_col, lon_col])

    geometry = gpd.points_from_xy(coords[lon_col], coords[lat_col])
    return gpd.GeoDataFrame(coords, geometry=geometry, crs="EPSG:4326")


def _download_plumes(session: requests.Session) -> list:
    """Page through the plume endpoint and return the non-empty pages.

    Pagination stops on the first empty page, on the first page shorter than
    ``LIMIT`` (the last page), or once ``MAX_RECORDS`` rows have been fetched.
    """
    chunks = []
    offset = 0
    fetched = 0

    while True:
        logger.info("Fetching offset %s …", offset)
        df_chunk = fetch_chunk(session, offset)
        logger.info("Got %s rows", len(df_chunk))

        if df_chunk.empty:
            break

        chunks.append(df_chunk)
        fetched += len(df_chunk)

        if len(df_chunk) < LIMIT:
            break

        if fetched >= MAX_RECORDS:
            logger.warning("Reached MAX_RECORDS cap (%s). Stopping pagination.", MAX_RECORDS)
            break

        offset += LIMIT

    return chunks


def main():
    """Run the pipeline: download plumes, clip to buffered Myanmar, write CSV.

    Steps:
        1. Build the API session (exits with status 1 without a token).
        2. Load and buffer the admin boundary. This happens before any
           download so a wrong ``HDX_ADM1_PATH`` fails immediately instead of
           after all pages have been fetched.
        3. Download all plume pages for the bounding box.
        4. Convert the records to points and keep those within the boundary.
        5. Drop duplicate ``plume_id`` rows when that column exists.
        6. Write the result, without the geometry column, to ``OUT_CSV``.

    Exits with status 1 on a missing boundary file or unusable coordinate
    columns, and with status 0 when the API returned no rows.
    """
    # The context manager closes the session's pooled connections on every
    # path out of the block, including the sys.exit() calls below and the one
    # inside fetch_chunk().
    with build_session() as session:
        # 1) Load admin boundary and apply buffer (fail fast, before network I/O)
        try:
            adm1 = load_admin_polygon(HDX_ADM1_PATH, BUFFER_KM)
        except FileNotFoundError as e:
            logger.error("%s", e)
            sys.exit(1)

        # 2) Download plumes by bbox with pagination
        chunks = _download_plumes(session)

    if not chunks:
        logger.info("No data downloaded from API.")
        sys.exit(0)

    raw = pd.concat(chunks, ignore_index=True)
    logger.info("Total downloaded: %s plumes", len(raw))

    # 3) Build GeoDataFrame of points
    try:
        gdf_pts = build_points_gdf(raw)
    except ValueError as e:
        logger.error("%s", e)
        sys.exit(1)
    logger.info("Valid coordinate rows: %s", len(gdf_pts))

    # 4) Spatial join - keep only points inside buffered Myanmar
    joined = gpd.sjoin(gdf_pts, adm1, how="inner", predicate="within")
    kept = joined.drop(columns=["index_right"], errors="ignore")
    logger.info("Plumes inside Myanmar (buffered %.1f km): %s", BUFFER_KM, len(kept))

    # 5) Deduplicate by plume_id if present
    if "plume_id" in kept.columns:
        before = len(kept)
        kept = kept.drop_duplicates(subset=["plume_id"])
        removed = before - len(kept)
        if removed > 0:
            logger.info("Removed %s duplicate plume_id(s). Final: %s unique plumes", removed, len(kept))
    else:
        logger.warning("Column 'plume_id' not found; skipping deduplication.")

    # 6) Save output (geometry is dropped; the coordinate columns remain)
    kept_csv = kept.drop(columns=["geometry"], errors="ignore")
    kept_csv.to_csv(OUT_CSV, index=False)
    logger.info("Saved: %s", OUT_CSV)
    logger.info("Total plumes: %s", len(kept_csv))


if __name__ == "__main__":
    main()


2025-10-15 12:35:36,143 INFO Fetching offset 0 …
2025-10-15 12:35:37,539 INFO Got 143 rows
2025-10-15 12:35:37,539 INFO Total downloaded: 143 plumes
2025-10-15 12:35:37,573 INFO Valid coordinate rows: 143
2025-10-15 12:35:39,651 INFO Plumes inside Myanmar (buffered 3.0 km): 12
2025-10-15 12:35:39,665 INFO Saved: myanmar_ch4_plumes.csv
2025-10-15 12:35:39,666 INFO Total plumes: 12
