In [None]:
# --- GE vs CoastSat ANNUAL MEAN (single axis, 2016–2024, site-coloured error bars) ---
import arcpy, os, pandas as pd, numpy as np, matplotlib.pyplot as plt
from matplotlib.dates import AutoDateLocator, ConciseDateFormatter

# ===== EDIT THESE =====
# --- locations ---
# Geodatabase containing every feature class named below.
GDB = r"D:\GIS\REEF_ISLAND_RESEARCH.gdb"
# Destination of the saved figure.
OUT_PNG = r"D:\GIS\plots\ge_vs_coastsat_ANNUAL_mean_ONEAXIS_COLOUR_ERR.png"

# --- GE inputs: feature class name -> site label ---
GE_FC = {
    "Kodingarengkeke_GE_poly_Project": "Kodingareng Keke",
    "Podangcaddi_GE_poly_Project": "Podang Podang Caddi",
    "Podanglompo_GE_poly_Project": "Podang Podang Lompo",
}
# Preferred GE field names. Each is tried first; the reader then falls back
# to a list of common alternative spellings.
GE_DATE_FIELD = "Date_std"  # older naming, still the first candidate
GE_AREA_FIELD = "area_ha"
GE_SITE_FIELD = "Site"  # this field may be absent

# --- CoastSat inputs: feature class name -> site label ---
CSAT_FC = {
    "Csat_update_kodkeke_poly": "Kodingareng Keke",
    "Csat_update_podcaddi_poly": "Podang Podang Caddi",
    "Csat_update_podlompo_poly": "Podang Podang Lompo",
}
CSAT_DATE_FIELD = "date"
CSAT_AREA_FIELD = "poly_area_ha"

# --- plot settings ---
# Sites are drawn in this order; sites not listed here are excluded.
SITE_ORDER = [
    "Podang Podang Lompo",
    "Podang Podang Caddi",
    "Kodingareng Keke",
]
# Line/marker colour per site.
SITE_COLORS = {
    "Kodingareng Keke": "#1f77b4",
    "Podang Podang Caddi": "#2ca02c",
    "Podang Podang Lompo": "#9467bd",
}
# Inclusive range of years kept for plotting.
START_YR = 2016
END_YR = 2024
# True: error bars use the site colour. False: error bars are black.
COLORED_ERR = True
# ======================

# Set the arcpy workspace to the geodatabase; the feature classes in this
# script are passed to arcpy by name only, without a path.
arcpy.env.workspace = GDB

# ---------- helper to resolve field names flexibly ----------
def find_field(fc_name, candidates, required=True):
    """
    Resolve the first candidate field name that exists on a feature class.

    Matching ignores case; the name is returned exactly as reported by
    arcpy.ListFields. `candidates` may be a list/tuple of names, tried in
    order, or a single value, which is compared through its str() form.

    Returns the matching field name, or None when nothing matches and
    required is False. Raises RuntimeError when nothing matches and
    required is True.
    """
    # lower-cased name -> name as stored on the feature class
    actual_by_lower = {}
    for fld in arcpy.ListFields(fc_name):
        actual_by_lower[fld.name.lower()] = fld.name

    # Treat a lone candidate as a one-element sequence so one loop serves both cases.
    wanted = candidates if isinstance(candidates, (list, tuple)) else (candidates,)
    for cand in wanted:
        match = actual_by_lower.get(str(cand).lower())
        if match:
            return match

    if not required:
        return None
    raise RuntimeError(f"Cannot find any of fields {candidates} in {fc_name}")

# ---------- GE annual means ----------
def ge_annual_means(fc_name, fallback_site):
    """
    Summarise one GE feature class as per-site annual mean area.

    Parameters
    ----------
    fc_name : str
        Feature class name, passed as-is to arcpy.
    fallback_site : str
        Site label used when the feature class has no site field, or when
        a row's site value is empty.

    Returns
    -------
    pandas.DataFrame
        Columns Site, date, area_ha, err, Source, one row per (Site, year):
        date is 1 January of that year, area_ha the mean of the year's area
        values, err their sample standard deviation (0.0 where it is
        undefined, i.e. a single record), Source the label
        "GE (annual mean)". An empty frame with the same columns is
        returned when no usable rows exist.

    Raises
    ------
    RuntimeError
        Raised by find_field when no date or area field can be resolved.

    Rows with an empty or unparseable date, or a non-numeric area, are skipped.
    """
    # find_field matches case-insensitively, so one spelling per name is enough.
    date_field = find_field(fc_name, [GE_DATE_FIELD, "Date", "AcqDate"])
    area_field = find_field(fc_name, [GE_AREA_FIELD, "area_ha", "area"])
    site_field = find_field(fc_name, [GE_SITE_FIELD, "Site"], required=False)

    fields = [date_field, area_field]
    if site_field:
        fields.append(site_field)

    rows = []
    with arcpy.da.SearchCursor(fc_name, fields) as cur:
        for rec in cur:
            dt_raw, area = rec[0], rec[1]
            if not dt_raw:
                continue

            # Values may already be datetime objects or day-first text;
            # anything unparseable becomes NaT and is skipped.
            dt = pd.to_datetime(dt_raw, dayfirst=True, errors="coerce")
            if pd.isna(dt):
                continue

            try:
                area_ha = float(area)
            except (TypeError, ValueError):
                # None or non-numeric text: skip the row, but let unrelated
                # exceptions (e.g. KeyboardInterrupt) propagate.
                continue

            # A null/empty site value would be dropped by groupby below,
            # so fall back to the label configured for this feature class.
            site = (rec[2] if site_field else None) or fallback_site
            rows.append((site, dt, area_ha))

    if not rows:
        # Typed empty frame: "date" stays datetime-like so callers can use
        # .dt and concatenate it with non-empty results without dtype drift.
        return pd.DataFrame({
            "Site": pd.Series(dtype="object"),
            "date": pd.Series(dtype="datetime64[ns]"),
            "area_ha": pd.Series(dtype="float64"),
            "err": pd.Series(dtype="float64"),
            "Source": pd.Series(dtype="object"),
        })

    df = pd.DataFrame(rows, columns=["Site", "date", "area_ha"])
    df["year"] = df["date"].dt.year

    g = df.groupby(["Site", "year"]).agg(
        mean=("area_ha", "mean"),
        sd=("area_ha", "std"),
    ).reset_index()

    # Represent each year at 1 Jan of that year
    g["date"] = pd.to_datetime(g["year"].astype(str) + "-01-01")
    g["Source"] = "GE (annual mean)"
    g = g.rename(columns={"mean": "area_ha", "sd": "err"})
    # std is NaN for a single-record year; plot those without an error bar.
    g["err"] = g["err"].fillna(0.0)

    return g[["Site", "date", "area_ha", "err", "Source"]]

# ---------- CoastSat annual means ----------
def cs_annual_means(fc_name, site):
    """
    Summarise one CoastSat feature class as annual mean area for a site.

    Parameters
    ----------
    fc_name : str
        Feature class name, passed as-is to arcpy.
    site : str
        Site label assigned to every row read from this feature class.

    Returns
    -------
    pandas.DataFrame
        Columns Site, date, area_ha, err, Source, one row per year:
        date is 1 January of that year, area_ha the mean of the year's area
        values, err their sample standard deviation (0.0 where it is
        undefined, i.e. a single record), Source the label
        "CoastSat (annual mean)". An empty frame with the same columns is
        returned when no usable rows exist.

    Raises
    ------
    RuntimeError
        Raised by find_field when no date or area field can be resolved.

    Rows with an empty or unparseable date, or a non-numeric area, are skipped.
    """
    # find_field matches case-insensitively, so one spelling per name is enough.
    date_field = find_field(fc_name, [CSAT_DATE_FIELD, "date"])
    area_field = find_field(
        fc_name,
        [CSAT_AREA_FIELD, "poly_area_ha", "area_ha", "area"],
    )

    rows = []
    with arcpy.da.SearchCursor(fc_name, [date_field, area_field]) as cur:
        for dt_raw, area in cur:
            if not dt_raw:
                continue

            # Unparseable values become NaT and are skipped.
            dt = pd.to_datetime(dt_raw, errors="coerce")
            if pd.isna(dt):
                continue

            try:
                area_ha = float(area)
            except (TypeError, ValueError):
                # None or non-numeric text: skip the row, but let unrelated
                # exceptions (e.g. KeyboardInterrupt) propagate.
                continue

            rows.append((site, dt, area_ha))

    if not rows:
        # Typed empty frame: "date" stays datetime-like so callers can use
        # .dt and concatenate it with non-empty results without dtype drift.
        return pd.DataFrame({
            "Site": pd.Series(dtype="object"),
            "date": pd.Series(dtype="datetime64[ns]"),
            "area_ha": pd.Series(dtype="float64"),
            "err": pd.Series(dtype="float64"),
            "Source": pd.Series(dtype="object"),
        })

    df = pd.DataFrame(rows, columns=["Site", "date", "area_ha"])
    df["year"] = df["date"].dt.year

    g = df.groupby(["Site", "year"]).agg(
        mean=("area_ha", "mean"),
        sd=("area_ha", "std"),
    ).reset_index()

    # Represent each year at 1 Jan of that year
    g["date"] = pd.to_datetime(g["year"].astype(str) + "-01-01")
    g["Source"] = "CoastSat (annual mean)"
    g = g.rename(columns={"mean": "area_ha", "sd": "err"})
    # std is NaN for a single-record year; plot those without an error bar.
    g["err"] = g["err"].fillna(0.0)

    return g[["Site", "date", "area_ha", "err", "Source"]]

# ---------- build & clip combined dataframe ----------
# One annual-summary frame per feature class: GE first, then CoastSat.
frames = [ge_annual_means(fc, s) for fc, s in GE_FC.items()]
frames += [cs_annual_means(fc, s) for fc, s in CSAT_FC.items()]

# Drop empty results before concatenating. Empty frames can carry an
# object-dtype "date" column, which would either break the .dt access below
# or change the dtype of the combined column.
frames = [frame for frame in frames if not frame.empty]
if not frames:
    raise RuntimeError(
        "No usable GE or CoastSat records were read from the configured feature classes."
    )

data = pd.concat(frames, ignore_index=True)
# Guarantee a datetime column regardless of how the inputs were typed.
data["date"] = pd.to_datetime(data["date"])

# Keep only the sites being plotted and the requested (inclusive) year range.
data = data[
    (data["Site"].isin(SITE_ORDER)) &
    (data["date"].dt.year.between(START_YR, END_YR))
].copy()

if data.empty:
    raise RuntimeError("No data remain after year clipping.")

# ---------- plot ONE axis ----------
fig, ax = plt.subplots(figsize=(12, 6))

# Source-label prefix -> line styling. Colour identifies the site; the line
# style identifies the source (solid = GE, dashed = CoastSat).
series_styles = (
    ("GE", dict(linewidth=2.2, markersize=5, linestyle="-")),
    ("CoastSat", dict(linewidth=2.4, markersize=4, linestyle="--", alpha=0.95)),
)

for site in SITE_ORDER:
    c = SITE_COLORS.get(site, "C0")
    err_color = c if COLORED_ERR else "black"
    ds = data[data["Site"] == site].sort_values("date")

    for prefix, style in series_styles:
        series = ds[ds["Source"].str.startswith(prefix)]
        if series.empty:
            continue

        # Annual means joined by a line ...
        ax.plot(
            series["date"], series["area_ha"],
            color=c, marker="o", **style
        )
        # ... with the per-year SD drawn as bars only (fmt="none" adds no markers/line).
        ax.errorbar(
            series["date"], series["area_ha"], yerr=series["err"],
            fmt="none",
            ecolor=err_color,
            elinewidth=1.0, capsize=3, zorder=5
        )

# ---------- axes & formatting ----------
ax.grid(True, linewidth=0.4, alpha=0.5)
ax.set_ylabel("Planform Area (ha)", fontsize=20)
ax.set_xlabel("Year", fontsize=20)
ax.tick_params(axis="both", which="major", labelsize=16, width=1.5, length=7)
ax.tick_params(axis="both", which="minor", labelsize=14, width=1.2, length=4)

locator = AutoDateLocator()
formatter = ConciseDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)

# No legend is created for this figure (series are implicit), so there is
# nothing to hide here.

fig.tight_layout(rect=[0, 0, 1, 0.96])

# os.makedirs("") raises, so only create the folder when OUT_PNG has one.
out_dir = os.path.dirname(OUT_PNG)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
fig.savefig(OUT_PNG, dpi=300)
print(f"Saved: {OUT_PNG}")
plt.show()
