# In [None]:
# --- Shoreline count heatmap (Year x Month) with right-side vertical legend ---
# Paste into ArcGIS Pro's Python window.
import arcpy, os, numpy as np, pandas as pd, matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from matplotlib.patches import Patch

# ========= USER INPUTS =========
# File geodatabase holding the input feature classes; it is also assigned to
# arcpy.env.workspace below.
gdb = r"D:\GIS\REEF_ISLAND_RESEARCH.gdb"
# Feature classes to read. An entry with no directory component is joined onto
# `gdb` by the load loop; an entry that already contains a directory is used as given.
feature_classes = [
    "Csat_update_kodkeke_poly",
    "Csat_update_podcaddi_poly",
    "Csat_update_podlompo_poly"
]
# Display names, paired positionally with `feature_classes` (the load loop zips
# the two lists), so keep both lists in the same order and of the same length.
# These strings are used as the "Site" values and as the panel titles.
site_labels = ["Kodingareng Keke (Small Cay)", "Podang podang Caddi (Pristine Cay)", "Podang podang Lompo (Built-Up Cay)"]
# Name of the field read from each feature class and parsed with pandas.to_datetime.
date_field = "date"
# Destination PNG for the heatmap; its parent directory is created before saving.
out_png = r"D:\GIS\plots\onMyThesis\shoreline_counts_heatmap_right_legend.png"
# =================================

# Point arcpy's current workspace at the geodatabase.
arcpy.env.workspace = gdb

def fc_dates_to_df(fc_path, site_name, date_field):
    """Read one feature class's date field into a two-column DataFrame.

    Parameters
    ----------
    fc_path : str
        Path of the feature class handed to arcpy.
    site_name : str
        Label written into the ``Site`` column of every returned row.
    date_field : str
        Name of the field to read and parse as dates.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (timezone-naive datetimes) and ``Site``. Rows whose
        value could not be parsed as a date are dropped.
    """
    records = arcpy.da.FeatureClassToNumPyArray(
        fc_path, [date_field], null_value=None
    )
    table = pd.DataFrame(records)

    # Parse as UTC-aware first, then strip the timezone so every site ends up
    # with the same naive datetime dtype; unparseable values become NaT.
    parsed = pd.to_datetime(table[date_field], errors="coerce", utc=True)
    table["Date"] = parsed.dt.tz_localize(None)

    dated = table.dropna(subset=["Date"]).copy()
    dated["Site"] = site_name
    return dated[["Date", "Site"]]

# Load
# feature_classes and site_labels are paired positionally; zip() would silently
# drop the unmatched tail, so a length mismatch is reported up front instead.
if len(feature_classes) != len(site_labels):
    raise ValueError(
        f"feature_classes has {len(feature_classes)} entries but "
        f"site_labels has {len(site_labels)}; they must pair one-to-one."
    )

frames = []
for fc, label in zip(feature_classes, site_labels):
    # Bare names are resolved inside the geodatabase; anything that already
    # carries a directory component is used as given.
    fc_path = os.path.join(gdb, fc) if os.path.dirname(fc) == "" else fc
    if not arcpy.Exists(fc_path):
        # Best effort: warn and carry on with the remaining feature classes.
        arcpy.AddWarning(f"Not found: {fc_path}")
        continue
    frames.append(fc_dates_to_df(fc_path, label, date_field))
if not frames:
    raise RuntimeError("No valid feature classes found.")

df_all = pd.concat(frames, ignore_index=True)
# Without any parsed date the year range below would be NaN..NaN and np.arange
# would fail with an unhelpful message, so stop here with a clear one.
if df_all.empty:
    raise RuntimeError(
        f"No valid dates found in field '{date_field}' of the input feature classes."
    )

# Year and month bins
# `years` spans every calendar year from the earliest to the latest record
# (inclusive); `months` is always 1..12.
years  = np.arange(df_all["Date"].dt.year.min(), df_all["Date"].dt.year.max() + 1)
months = np.arange(1, 13)
month_labels = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]

def site_matrix(one_site_df, year_bins=None, month_bins=None):
    """Count records per (year, month) cell for one site.

    Parameters
    ----------
    one_site_df : pandas.DataFrame
        Rows for a single site; must have a datetime ``Date`` column.
    year_bins : sequence of int, optional
        Years forming the rows of the result. Defaults to the script-level
        ``years`` array, so one-argument calls behave as before.
    month_bins : sequence of int, optional
        Months forming the columns of the result. Defaults to the
        script-level ``months`` array.

    Returns
    -------
    numpy.ndarray
        2-D array of counts, rows=years, cols=months, with 0 in cells that
        have no records.
    """
    # Resolve the defaults at call time so the function keeps working with the
    # bins computed earlier in the script.
    if year_bins is None:
        year_bins = years
    if month_bins is None:
        month_bins = months

    full_grid = pd.MultiIndex.from_product([year_bins, month_bins])
    counts = (one_site_df
              .assign(Year=one_site_df["Date"].dt.year,
                      Month=one_site_df["Date"].dt.month)
              .groupby(["Year", "Month"]).size()
              # Expand to the full grid so year/month cells with no records become 0.
              .reindex(full_grid, fill_value=0)
              .unstack(level=1)
              # unstack sorts its labels; restore the caller's bin order on both axes.
              .reindex(index=year_bins, columns=month_bins))
    return counts.values  # rows=years, cols=months

# Build matrices, share color scale
# One (years x months) count matrix per site label; every panel uses the same
# colour normalisation so cell colours are comparable between sites.
mats = [site_matrix(df_all[df_all["Site"] == name]) for name in site_labels]
vmax = max((int(mat.max()) for mat in mats), default=0)
# The upper limit is floored at 1 so the colour range never collapses to 0..0.
norm = Normalize(vmin=0, vmax=max(1, vmax))

# ---- Figure: narrower plots, room for right legend ----
fig_h = 7.0
fig_w = 10.0  # a bit narrower than before
fig, axes = plt.subplots(len(mats), 1, figsize=(fig_w, fig_h), dpi=150, sharex=True)
if len(mats) == 1:
    # plt.subplots returns a bare Axes for a single panel; wrap it so the
    # loops and indexing below work unchanged.
    axes = [axes]

# Leave space on right for vertical colorbar (legend)
fig.subplots_adjust(right=0.86, left=0.08, top=0.95, bottom=0.11, hspace=0.35)

# Cell edges: 13 x-edges for 12 months, len(years)+1 y-edges for the year rows.
# They are identical for every panel, so they are built once.
X = np.arange(0, 13, 1)
Y = np.arange(0, len(years) + 1, 1)

im = None
for ax, mat, title in zip(axes, mats, site_labels):
    im = ax.pcolormesh(X, Y, mat, cmap="inferno", norm=norm,
                       edgecolors="white", linewidth=0.8, antialiased=False)
    # Invert y so earliest is at top
    ax.set_ylim(len(years), 0)
    ax.set_title(title, fontsize=14, weight="bold", pad=6)

# Month ticks, centred on the cells. Set once on the bottom axis (previously
# repeated on every loop iteration).
axes[-1].set_xticks(np.arange(12) + 0.5)
axes[-1].set_xticklabels(month_labels, fontsize=11)

# ---- Year ticks every 2 years starting at the first (e.g., 2015, 2017, ...) ----
mask_every2 = ((years - years[0]) % 2 == 0)
yt_pos   = np.where(mask_every2)[0] + 0.5  # +0.5 centres the tick on its row
yt_labs  = [str(y) for y in years[mask_every2]]
for ax in axes:
    ax.set_yticks(yt_pos)
    ax.set_yticklabels(yt_labs, fontsize=10)

# ---- Put the single Y-axis label only on the middle panel ----
mid = len(axes)//2
for i, ax in enumerate(axes):
    ax.set_ylabel("" if i != mid else "Year", fontsize=12)
# Nudge the middle label a bit left so it visually centers across the stack
axes[mid].yaxis.set_label_coords(-0.06, 0.5)

# X-axis label
axes[-1].set_xlabel("Month", fontsize=12)

# ---- Vertical colorbar on the right ----
# Oriented vertically, with readable label.
cbar = fig.colorbar(im, ax=axes, orientation="vertical",
                    fraction=0.05, pad=0.02)  # fraction controls bar thickness
cbar.set_label("Count", rotation=270, labelpad=14, fontsize=12)
# Ticks: integers up to vmax. Pick the smallest 1-2-5 style step that keeps the
# bar to about ten intervals; this gives 1 / 2 / 5 for vmax <= 10 / 20 / 50 as
# before and keeps growing for larger counts instead of crowding the bar.
step = 1
for candidate in (1, 2, 5, 10, 20, 50, 100, 200, 500, 1000):
    step = candidate
    if vmax <= 10 * candidate:
        break
cbar.set_ticks(np.arange(0, vmax + 1, step))
cbar.ax.tick_params(labelsize=10)

# Save
# os.makedirs("") raises, so only create the folder when out_png has one.
out_dir = os.path.dirname(out_png)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
# Save and close this figure explicitly rather than whichever figure happens
# to be current in the session.
fig.savefig(out_png, bbox_inches="tight")
plt.close(fig)
print(f"Saved heatmap with right legend to: {out_png}")

# ---- Calculate proportion of data during dry season (May–Oct) ----
# Year/Month helper columns are stored on df_all itself; the monthly-mean
# section further down groups on them.
df_all["Year"] = df_all["Date"].dt.year
df_all["Month"] = df_all["Date"].dt.month


# Define dry season months
dry_months = [5, 6, 7, 8, 9, 10]

# One summary row per site label, in the order the labels are listed.
results = []
for site in site_labels:
    site_df = df_all.loc[df_all["Site"] == site]
    total_count = site_df.shape[0]
    dry_count = int(site_df["Month"].isin(dry_months).sum())
    # A site with no rows has no defined proportion; report NaN rather than
    # dividing by zero.
    if total_count > 0:
        proportion = dry_count / total_count
    else:
        proportion = np.nan
    results.append(dict(
        Site=site,
        Total_Data=total_count,
        Dry_Season_Data=dry_count,
        Proportion_Dry_Season=round(proportion, 3),
    ))

prop_df = pd.DataFrame(results)
print("\n=== Proportion of Data During Dry Season (May–Oct) ===")
print(prop_df.to_string(index=False))

# ------------------------------------------------------------------
# ---- Monthly mean image counts (for your sentence) ---------------
# This uses the same Year/Month grid as the heatmap, filling gaps with 0
# so the averages really are "per year" over the whole period.
# NOTE: the grid covers whole calendar years, so months of the first/last year
# that fall outside the actual date range are zero-filled too and pull the
# means for those months down slightly.
# ------------------------------------------------------------------

# year–month grid
year_range  = np.arange(df_all["Year"].min(), df_all["Year"].max() + 1)
month_range = np.arange(1, 13)
mi = pd.MultiIndex.from_product([year_range, month_range], names=["Year", "Month"])


def _monthly_mean_counts(frame, grid):
    """Return (per-cell counts, per-month mean count) for ``frame`` over ``grid``.

    ``frame`` needs ``Year`` and ``Month`` columns; ``grid`` is a MultiIndex
    with levels named ``Year`` and ``Month``. Cells of the grid without any
    rows count as 0, so each monthly mean is taken over every year in the grid.
    """
    counts = (frame
              .groupby(["Year", "Month"])
              .size()
              .reindex(grid, fill_value=0)
              .rename("Count")
              .reset_index())
    return counts, counts.groupby("Month")["Count"].mean()


# === Overall (all sites combined) ===
overall_counts, overall_means = _monthly_mean_counts(df_all, mi)
best_month_num   = int(overall_means.idxmax())
best_month_label = month_labels[best_month_num - 1]
best_month_mean  = overall_means.max()

print("\n=== Overall monthly coverage (all sites combined) ===")
print(f"Years included: {year_range[0]}–{year_range[-1]} (n={len(year_range)})")
print(
    f"On average the month with the most images was {best_month_label} "
    f"with an average of {best_month_mean:.2f} images per year over this period."
)

# === Optional: same summary per site (so you can compare) ===
print("\n=== Monthly coverage by site ===")
for site in site_labels:
    site_df = df_all[df_all["Site"] == site]

    # A site whose feature class was skipped (or had no valid dates) has no
    # rows; idxmax over its all-zero means would name January with 0.00,
    # which reads like a real result. Say so explicitly instead.
    if site_df.empty:
        print(f"{site}: no dated records loaded; monthly means not computed.")
        continue

    site_counts, site_means = _monthly_mean_counts(site_df, mi)
    s_best_month = int(site_means.idxmax())
    s_label      = month_labels[s_best_month - 1]
    s_mean       = site_means.max()

    print(
        f"{site}: {s_label} has the highest mean image count "
        f"({s_mean:.2f} images per year)."
    )


# ---- Run summary: echo the inputs and basic totals ----
print("Inputs:")
for caption, value in (
    ("  GDB:", gdb),
    ("  Feature classes:", feature_classes),
    ("  Date field:", date_field),
    ("  Output:", out_png),
):
    print(caption, value)

# Number of dated records kept for each site.
print("Counts per site:")
records_per_site = df_all.groupby("Site").size()
print(records_per_site)

# Earliest and latest parsed dates across all sites.
earliest = df_all["Date"].min()
latest = df_all["Date"].max()
print("Date range:", earliest, "to", latest)
