<a href="https://colab.research.google.com/github/eth0-02/Astro-Theme-Creek/blob/master/ratio_unpivot_with_hybasid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ✅ DEMAND→AVAILABILITY RATIO: Wide (Jan..Dec) → Long (Month, Ratio)
# Includes an ID column only if its source column is present and has at least one non-null value.
# ID columns supported (case-insensitive):
#   - basin  ← ["BNAME","Basin","BASIN","Basin_Name","basin_name","basin"]
#   - fid    ← ["fid","sub","sub_id","subbasin_id","id"]
#   - HYBAS_ID ← ["HYBAS_ID","hybas_id","hybasid"]
#   - county ← ["county"]
#
# Final columns per file (order): [basin?] [fid?] [HYBAS_ID?] [county?] Month Month_Number Ratio

from google.colab import files
import pandas as pd, re, io

# Prompt the user, then call google.colab's files.upload(). The result is
# iterated further down with .items() as (filename, content) pairs, where each
# content value is wrapped in io.BytesIO before being parsed as CSV.
print("📂 Please upload one or more CSV files…")
uploaded = files.upload()

# --- Month helpers ---
# Three-letter month abbreviations in calendar order, plus their 1-based numbers.
MONTHS = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()
MONTH_TO_NUM = dict(zip(MONTHS, range(1, len(MONTHS) + 1)))

# --- ID column candidates (lowercase; compared case-insensitively, in priority order) ---
BASIN_CANDIDATES = [
    "bname",
    "basin",
    "basin_name",
]
# (no hybas_id here: it has its own candidate list below)
FID_CANDIDATES = [
    "fid",
    "sub",
    "sub_id",
    "subbasin_id",
    "id",
]
HYBASID_CANDIDATES = [
    "hybas_id",
    "hybasid",
]
COUNTY_CANDIDATES = [
    "county",
]

def find_first_present(df, candidates):
    """Return the label of the first candidate found among ``df.columns``, or None.

    Column headers are compared case-insensitively and with surrounding
    whitespace ignored. ``candidates`` are tried in order, so earlier entries
    take priority. The returned value is the column label exactly as it
    appears in ``df.columns`` (original casing/padding preserved).
    """
    by_key = {}
    for col in df.columns:
        # str() keeps non-string labels (e.g. integer headers) from raising;
        # setdefault makes the left-most column win when two headers differ
        # only by case or padding.
        by_key.setdefault(str(col).strip().lower(), col)

    for name in candidates:
        key = str(name).strip().lower()
        if key in by_key:
            return by_key[key]
    return None

def wide_to_long_ratio(df_raw: pd.DataFrame):
    """
    Convert a single wide Ratio table (Jan..Dec columns) to long.

    - Month columns are detected from the first three characters of each
      header (surrounding whitespace trimmed, case-insensitive), so "Jan",
      "JANUARY" and " jan_ratio" all count as January.
    - ID columns basin / fid / HYBAS_ID / county are created only when a
      matching source column exists and holds at least one non-null value.
    - Only the ID source columns and the month columns are melted, so
      unrelated columns (including ones that happen to be called "Month" or
      "Ratio") cannot collide with the generated column names.

    The input frame is not modified.

    Returns:
        (long_df, None) on success, with columns ordered as
        [basin?] [fid?] [HYBAS_ID?] [county?] Month Month_Number Ratio;
        (None, message) when no month columns are found.
    """
    # 1) Identify month columns present (ignore padding around the header)
    month_cols = [
        c for c in df_raw.columns
        if str(c).strip()[:3].capitalize() in MONTHS
    ]
    if not month_cols:
        return None, "No month columns found (expected headers like Jan..Dec)."

    # 2) Map possible identifiers (case-insensitive): output name -> source column
    id_sources = [
        ("basin", find_first_present(df_raw, BASIN_CANDIDATES)),
        ("fid", find_first_present(df_raw, FID_CANDIDATES)),
        ("HYBAS_ID", find_first_present(df_raw, HYBASID_CANDIDATES)),
        ("county", find_first_present(df_raw, COUNTY_CANDIDATES)),
    ]

    # 3) Keep an ID only if its source exists AND has at least one non-null value
    kept = [
        (out_name, src)
        for out_name, src in id_sources
        if src is not None and df_raw[src].notna().any()
    ]
    src_cols = [src for _, src in kept]

    # 4) Melt wide → long, restricted to the columns that reach the output.
    #    Subsetting first matters: melt rejects a value_name that already
    #    exists as a column of the frame being melted.
    long_df = df_raw[src_cols + month_cols].melt(
        id_vars=src_cols,
        value_vars=month_cols,
        var_name="Month",
        value_name="Ratio",
    )

    # 5) Normalize Month + Month_Number
    long_df["Month"] = (
        long_df["Month"].astype(str).str.strip().str[:3].str.capitalize()
    )
    long_df["Month_Number"] = long_df["Month"].map(MONTH_TO_NUM).astype("Int64")

    # 6) Numeric Ratio (unparseable values become NaN)
    long_df["Ratio"] = pd.to_numeric(long_df["Ratio"], errors="coerce")

    # 7) Expose each kept identifier under its standard output name
    for out_name, src in kept:
        long_df[out_name] = long_df[src]

    # 8) Build final column order: present IDs + Month + Month_Number + Ratio
    final_cols = [out_name for out_name, _ in kept] + ["Month", "Month_Number", "Ratio"]
    out = long_df[final_cols].copy()
    return out, None

# --- Process each uploaded file ---
for fname, content in uploaded.items():
    try:
        # Try a few encodings in turn; the first successful parse wins.
        # NOTE: on_bad_lines="skip" makes pandas drop malformed rows silently,
        # so the output can contain fewer rows than the uploaded file.
        df = None
        read_error = None
        for encoding in ("utf-8", "utf-8-sig", "latin-1"):
            try:
                df = pd.read_csv(io.BytesIO(content), encoding=encoding,
                                 on_bad_lines="skip")
                break
            except Exception as exc:
                # Remember why this attempt failed so it can be reported if
                # every encoding fails (e.g. an empty file is not an
                # encoding problem).
                read_error = exc

        if df is None:
            print(f"❌ Could not read {fname}. Try saving as UTF-8 CSV.")
            print(f"   Last error: {read_error}")
            continue

        out_df, err = wide_to_long_ratio(df)
        if err:
            print(f"⚠️ Skipped {fname}: {err}")
            print("   Headers seen:", list(df.columns))
            continue

        # Strip a trailing ".csv" regardless of case (".CSV", ".Csv", ...)
        # before appending the output suffix.
        stem = re.sub(r"\.csv$", "", fname, flags=re.IGNORECASE)
        outname = stem + "_FINAL_RATIO3.csv"
        out_df.to_csv(outname, index=False)
        print(f"✅ Saved {outname} — columns: {', '.join(out_df.columns)}")
        files.download(outname)

    except Exception as e:
        # Per-file boundary: report the failure and move on to the next upload.
        print(f"❌ Error processing {fname}: {e}")


📂 Please upload one or more CSV files…


Saving SUB DEMAND TO AVAILABILITY RATIO.csv to SUB DEMAND TO AVAILABILITY RATIO.csv
✅ Saved SUB DEMAND TO AVAILABILITY RATIO_FINAL_RATIO3.csv — columns: fid, HYBAS_ID, Month, Month_Number, Ratio


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>