<a href="https://colab.research.google.com/github/eth0-02/Astro-Theme-Creek/blob/master/RATIO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# === ONE-BLOCK COLAB SCRIPT (NO MERGED, NO SOURCE COLUMN) ===
# What it does:
# 1) Prompts you to upload EXACTLY these three files (strict names):
#       - BASIN RATIO RAW.csv
#       - COUNTY RATIO RAW.csv
#       - SUB RATIO RAW.csv
# 2) Cleans each: drops empty columns/rows, auto-detects ID columns:
#       - Basin  -> bname
#       - County -> county
#       - Sub    -> hybas_id
#    Also keeps 'fid' if present & not all-empty.
# 3) Converts wide -> long, adds MonthName/MonthNumber, rounds Value to 2 d.p.
# 4) Saves *_LONG.csv for each source (no merged output, no Source column).

import pandas as pd
import re
from pathlib import Path
from google.colab import files

# -------- STRICT FILENAMES --------
# Label -> path the script reads each input from. The label is also reused
# as the stem of the corresponding "<label>_LONG.csv" output file.
INPUT_FILES = {
    "BASIN RATIO RAW": "/content/BASIN RATIO RAW.csv",
    "COUNTY RATIO RAW": "/content/COUNTY RATIO RAW.csv",
    "SUB RATIO RAW": "/content/SUB RATIO RAW.csv",
}
OUTPUT_DIR = Path("/content")

# -------- UPLOAD --------
# Derive the prompt from INPUT_FILES so it cannot drift from the names that
# are actually required. A single string is printed on purpose: passing two
# arguments to print() inserts a separator space that indents the first bullet.
_expected_names = [Path(p).name for p in INPUT_FILES.values()]
print("Please select EXACTLY these three files:\n"
      + "\n".join(f"- {name}" for name in _expected_names))
uploaded = files.upload()

# NOTE(review): files.upload() is documented to return a dict keyed by the
# saved file name; presumably Colab renames an upload (e.g. "name (1).csv")
# when a file of that name already exists, in which case a stale copy would
# be read later. Warn rather than fail, since the file may legitimately be
# present at its expected path from an earlier run.
_not_uploaded = [name for name in _expected_names if name not in uploaded]
if _not_uploaded:
    print("Warning: these expected files were not among the uploads "
          "(wrong name, or renamed on save because a copy already exists): "
          + ", ".join(_not_uploaded))

# -------- HELPERS --------
MONTHS = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]
# Lower-cased three-letter abbreviation -> calendar month number (1-12).
MONTH_LOOKUP = {name.lower(): number for number, name in enumerate(MONTHS, start=1)}


def normalize_month(colname: str):
    """Map a column label to a ``(MonthName, MonthNumber)`` pair.

    Every character other than A-Z/a-z is discarded, and the first three
    remaining letters are compared case-insensitively with the entries of
    ``MONTHS``. Because only that three-letter prefix is inspected, any label
    whose letters begin with a month abbreviation is treated as that month.

    Returns:
        ``(abbreviation, number)`` with the abbreviation taken from ``MONTHS``
        and the number in 1-12, or ``(None, None)`` when nothing matches.
    """
    alpha_only = re.sub(r"[^A-Za-z]", "", str(colname).strip())
    # An empty or too-short prefix simply misses the lookup below.
    number = MONTH_LOOKUP.get(alpha_only[:3].lower())
    if number is None:
        return None, None
    return MONTHS[number - 1], number

def drop_empty_columns(df: pd.DataFrame):
    """Return ``df`` restricted to the columns that hold at least one value.

    A cell counts as missing when it is NA/NaN or the empty string ``""``.
    Column order is preserved.
    """
    retained = []
    for name in df.columns:
        blanks_as_na = df[name].replace("", pd.NA)
        # Keep the column as soon as any cell carries real content.
        if blanks_as_na.notna().any():
            retained.append(name)
    return df[retained]

def detect_ids(df: pd.DataFrame):
    """Pick the identifier columns of ``df`` by (case-insensitive) name.

    The result contains, in this order:

    * the ``fid`` column, if present and not entirely NA / empty-string;
    * the first of ``bname``, ``county`` or ``hybas_id`` that is present.

    Labels are compared after ``str()``, whitespace stripping and
    lower-casing, so padded headers such as ``"bname "`` still match and
    non-string labels do not raise. When several columns normalise to the
    same name, the right-most one is used.

    Returns:
        A list of the original column labels (possibly empty).
    """
    by_normalized = {str(col).strip().lower(): col for col in df.columns}
    ids = []

    if "fid" in by_normalized:
        fid_col = by_normalized["fid"]
        # An all-blank fid column carries no identity, so it is skipped.
        if not df[fid_col].replace("", pd.NA).isna().all():
            ids.append(fid_col)

    # Priority order: basin, then county, then sub-basin. Only one is kept.
    for key in ("bname", "county", "hybas_id"):
        if key in by_normalized:
            ids.append(by_normalized[key])
            break

    return ids

def to_long(df: pd.DataFrame):
    """Reshape a wide, one-column-per-month table into long form.

    Steps:

    1. Drop all-empty columns and detect the ID columns.
    2. Treat ``""`` in ID columns as missing and drop rows whose ID columns
       are all missing.
    3. Melt every non-ID column, keeping only rows whose source column name
       is recognised by ``normalize_month`` and whose value is numeric.
    4. Round ``Value`` to 2 decimal places and sort by the ID columns, then
       by month number.

    Returns:
        A new DataFrame with the ID columns followed by ``MonthName``,
        ``MonthNumber`` (integer) and ``Value``, with a fresh 0..n-1 index.
        The input frame is not modified.
    """
    # Explicit copy: the column subset returned by drop_empty_columns is
    # assigned into below, which would otherwise risk SettingWithCopyWarning.
    df = drop_empty_columns(df).copy()
    id_vars = detect_ids(df)
    if id_vars:
        df[id_vars] = df[id_vars].apply(lambda s: s.replace("", pd.NA))
        df = df.dropna(subset=id_vars, how="all")

    value_vars = [c for c in df.columns if c not in id_vars]
    long_df = df.melt(id_vars=id_vars, value_vars=value_vars,
                      var_name="MonthRaw", value_name="Value")

    # Resolve each source column once rather than once per melted row.
    month_of = {col: normalize_month(col) for col in value_vars}
    no_month = (None, None)
    long_df["MonthName"] = long_df["MonthRaw"].map(
        lambda col: month_of.get(col, no_month)[0])
    long_df["MonthNumber"] = long_df["MonthRaw"].map(
        lambda col: month_of.get(col, no_month)[1])
    long_df["Value"] = pd.to_numeric(long_df["Value"], errors="coerce")

    # Single filter pass: non-month columns and non-numeric values drop out.
    long_df = long_df.dropna(subset=["MonthName", "MonthNumber", "Value"]).copy()

    # Unmatched columns put missing values next to the month numbers, which
    # can leave the column non-integer; cast back so the CSV shows 1, not 1.0.
    long_df["MonthNumber"] = long_df["MonthNumber"].astype(int)
    long_df["Value"] = long_df["Value"].round(2)

    cols = id_vars + ["MonthName", "MonthNumber", "Value"]
    long_df = long_df[cols].sort_values(id_vars + ["MonthNumber"]).reset_index(drop=True)
    return long_df

# -------- PROCESS EACH FILE --------
# For every expected input: verify it exists, reshape it with to_long(), and
# write "<label>_LONG.csv" into OUTPUT_DIR.
# `outputs` maps each label to the path (as a string) of the file written.
outputs = {}
for label, path in INPUT_FILES.items():
    f = Path(path)
    # Stop at the first missing input instead of producing partial results.
    if not f.exists():
        raise FileNotFoundError(f"Missing required file: {f.name} (check name & upload)")
    df = pd.read_csv(f)
    long_df = to_long(df)
    out_path = OUTPUT_DIR / f"{label}_LONG.csv"
    # index=False: the row index is not written to the CSV.
    long_df.to_csv(out_path, index=False)
    outputs[label] = str(out_path)
    print(f"Saved -> {out_path}")

# -------- AUTO-DOWNLOAD --------
# Best-effort download of each written file. Any failure is reported with
# the file's path and the error, and the loop continues with the next file.
for label, out_path in outputs.items():
    try:
        files.download(out_path)
    except Exception as e:
        print(f"Download hint (if not Colab): {out_path} :: {e}")


Please select EXACTLY these three files:
 - BASIN RATIO RAW.csv
- COUNTY RATIO RAW.csv
- SUB RATIO RAW.csv


Saving BASIN RATIO RAW.csv to BASIN RATIO RAW.csv
Saving COUNTY RATIO RAW.csv to COUNTY RATIO RAW.csv
Saving SUB RATIO RAW.csv to SUB RATIO RAW.csv
Saved -> /content/BASIN RATIO RAW_LONG.csv
Saved -> /content/COUNTY RATIO RAW_LONG.csv
Saved -> /content/SUB RATIO RAW_LONG.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>