<a href="https://colab.research.google.com/github/ihrisikesa/DWS/blob/main/Kartu_Progress_Sahabat_Sehat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
# =======================
# AUTH (single source of truth)
# =======================
from google.colab import auth
auth.authenticate_user()

import google.auth
from googleapiclient.discovery import build
import gspread
from google.auth import default
from gspread_dataframe import get_as_dataframe, set_with_dataframe
import pandas as pd
import numpy as np
import re

# Scopes + creds
SCOPES = [
    "https://www.googleapis.com/auth/drive",
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/documents",
]
creds, _ = default()
creds = creds.with_scopes(SCOPES)

# Clients
gc        = gspread.authorize(creds)
drive_svc = build("drive", "v3", credentials=creds)
docs_svc  = build("docs",  "v1", credentials=creds)

# If you need Colab Drive FS:
from google.colab import drive as gdrive
gdrive.mount('/content/drive')

# =======================
# CONFIG
# =======================
SHEET_ID         = "1sYyHqoGWRdfhC6-bMQ1jjltibg7P6YALJsth1WE-UA4"   # Sheet ID master data
SHEET_TAB        = "Lobar"
TEMPLATE_DOC_ID  = "1J7HbsU6f7DibHDSTnPIfcTv9sv2silrSpFDTtWtJpiI"   # Template Documents ID
DEST_FOLDER_ID   = "1LVDm5DF9ZlV8dMla5lOliv-xHZWKkwF2"              # Folder di Google Drive (opsional)
OUTPUT_TAB       = "Laporan Kinerja Harian (LKH)"
SKIP_IF_HAS_LINK = True                  # True = jangan buat ulang jika link sudah ada
FILTER_DATE      = None                  # contoh: "2025-09-30" untuk satu hari saja, None = semua

# Optional: simple categorization for DWS score
def kategori_dws(x):
    if pd.isna(x): return ""
    x = float(x)
    if x >= 85: return "Sangat Baik"
    if x >= 70: return "Baik"
    if x >= 60: return "Cukup"
    return "Perlu Pendampingan"

# =======================
# Helpers
# =======================
MONTH_ID = ["Januari","Februari","Maret","April","Mei","Juni",
            "Juli","Agustus","September","Oktober","November","Desember"]

def tanggal_id(dt: pd.Timestamp) -> str:
    dt = pd.to_datetime(dt, errors="coerce")
    if pd.isna(dt): return ""
    return f"{dt.day} {MONTH_ID[dt.month-1]} {dt.year}"

def copy_template(new_name: str) -> str:
    body = {"name": new_name}
    if DEST_FOLDER_ID:
        body["parents"] = [DEST_FOLDER_ID]
    file = drive_svc.files().copy(fileId=TEMPLATE_DOC_ID, body=body).execute()
    return file["id"]

def replace_placeholders(doc_id: str, mapping: dict):
    # push both UPPER and lower/space variants
    requests = []
    for k, v in mapping.items():
        requests.append({
            "replaceAllText": {
                "containsText": {"text": f"{{{{{k}}}}}", "matchCase": True},
                "replaceText": "" if v is None else str(v)
            }
        })
    docs_svc.documents().batchUpdate(
        documentId=doc_id, body={"requests": requests}
    ).execute()

def fmt_id(v):
    if pd.isna(v):
        return ""
    s = str(v).strip().replace(",", ".")
    # remove trailing .0 / .000...
    if re.fullmatch(r"\d+(?:\.0+)?", s):
        return s.split(".")[0]
    # scientific notation
    try:
        if "e" in s.lower():
            return "{:.0f}".format(float(s))
    except:
        pass
    return s

# Quick permission check
_ = docs_svc.documents().get(documentId=TEMPLATE_DOC_ID).execute()
print("Template is accessible.")

# =======================
# Read the source Sheet
# =======================
ws = gc.open_by_key(SHEET_ID).worksheet(SHEET_TAB)
df = get_as_dataframe(ws, evaluate_formulas=True).dropna(how="all")

# Standardize expected columns (case-insensitive safety)
df.columns = [str(c).strip().lower() for c in df.columns]

# Ensure needed columns exist
needed = [
    "id","name","date","score_dws","rank_dws",
    "score_missmatch","rank_missmatch",
    "score_error_incompleteness","rank_error_incompletness","rank_final"
]
for col in needed:
    if col not in df.columns:
        df[col] = np.nan

# Parse date
df["date"] = pd.to_datetime(df["date"], errors="coerce")

# Parse numeric (handle comma decimals like 69,37)
def numify(series_like):
    s = pd.Series(series_like, dtype="string") \
          .str.replace(" ", "", regex=False) \
          .str.replace(",", ".", regex=False)
    return pd.to_numeric(s, errors="coerce")

df["score_dws"]                   = numify(df["score_dws"])
df["rank_dws"]                    = pd.to_numeric(df["rank_dws"], errors="coerce")
df["rank_error_incompletness"]    = pd.to_numeric(df["rank_error_incompletness"], errors="coerce")
df["rank_final"]                  = pd.to_numeric(df["rank_final"], errors="coerce")
df["score_missmatch"]             = numify(df["score_missmatch"])
df["rank_missmatch"]              = pd.to_numeric(df["rank_missmatch"], errors="coerce")
df["score_error_incompleteness"]  = numify(df["score_error_incompleteness"])

# Only rows with valid score & rank
df = df.loc[~df["score_dws"].isna() & ~df["rank_dws"].isna()].copy()

# Optional date filter
if FILTER_DATE:
    target_day = pd.to_datetime(FILTER_DATE).date()
    df = df[df["date"].dt.date.eq(target_day)]

if df.empty:
    print("No rows to generate (after filtering).")
else:
    # =======================
    # max rank per date (cohort size proxy)
    # =======================
    grp = df.dropna(subset=["score_dws"]).groupby(df["date"].dt.date, as_index=True)["rank_dws"]
    max_rank_by_date = grp.max().rename("max_rank_dws")
    df = df.merge(max_rank_by_date, left_on=df["date"].dt.date, right_index=True, how="left")

    df["kategori_dws"] = df["score_dws"].map(kategori_dws)

    # =======================
    # Skip logic (anti-join by id|name|date)
    # =======================
    sh = gc.open_by_key(SHEET_ID)
    try:
        ws_links = sh.worksheet(OUTPUT_TAB)
        existing_links = get_as_dataframe(ws_links).dropna(how="all")
    except Exception:
        ws_links = None
        existing_links = pd.DataFrame(columns=["ID","name","date","doc_url"])

    if SKIP_IF_HAS_LINK and not existing_links.empty:
        ex = existing_links.rename(columns=str.lower).copy()
        if "date" in ex.columns:
            ex["date"] = pd.to_datetime(ex["date"], errors="coerce").dt.date

        df["date_only"] = df["date"].dt.date
        df["__key"] = (
            df["id"].astype(str).str.strip() + "|" +
            df["name"].astype(str).str.strip() + "|" +
            df["date_only"].astype(str)
        )

        ex = ex[[c for c in ["id","name","date"] if c in ex.columns]].dropna(how="any")
        if not ex.empty:
            ex["__key"] = (
                ex["id"].astype(str).str.strip() + "|" +
                ex["name"].astype(str).str.strip() + "|" +
                ex["date"].astype(str)
            )
            before = len(df)
            df = df[~df["__key"].isin(set(ex["__key"]))].drop(columns=["__key","date_only"])
            print(f"Skip-if-has-link active: removed {before - len(df)} already-generated rows.")
        else:
            df = df.drop(columns=["date_only"], errors="ignore")

    print("Columns now:", df.columns.tolist())

    # =======================
    # Create Docs
    # =======================
    created = []
    for row in df.to_dict("records"):
        person_id   = fmt_id(row.get("id"))
        person_name = (row.get("name") or "").strip()
        dts         = pd.to_datetime(row.get("date"), errors="coerce")
        nice_date   = tanggal_id(dts)

        rank_dws  = "" if pd.isna(row.get("rank_dws")) else int(row.get("rank_dws"))
        max_rank  = "" if pd.isna(row.get("max_rank_dws")) else int(row.get("max_rank_dws"))
        score_dws = row.get("score_dws")
        kat_dws   = row.get("kategori_dws") or ""

        rank_mm   = row.get("rank_missmatch")
        rank_ei   = row.get("rank_error_incompletness")
        rank_fin  = row.get("rank_final")
        score_mm  = row.get("score_missmatch")
        score_err = row.get("score_error_incompleteness")

        doc_name   = f"LKH - {person_name} - {nice_date}"
        new_doc_id = copy_template(doc_name)

        # Support both lower/space and UPPERCASE placeholders
        mapping = {
            # Identity
            "NAMA": person_name, "name": person_name,
            "ID": person_id, "id": person_id,
            "DATE": nice_date, "TANGGAL": nice_date, "date": nice_date,

            # DWS section
            "RANK_DWS": rank_dws, "RANK_DWS": rank_dws,
            "MAX_RANK_DWS": max_rank, "MAX_RANK_DWS": max_rank,
            "SCORE_DWS": "" if pd.isna(score_dws) else f"{float(score_dws):.2f}",
            "KATEGORI_DWS": kat_dws, "KATEGORI_DWS": kat_dws,

            # Optional extras (only replaced if present in template)
            "rank_missmatch": "" if pd.isna(rank_mm) else int(rank_mm),
            "rank_error_incompletness": "" if pd.isna(rank_ei) else int(rank_ei),
            "rank_final": "" if pd.isna(rank_fin) else int(rank_fin),
            "score_missmatch": "" if pd.isna(score_mm) else f"{float(score_mm):.2f}",
            "score_error_incompleteness": "" if pd.isna(score_err) else f"{float(score_err):.2f}",
        }

        replace_placeholders(new_doc_id, mapping)

        created.append({
            "ID": person_id,
            "name": person_name,
            "date": dts.date(),
            "doc_url": f"https://docs.google.com/document/d/{new_doc_id}/edit"
        })

    links_df = pd.DataFrame(created)
    print(f"Created {len(links_df)} documents.")

    # =======================
    # Write/append links tab (de-duplicate by ID+name+date)
    # =======================
    sh = gc.open_by_key(SHEET_ID)
    if ws_links is None:
        ws_links = sh.add_worksheet(title=OUTPUT_TAB, rows=200, cols=8)
        set_with_dataframe(ws_links, links_df, include_index=False, resize=True)
    else:
        existing = get_as_dataframe(ws_links).dropna(how="all")
        out = pd.concat([existing, links_df], ignore_index=True)

        # normalize & dedupe
        out.columns = [str(c).strip() for c in out.columns]
        if "date" in out.columns:
            out["date"] = pd.to_datetime(out["date"], errors="coerce").dt.date
        dedupe_keys = [c for c in ["ID","name","date"] if c in out.columns]
        if dedupe_keys:
            out = out.drop_duplicates(subset=dedupe_keys, keep="first")

        ws_links.clear()
        set_with_dataframe(ws_links, out, include_index=False, resize=True)

    print("Links updated:", OUTPUT_TAB)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Template is accessible.
Columns now: ['key_0', 'id', 'name', 'date', 'score_error_incompletness', 'score_dws', 'score_missmatch', 'rank_dws', 'rank_missmatch', 'rank_error_incompletness', 'rank_rank', 'score_error_incompleteness', 'rank_final', 'max_rank_dws', 'kategori_dws']
Created 189 documents.
Links updated: Laporan Kinerja Harian (LKH)
