In [5]:
"""
Update polygon attributes from GitHub CSVs by UID (Notes + Costs)
"""

import os
import re
import tempfile
import urllib.request
import arcpy

arcpy.env.overwriteOutput = True

In [6]:
### Change once final 
PROJECT_AREAS_IN = r"C:\Users\aimee\Documents\SewerBillingCleanup\WW_Overlay\SpreadsheetScript\ClusterInfo_1_2_TestShapefile.shp"
OUTPUT_GDB = r"C:\Users\aimee\Documents\HCPT2025\hcpt_work.gdb"
OUTPUT_FC_NAME = "project_areas_updated"

NOTES_CSV_URL = "https://raw.githubusercontent.com/cshuler/WW_Overlay_2024/main/Data/Project_Area_csvs/Project_Areas_Notes_Test.csv"
COSTS_CSV_URL = "https://raw.githubusercontent.com/cshuler/WW_Overlay_2024/main/Data/Project_Area_csvs/Project_Areas_Notes_Costs_Test.csv"

UID_FIELD = "UID"
JOIN_UID_TEXT = "UID_TXT"

# If True: blank/NULL in CSV will NOT overwrite existing values
# If False: blank/NULL in CSV WILL clear target values
DONT_OVERWRITE_WITH_NULLS = False

# NOTES exclusions (skip these columns from notes)
EXCLUDE_NOTES_SOURCE_COLUMNS = {
    "uid",
    "name_1",
    "name",
    "projareaname",
    "proposed name change",
    "proposed_name_change",
}

# COST fields to update (exclude cluster-derived fields)
COST_FIELD_MAP = {
    "SUM_length": ("SUM_length", "DOUBLE"),
    "Trunk_Line": ("Trunk_Line", "DOUBLE"),
    "Cost_per_cp": ("Cost_per_cp", "DOUBLE"),
    "Cost_Estim": ("Cost_Estim", "DOUBLE"),
    "Cost_per_cp_actual": ("CostCpActual", "DOUBLE"),
    "CIP_total": ("CIP_total", "DOUBLE"),
}


In [7]:
# -----------------------------
# SAFE CASTING / UID CLEANING
# -----------------------------
CODE_BLOCK = r"""
def uid_txt(v):
    if v is None:
        return None
    s = str(v).strip()
    return s if s != "" else None

def to_text(v, max_len):
    if v is None:
        return None
    s = str(v).replace('\r\n','\n').replace('\r','\n')
    if s.strip() == "":
        return None
    if max_len and len(s) > max_len:
        return s[:max_len]
    return s

def choose_text(v, oldv, max_len):
    # keep old if blank/null
    if v is None:
        return oldv
    s = str(v).replace('\r\n','\n').replace('\r','\n')
    if s.strip() == "":
        return oldv
    if max_len and len(s) > max_len:
        return s[:max_len]
    return s

def to_float(v):
    if v is None:
        return None
    if isinstance(v, (int, float)):
        return float(v)
    s = str(v).strip()
    if s == "" or s.lower() in ("na","n/a","null","none","nan","-","—"):
        return None
    s = s.replace(",", "").replace("$", "")
    try:
        return float(s)
    except:
        return None

def choose_float(v, oldv):
    # keep old if blank/null/invalid
    if v is None:
        return oldv
    if isinstance(v, (int, float)):
        return float(v)
    s = str(v).strip()
    if s == "" or s.lower() in ("na","n/a","null","none","nan","-","—"):
        return oldv
    s = s.replace(",", "").replace("$", "")
    try:
        return float(s)
    except:
        return oldv
"""


In [8]:
# -----------------------------
# HELPERS
# -----------------------------
def ensure_gdb(gdb_path: str):
    folder = os.path.dirname(gdb_path)
    name = os.path.splitext(os.path.basename(gdb_path))[0]
    os.makedirs(folder, exist_ok=True)
    if not arcpy.Exists(gdb_path):
        arcpy.management.CreateFileGDB(folder, name)

def download_file(url: str, out_path: str):
    if "github.com" in url and "raw.githubusercontent.com" not in url:
        raise RuntimeError(f"Not a raw GitHub URL:\n{url}")
    urllib.request.urlretrieve(url, out_path)

def field_exists(dataset: str, field_name: str) -> bool:
    return any(f.name.lower() == field_name.lower() for f in arcpy.ListFields(dataset))

def resolve_field_name(dataset: str, desired: str):
    for f in arcpy.ListFields(dataset):
        if f.name.lower() == desired.lower():
            return f.name
    return None

def normalize(s: str) -> str:
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "_", s)
    s = re.sub(r"_+", "_", s).strip("_")
    return s

def make_gdb_field_name(src_name: str, existing_lower: set[str]) -> str:
    base = normalize(src_name)
    if not base:
        base = "field"
    if not base[0].isalpha():
        base = "f_" + base
    base = base[:64]

    name = base
    i = 1
    while name.lower() in existing_lower:
        suf = f"_{i}"
        name = (base[:64 - len(suf)] + suf)
        i += 1
    existing_lower.add(name.lower())
    return name

def add_field_like(fc: str, field_name: str, src_field_obj):
    if field_exists(fc, field_name):
        return
    if src_field_obj.type == "String":
        length = src_field_obj.length if src_field_obj.length and src_field_obj.length > 0 else 255
        length = min(max(int(length), 50), 8000)
        arcpy.management.AddField(fc, field_name, "TEXT", field_length=length)
    elif src_field_obj.type in ("Double", "Single"):
        arcpy.management.AddField(fc, field_name, "DOUBLE")
    elif src_field_obj.type in ("Integer", "SmallInteger"):
        arcpy.management.AddField(fc, field_name, "LONG")
    else:
        arcpy.management.AddField(fc, field_name, "TEXT", field_length=8000)

def add_field_if_missing(fc: str, field_name: str, field_type: str, length=None):
    if field_exists(fc, field_name):
        return
    kwargs = {}
    if field_type.upper() == "TEXT" and length:
        kwargs["field_length"] = int(length)
    arcpy.management.AddField(fc, field_name, field_type.upper(), **kwargs)

def get_field_obj(dataset: str, field_name: str):
    for f in arcpy.ListFields(dataset):
        if f.name.lower() == field_name.lower():
            return f
    return None

def ensure_text_join_field(dataset: str, src_uid_field: str, join_uid_field: str):
    if not field_exists(dataset, join_uid_field):
        arcpy.management.AddField(dataset, join_uid_field, "TEXT", field_length=100)
    arcpy.management.CalculateField(dataset, join_uid_field, f"uid_txt(!{src_uid_field}!)", "PYTHON3", CODE_BLOCK)

def import_csv_to_table(url: str, scratch_dir: str, scratch_gdb: str, table_name: str) -> str:
    local_csv = os.path.join(scratch_dir, f"{table_name}.csv")
    download_file(url, local_csv)
    out_table = os.path.join(scratch_gdb, table_name)
    arcpy.conversion.TableToTable(local_csv, scratch_gdb, table_name)
    return out_table


In [9]:
# -----------------------------
# MAIN
# -----------------------------
def main():
    if not arcpy.Exists(PROJECT_AREAS_IN):
        raise RuntimeError(f"Input polygons not found:\n{PROJECT_AREAS_IN}")
    if not field_exists(PROJECT_AREAS_IN, UID_FIELD):
        raise RuntimeError(f"UID field '{UID_FIELD}' not found in input polygons.")

    # Output FC in GDB
    ensure_gdb(OUTPUT_GDB)
    target_fc = os.path.join(OUTPUT_GDB, OUTPUT_FC_NAME)
    if arcpy.Exists(target_fc):
        arcpy.management.Delete(target_fc)
    arcpy.management.CopyFeatures(PROJECT_AREAS_IN, target_fc)
    print(f"Copied input polygons to: {target_fc}")

    # Scratch space
    scratch_dir = tempfile.mkdtemp(prefix="arcpy_notes_costs_update_")
    scratch_gdb = os.path.join(scratch_dir, "scratch.gdb")
    ensure_gdb(scratch_gdb)

    # Import CSVs
    notes_table = import_csv_to_table(NOTES_CSV_URL, scratch_dir, scratch_gdb, "notes")
    costs_table = import_csv_to_table(COSTS_CSV_URL, scratch_dir, scratch_gdb, "costs")

    for tname, tbl in [("notes", notes_table), ("costs", costs_table)]:
        if not field_exists(tbl, UID_FIELD):
            raise RuntimeError(f"UID field '{UID_FIELD}' not found in {tname} CSV after import.")

    # Create join keys (text) on all three datasets
    ensure_text_join_field(target_fc, UID_FIELD, JOIN_UID_TEXT)
    ensure_text_join_field(notes_table, UID_FIELD, JOIN_UID_TEXT)
    ensure_text_join_field(costs_table, UID_FIELD, JOIN_UID_TEXT)

    # Feature layer
    fc_lyr = "project_areas_updated_lyr"
    arcpy.management.MakeFeatureLayer(target_fc, fc_lyr)
    fc_prefix = arcpy.Describe(target_fc).name

    # -----------------------------
    # 1) NOTES: Update all fields except exclusions
    # -----------------------------
    tbl_prefix = arcpy.Describe(notes_table).name
    join_view = "notes_tblview"
    arcpy.management.MakeTableView(notes_table, join_view)
    arcpy.management.AddJoin(fc_lyr, JOIN_UID_TEXT, join_view, JOIN_UID_TEXT, "KEEP_ALL")

    existing_target = {f.name.lower() for f in arcpy.ListFields(target_fc)}
    ex_norm = {normalize(x) for x in EXCLUDE_NOTES_SOURCE_COLUMNS}

    notes_mapping = {}
    for sf in arcpy.ListFields(notes_table):
        if sf.type in ("OID", "Geometry"):
            continue
        if sf.name.lower() in (UID_FIELD.lower(), JOIN_UID_TEXT.lower()):
            continue
        if normalize(sf.name) in ex_norm:
            continue

        # Keep already-valid names (e.g., meeting_nt). Otherwise sanitize.
        if re.match(r"^[A-Za-z][A-Za-z0-9_]{0,63}$", sf.name):
            tgt = sf.name
            if tgt.lower() not in existing_target:
                existing_target.add(tgt.lower())
            notes_mapping[sf.name] = tgt
        else:
            tgt = make_gdb_field_name(sf.name, existing_target)
            notes_mapping[sf.name] = tgt

    print("NOTES field mapping (source -> target):")
    for k, v in notes_mapping.items():
        print(f"  {k} -> {v}")

    updated_notes = 0
    for src_name, tgt_name in notes_mapping.items():
        sf = get_field_obj(notes_table, src_name)
        if sf is None:
            continue

        add_field_like(target_fc, tgt_name, sf)
        tf = get_field_obj(target_fc, tgt_name)
        if tf is None:
            raise RuntimeError(f"Target field missing after add: {tgt_name}")

        target_ref = f"{fc_prefix}.{tgt_name}"
        source_ref = f"{tbl_prefix}.{src_name}"

        if tf.type == "String":
            max_len = tf.length if tf.length else 8000
            expr = (
                f"choose_text(!{source_ref}!, !{target_ref}!, {max_len})"
                if DONT_OVERWRITE_WITH_NULLS
                else f"to_text(!{source_ref}!, {max_len})"
            )
        else:
            expr = (
                f"choose_float(!{source_ref}!, !{target_ref}!)"
                if DONT_OVERWRITE_WITH_NULLS
                else f"to_float(!{source_ref}!)"
            )

        arcpy.management.CalculateField(fc_lyr, target_ref, expr, "PYTHON3", CODE_BLOCK)
        updated_notes += 1

    arcpy.management.RemoveJoin(fc_lyr, tbl_prefix)
    print(f"Updated {updated_notes} NOTES fields.")

    # -----------------------------
    # 2) COSTS: Update selected numeric fields only
    # -----------------------------
    tbl_prefix = arcpy.Describe(costs_table).name
    join_view = "costs_tblview"
    arcpy.management.MakeTableView(costs_table, join_view)
    arcpy.management.AddJoin(fc_lyr, JOIN_UID_TEXT, join_view, JOIN_UID_TEXT, "KEEP_ALL")

    updated_costs = 0
    for csv_col, (target_field, target_type) in COST_FIELD_MAP.items():
        src_col = resolve_field_name(costs_table, csv_col)
        if not src_col:
            raise RuntimeError(
                f"COSTS column '{csv_col}' not found after import.\n"
                f"Imported costs fields: {[f.name for f in arcpy.ListFields(costs_table)]}"
            )

        add_field_if_missing(target_fc, target_field, target_type)
        tgt_col = resolve_field_name(target_fc, target_field)
        if not tgt_col:
            raise RuntimeError(f"Failed to create/resolve target cost field '{target_field}'.")

        target_ref = f"{fc_prefix}.{tgt_col}"
        source_ref = f"{tbl_prefix}.{src_col}"

        expr = (
            f"choose_float(!{source_ref}!, !{target_ref}!)"
            if DONT_OVERWRITE_WITH_NULLS
            else f"to_float(!{source_ref}!)"
        )

        arcpy.management.CalculateField(fc_lyr, target_ref, expr, "PYTHON3", CODE_BLOCK)
        updated_costs += 1

    arcpy.management.RemoveJoin(fc_lyr, tbl_prefix)
    print(f"Updated {updated_costs} COSTS fields.")

    print("Done.")
    print(f"Output updated feature class: {target_fc}")

if __name__ == "__main__":
    main()


Copied input polygons to: C:\Users\aimee\Documents\HCPT2025\hcpt_work.gdb\project_areas_updated
NOTES field mapping (source -> target):
  Meeting_nt -> Meeting_nt
  DocsURL -> DocsURL
  Details -> Details
  Category___Status_Description -> Category___Status_Description
  InCIP -> InCIP
Updated 5 NOTES fields.
Updated 6 COSTS fields.
Done.
Output updated feature class: C:\Users\aimee\Documents\HCPT2025\hcpt_work.gdb\project_areas_updated
