In [1]:
# --- CONFIG ---
# Folder that is scanned for the *.txt header files -- change this to your folder.
FOLDER = r"Z:\ACME 3 EDM Data\0014\header_data"
# Path of the CSV file to write -- change this to where you want the CSV.
OUT_CSV = r"C:\ACMEcode\ACMEanalysis\notebooks\output.csv"

# --- CODE (no edits needed below) ---
from pathlib import Path
import re
import csv

# Files look like "0014.xxxx.zzzz.txt" and we only use those with zzzz == 0000.
# The xxxx field must be all digits because it is compared numerically below;
# a name with a non-numeric xxxx simply fails to match and is skipped, instead
# of aborting the whole scan with a ValueError from int().
NAME_RE = re.compile(r'^0014\.(\d+)\.(\d{4})\.txt$')
# A line of the form "LA_Movers<TAB><number>": optional sign, digits, optional
# decimal part, optional trailing whitespace. Group 1 is the number text.
LA_RE = re.compile(r'^LA_Movers\t([+-]?\d+(?:\.\d+)?)\s*$')

# Files whose xxxx field is numerically below this value are ignored.
MIN_XXXX = 672

folder = Path(FOLDER)
if not folder.is_dir():
    raise NotADirectoryError(f"{folder} is not a directory")

# Map: xxxx (the string exactly as it appears in the filename) -> list of
# candidate files (just in case there are multiples)
by_xxxx = {}
for p in folder.glob("*.txt"):
    m = NAME_RE.match(p.name)
    if not m:
        continue
    xxxx, zzzz = m.groups()
    if zzzz != "0000" or int(xxxx) < MIN_XXXX:
        continue
    by_xxxx.setdefault(xxxx, []).append(p)

def first_la_movers_value(path: Path):
    """Return the number on the first ``LA_Movers`` line of *path*, or None.

    The file is read as UTF-8 text (undecodable bytes are replaced) and each
    line, minus its trailing newline, is tested against ``LA_RE``. Reading
    stops at the first line that matches. The number is returned as the
    matched text (a ``str``), not converted to a numeric type.
    """
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        # Lazy generator: lines are only read until the first hit is found.
        attempts = (LA_RE.match(raw.rstrip("\n")) for raw in handle)
        hit = next((found for found in attempts if found), None)
    return hit.group(1) if hit else None

rows = []
missing = []  # xxxx values whose chosen file contains no LA_Movers line

# Every key of by_xxxx already passed int() in the numeric filter, so order the
# rows numerically too: plain string order would put "1000" before "999" if the
# field widths ever differ. The string itself breaks ties ("0672" vs "672").
for xxxx, paths in sorted(by_xxxx.items(), key=lambda kv: (int(kv[0]), kv[0])):
    # If multiple matching files exist for the same xxxx, pick the lexicographically first for determinism
    path = min(paths, key=lambda p: p.name)
    val = first_la_movers_value(path)
    if val is None:
        # Remember the gap so it can be reported instead of vanishing silently.
        missing.append(xxxx)
        continue
    rows.append((xxxx, val))

# Write CSV (header: xxxx,LA_Movers)
out = Path(OUT_CSV)
out.parent.mkdir(parents=True, exist_ok=True)
# newline="" lets the csv module control line endings itself.
with out.open("w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["xxxx", "LA_Movers"])
    w.writerows(rows)

print(f"Wrote {len(rows)} rows to {out}")
if missing:
    print(f"Skipped {len(missing)} file(s) with no LA_Movers line: {', '.join(missing)}")


Wrote 606 rows to C:\ACMEcode\ACMEanalysis\notebooks\output.csv
