**Scan the visualization PNGs and write each collect's start datetime (UTC and US Pacific) to a CSV**

In [None]:
# Imported in this cell so it runs on a fresh kernel (Restart & Run All);
# previously `Path` was only imported by a later cell.
from pathlib import Path

# Folder whose *.png files are scanned below; the timestamp CSV is written into it.
folder = Path('/Volumes/External/TJ_SAR/02_preprocessed/background_gooddays')

In [None]:
from pathlib import Path
import csv
from datetime import datetime, timezone
from zoneinfo import ZoneInfo



# Timestamp formats
INPUT_FMT  = "%Y%m%dT%H%M%S"      # format in filename
OUTPUT_FMT = "%Y-%m-%d %H:%M:%S"  # human-readable output

# Timezones
UTC     = timezone.utc
PACIFIC = ZoneInfo("America/Los_Angeles")

# Prepare CSV output (written into the same folder that is scanned for PNGs)
csv_path = folder / "sentinel1_times.csv"

n_written = 0   # rows actually written to the CSV (header excluded)
skipped = []    # names of PNGs whose filename could not be parsed

# newline="" is what the csv module expects for files it writes; utf-8 is set
# explicitly so the file does not depend on the platform's default encoding.
with open(csv_path, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    # Write header with original filename first
    writer.writerow(["original_filename", "start_utc", "start_local"])

    for p in sorted(folder.glob("*.png")):
        parts = p.stem.split("_")
        if len(parts) < 6:
            # Unexpected filename layout: record it instead of dropping it silently.
            skipped.append(p.name)
            continue

        raw = parts[4]  # UTC start timestamp, e.g. "20250421T014857"
        try:
            # The filename carries no offset, so the parsed value is tagged as UTC.
            dt_utc = datetime.strptime(raw, INPUT_FMT).replace(tzinfo=UTC)
        except ValueError:
            # Fifth field is not a timestamp in INPUT_FMT.
            skipped.append(p.name)
            continue

        # Convert to Pacific time
        dt_local = dt_utc.astimezone(PACIFIC)

        # Write row
        writer.writerow([
            p.name,
            dt_utc.strftime(OUTPUT_FMT),
            dt_local.strftime(OUTPUT_FMT)
        ])
        n_written += 1

# Report the number of rows written, not the number of PNGs in the folder:
# the two differ whenever a file was skipped above.
print(f"Wrote {csv_path.name} with {n_written} entries.")
if skipped:
    print(f"Skipped {len(skipped)} PNG file(s) with unrecognized names: {skipped}")


Wrote sentinel1_times.csv with 122 entries.


For each `*_pre_overlay.png` listed in the CSV, find the matching raw `.zip` dataset and copy it into a single output folder.

In [4]:
import pandas as pd
from pathlib import Path
import shutil

# ─── CONFIG ──────────────────────────────────────────────────────────
csv_file    = Path('/Volumes/External/TJ_SAR/_archive/test/sentinel1_times.csv')       # CSV with an 'original_filename' column
raw_folder  = Path('/Volumes/External/TJ_SAR/01_data/02_2025_2020')  # root searched recursively for raw .zip files
output_dir = Path('/Volumes/External/TJ_SAR/01_data/03_2022_2025_gooddays')       # matching .zip files are copied flat into this folder
# ────────────────────────────────────────────────────────────────────

output_dir.mkdir(parents=True, exist_ok=True)

# Read just the original filenames
df = pd.read_csv(csv_file, usecols=['original_filename'])

# Walk the raw tree ONCE and index every .zip by its name without the ".zip"
# suffix. The previous version ran a full recursive glob per CSV row, i.e. one
# complete directory walk of the volume for every scene.
# Lookup is now an exact, case-sensitive name comparison, so characters such as
# "[" or "*" in a filename are never interpreted as glob wildcards.
zips_by_scene = {}
for raw_zip in raw_folder.rglob('*.zip'):
    zips_by_scene.setdefault(raw_zip.stem, []).append(raw_zip)

for orig in df['original_filename'].dropna().unique():
    # strip suffix to get the scene base name
    scene_id = orig.rsplit('_pre_overlay.png', 1)[0]

    # find the zip(s) matching that base name
    matches = zips_by_scene.get(scene_id, [])
    if not matches:
        print(f'⚠️  No .zip found for scene "{scene_id}"')
        continue

    for zip_path in matches:
        dest = output_dir / zip_path.name
        # NOTE: copy2 overwrites an existing file at dest; add a dest.exists()
        # check here if re-copying on re-run should be avoided.
        shutil.copy2(zip_path, dest)
        print(f'✔️  Copied "{zip_path.name}" to "{output_dir}"')


✔️  Copied "S1A_IW_GRDH_1SDV_20221203T015015_20221203T015044_046161_0586BC_72DC.zip" to "/Volumes/External/TJ_SAR/01_data/03_2022_2025_gooddays"
✔️  Copied "S1A_IW_GRDH_1SDV_20221215T015014_20221215T015043_046336_058CB1_E040.zip" to "/Volumes/External/TJ_SAR/01_data/03_2022_2025_gooddays"
✔️  Copied "S1A_IW_GRDH_1SDV_20221215T135316_20221215T135341_046343_058CF7_BB7D.zip" to "/Volumes/External/TJ_SAR/01_data/03_2022_2025_gooddays"
✔️  Copied "S1A_IW_GRDH_1SDV_20230103T134452_20230103T134517_046620_059664_9029.zip" to "/Volumes/External/TJ_SAR/01_data/03_2022_2025_gooddays"
✔️  Copied "S1A_IW_GRDH_1SDV_20230108T135315_20230108T135340_046693_0598D3_8F17.zip" to "/Volumes/External/TJ_SAR/01_data/03_2022_2025_gooddays"
✔️  Copied "S1A_IW_GRDH_1SDV_20230917T015020_20230917T015049_050361_06103C_F092.zip" to "/Volumes/External/TJ_SAR/01_data/03_2022_2025_gooddays"
✔️  Copied "S1A_IW_GRDH_1SDV_20230924T134500_20230924T134525_050470_061400_0602.zip" to "/Volumes/External/TJ_SAR/01_data/03_2022_