In [None]:
import math
from io import BytesIO
from pathlib import Path

import geopandas as gpd
import pandas as pd
from PIL import Image

import ipywidgets as widgets
from IPython.display import display

from mmocc.config import cache_path, wi_image_path
from mmocc.utils import get_conus_boundary

# Pickled table that the loading cell reads with `pd.read_pickle`.
RAW_PKL = cache_path / "wi_blank_images_raw.pkl"
# Plain-text label files, one image path per line. `write_lists` rewrites both
# files; `load_list(INVALID_TXT)` restores earlier "invalid" labels on startup.
VALID_TXT = cache_path / "wi_blank_images_valid.txt"
INVALID_TXT = cache_path / "wi_blank_images_invalid.txt"

In [None]:
# Load the raw image table. NOTE: unpickling can execute arbitrary code, so
# RAW_PKL must only ever point at a trusted, locally produced cache file.
df_blanks = pd.read_pickle(RAW_PKL)

# Collapse the CONUS boundary into a single geometry for the point-in-polygon test.
# GeoPandas 1.0 deprecated the `unary_union` attribute in favour of `union_all()`;
# prefer the new API and fall back only on older GeoPandas versions.
conus_boundary = get_conus_boundary()
if hasattr(conus_boundary, "union_all"):
    conus_geometry = conus_boundary.union_all()
else:
    conus_geometry = conus_boundary.unary_union

# One point per row, aligned on the DataFrame index so the boolean mask below
# lines up with `df_blanks`.
# NOTE(review): assumes the boundary returned by get_conus_boundary() is also in
# EPSG:4326 (lon/lat) -- TODO confirm.
points = gpd.GeoSeries(
    gpd.points_from_xy(df_blanks["Longitude"], df_blanks["Latitude"]),
    index=df_blanks.index,
    crs="EPSG:4326",
)

# Keep only rows whose coordinates fall inside the boundary geometry.
mask = points.within(conus_geometry)
df_blanks = df_blanks[mask].reset_index(drop=True)

# Ordered list drives paging; the set gives O(1) membership checks.
image_paths = df_blanks["FilePath"].tolist()
image_paths_set = set(image_paths)


def load_list(path: Path) -> list[str]:
    """Read a one-entry-per-line text file into a list of stripped strings.

    Args:
        path: File to read.

    Returns:
        The non-blank lines of the file with surrounding whitespace removed,
        in file order. An empty list if ``path`` does not exist.
    """
    if path.exists():
        stripped = (raw.strip() for raw in path.read_text().splitlines())
        return [entry for entry in stripped if entry]
    return []


# Restore previously saved "invalid" labels, keeping only the entries that are
# still part of the current image set.
invalid_paths = set(load_list(INVALID_TXT)).intersection(image_paths_set)

# Grid geometry: every page shows PAGE_ROWS x PAGE_COLS thumbnails.
PAGE_ROWS = 16
PAGE_COLS = 4
PAGE_SIZE = PAGE_ROWS * PAGE_COLS
# Thumbnail bounding box in pixels; the height gives a 16:9 box for the width.
THUMB_SIZE = 240
THUMB_HEIGHT = math.ceil(THUMB_SIZE * 9 / 16)

# Mutable UI state shared by the paging callbacks (zero-based page index).
state = dict(page=0)
print(f"Loaded {len(image_paths)} images inside CONUS.")

In [None]:
# Placeholder box whose `children` are replaced by `render_page` on every page change.
grid_container = widgets.Box()
# HTML line showing page position and the invalid count; filled by `update_status`.
status = widgets.HTML()


def write_lists() -> None:
    """Persist the current labels to ``VALID_TXT`` and ``INVALID_TXT``.

    Every entry of ``image_paths`` goes to exactly one of the two files,
    depending on whether it is in ``invalid_paths``. Entries keep the order of
    ``image_paths``, one per line, each line newline-terminated. An empty list
    produces an empty file.
    """
    flagged: list[str] = []
    kept: list[str] = []
    for candidate in image_paths:
        bucket = flagged if candidate in invalid_paths else kept
        bucket.append(candidate)
    VALID_TXT.write_text("".join(entry + "\n" for entry in kept))
    INVALID_TXT.write_text("".join(entry + "\n" for entry in flagged))


def update_status() -> None:
    """Refresh the ``status`` widget with paging info and the invalid count.

    Shows the one-based page number, the total page count (at least 1), the
    one-based range of images on the current page, and how many paths are
    currently in ``invalid_paths``.
    """
    total = len(image_paths)
    page_count = max(1, math.ceil(total / PAGE_SIZE))
    page_number = state["page"] + 1

    # With no images at all the displayed range collapses to "0-0".
    if total:
        first_shown = (page_number - 1) * PAGE_SIZE + 1
    else:
        first_shown = 0
    last_shown = min(page_number * PAGE_SIZE, total)

    status.value = (
        f"<b>Page {page_number}</b> / {page_count} "
        f"| showing {first_shown}-{last_shown} of {total} "
        f"| invalid: {len(invalid_paths)}"
    )


def load_thumbnail(path_str: str) -> bytes | None:
    """Build a JPEG thumbnail for one image.

    Args:
        path_str: Image path as stored in the table; any ``gs://`` in it is
            replaced with ``{wi_image_path}/`` to get the local file path.

    Returns:
        JPEG-encoded bytes of the image, converted to RGB and shrunk to fit
        within ``THUMB_SIZE`` x ``THUMB_HEIGHT``. ``None`` if the local file
        does not exist or anything fails while opening or encoding it.
    """
    local_path = Path(path_str.replace("gs://", f"{wi_image_path}/"))
    if not local_path.exists():
        return None

    encoded = BytesIO()
    try:
        with Image.open(local_path) as source:
            rgb = source.convert("RGB")
            # `thumbnail` resizes in place and keeps the aspect ratio.
            rgb.thumbnail((THUMB_SIZE, THUMB_HEIGHT))
            rgb.save(encoded, format="JPEG", quality=70)
    except Exception:
        # Best effort: an unreadable image is reported the same way as a missing one.
        return None
    return encoded.getvalue()


def make_tile(idx: int) -> widgets.Widget:
    """Build one grid tile: a thumbnail (or placeholder) above an "Invalid" toggle.

    Args:
        idx: Position of the image in ``image_paths``.

    Returns:
        A ``VBox`` holding the image widget and its toggle button. Flipping the
        toggle updates ``invalid_paths``, rewrites the label files and
        refreshes the status line.
    """
    path_str = image_paths[idx]
    tile_width = f"{THUMB_SIZE}px"
    flagged = path_str in invalid_paths

    jpeg_bytes = load_thumbnail(path_str)
    if jpeg_bytes is not None:
        img_widget = widgets.Image(
            value=jpeg_bytes,
            format="jpeg",
            layout=widgets.Layout(width=tile_width, height=f"{THUMB_HEIGHT}px"),
        )
    else:
        # `load_thumbnail` returns None for missing files and for unreadable ones.
        img_widget = widgets.Label("Missing")

    toggle = widgets.ToggleButton(
        value=flagged,
        description="Invalid",
        button_style="danger" if flagged else "",
        layout=widgets.Layout(width=tile_width),
    )

    def on_toggle(change: dict, path_str: str = path_str) -> None:
        """Mirror the toggle's new value into ``invalid_paths`` and persist it."""
        if change.get("name") == "value":
            if change.get("new"):
                invalid_paths.add(path_str)
                toggle.button_style = "danger"
            else:
                invalid_paths.discard(path_str)
                toggle.button_style = ""
            write_lists()
            update_status()

    toggle.observe(on_toggle, names="value")

    tile_layout = widgets.Layout(align_items="center", width=tile_width)
    return widgets.VBox([img_widget, toggle], layout=tile_layout)


def render_page() -> None:
    """Rebuild the thumbnail grid for the page in ``state["page"]``.

    Replaces the children of ``grid_container`` with a fresh ``GridBox`` of
    tiles for the current page, or with a "No images to label." label when
    ``image_paths`` is empty. The status line is refreshed in both cases.
    """
    total = len(image_paths)
    if total:
        first = state["page"] * PAGE_SIZE
        last = min(first + PAGE_SIZE, total)
        tiles = [make_tile(position) for position in range(first, last)]
        grid_layout = widgets.Layout(
            grid_template_columns=f"repeat({PAGE_COLS}, {THUMB_SIZE}px)",
            grid_gap="6px 6px",
            justify_items="center",
        )
        grid_container.children = [widgets.GridBox(children=tiles, layout=grid_layout)]
    else:
        grid_container.children = [widgets.Label("No images to label.")]
    update_status()


def go_prev(_: object | None = None) -> None:
    """Step back one page and re-render; do nothing on the first page.

    The argument is ignored; it exists so the function can be used as a
    button click callback.
    """
    if state["page"] <= 0:
        return
    state["page"] -= 1
    render_page()


def go_next(_: object | None = None) -> None:
    """Advance one page and re-render; do nothing on the last page.

    The argument is ignored; it exists so the function can be used as a
    button click callback.
    """
    last_page = max(1, math.ceil(len(image_paths) / PAGE_SIZE)) - 1
    if state["page"] >= last_page:
        return
    state["page"] += 1
    render_page()


# Paging buttons; each click handler moves `state["page"]` and re-renders the grid.
prev_button = widgets.Button(description="Prev page")
next_button = widgets.Button(description="Next page")
prev_button.on_click(go_prev)
next_button.on_click(go_next)

controls = widgets.HBox([prev_button, next_button])
# Show the (still empty) widgets first; `render_page` fills them in below.
display(controls, status, grid_container)
# Persist the lists right away so both files exist before any toggle is clicked.
# This also rewrites INVALID_TXT with only the entries found in `image_paths`.
write_lists()
render_page()

In [None]:
# Export the rows that were NOT flagged invalid in the labelling UI.
# NOTE: the result depends on the in-memory `invalid_paths` set, i.e. on the
# toggles clicked in the widget cell; run this cell only after labelling is done.
MANUAL_PKL = cache_path / "wi_blank_images_manual.pkl"

valid_set = {p for p in image_paths if p not in invalid_paths}
# Boolean indexing already returns a new DataFrame, so no explicit `.copy()` of
# the whole table is needed before filtering.
df_filtered = df_blanks[df_blanks["FilePath"].isin(valid_set)]
df_filtered.to_pickle(MANUAL_PKL)
print(f"Wrote {len(df_filtered)} rows to {MANUAL_PKL}")