In [None]:
import numpy as np
import pandas as pd
from typing import List, Optional

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

def sample_plans_by_clustering(
    vtd_units: pd.DataFrame,
    vtd_id_col: str,
    K: int,
    n_plans: int,
    random_seed: int,
    feature_cols: Optional[List[str]] = None,
) -> pd.DataFrame:
    """Generate simulated district plans by repeatedly k-means clustering units.

    The feature columns are standardized once, then ``KMeans`` is fitted
    ``n_plans`` times, each time with a different seed derived from
    ``random_seed``. The cluster label assigned to each unit is recorded as
    its simulated district for that plan.

    Clustering uses only the values in ``feature_cols``; this function applies
    no other constraint to the resulting assignments.

    Parameters
    ----------
    vtd_units
        One row per unit. Must contain ``vtd_id_col`` and every column in
        ``feature_cols``. The frame is not modified.
    vtd_id_col
        Name of the unit identifier column; it is carried into the output
        under the same name, so it must not be ``"plan_id"`` or ``"cd_sim"``.
    K
        Number of clusters (simulated districts) per plan.
    n_plans
        Number of plans to generate. ``0`` yields an empty result.
    random_seed
        Seed for the generator that produces the per-plan KMeans seeds.
    feature_cols
        Columns to cluster on. Defaults to
        ``["p_white", "p_black", "p_latino", "p_asian", "p_native", "p_other"]``.

    Returns
    -------
    pd.DataFrame
        Long-format frame with columns ``plan_id``, ``vtd_id_col`` and
        ``cd_sim``, holding one row per (plan, unit) pair.

    Raises
    ------
    ValueError
        If ``n_plans`` is negative or ``vtd_id_col`` collides with an output
        column name.
    KeyError
        If ``vtd_units`` lacks ``vtd_id_col`` or any feature column.
    """
    if feature_cols is None:
        feature_cols = ["p_white", "p_black", "p_latino", "p_asian", "p_native", "p_other"]

    # The output is built from a dict keyed by column name; a colliding id
    # column would silently overwrite "plan_id" or "cd_sim".
    if vtd_id_col in ("plan_id", "cd_sim"):
        raise ValueError(
            f"vtd_id_col={vtd_id_col!r} collides with an output column name"
        )
    if n_plans < 0:
        raise ValueError(f"n_plans must be non-negative, got {n_plans}")

    # Fail before any clustering work rather than after the first KMeans fit.
    missing = [c for c in [vtd_id_col, *feature_cols] if c not in vtd_units.columns]
    if missing:
        raise KeyError(f"vtd_units is missing required column(s): {missing}")

    # Loop-invariant: the same id array is attached to every plan.
    unit_ids = vtd_units[vtd_id_col].values

    if n_plans == 0:
        # pd.concat refuses an empty list, so build the empty result directly.
        # Integer-typed columns keep the frame shaped like a non-empty result.
        return pd.DataFrame({
            "plan_id": np.empty(0, dtype=np.int64),
            vtd_id_col: unit_ids[:0],
            "cd_sim": np.empty(0, dtype=np.int32),
        })

    X = vtd_units[feature_cols].values.astype(float)
    Xs = StandardScaler().fit_transform(X)

    plans = []
    rng = np.random.default_rng(random_seed)

    for plan_id in range(n_plans):
        # One draw per plan, in order, so a given random_seed always maps to
        # the same sequence of KMeans seeds.
        seed = int(rng.integers(0, 1_000_000))
        km = KMeans(n_clusters=K, n_init=10, random_state=seed)
        cd_sim = km.fit_predict(Xs)

        plans.append(pd.DataFrame({
            "plan_id": plan_id,
            vtd_id_col: unit_ids,
            "cd_sim": cd_sim,
        }))

    return pd.concat(plans, ignore_index=True)
