In [None]:
# Cell 1 

import os, sys
from pathlib import Path
import pathlib

# Project-local folder that OpenML (and anything resolving the user's home
# directory) is redirected to for the rest of this session.
OPENML_CACHE_DIR = Path("openml_cache").resolve()
OPENML_CACHE_DIR.mkdir(parents=True, exist_ok=True)

# OpenML-specific variables plus the Windows profile variable, all pointing
# at the same folder.
os.environ.update({
    "OPENML_HOME": str(OPENML_CACHE_DIR),
    "OPENML_CACHE_DIRECTORY": str(OPENML_CACHE_DIR),
    "USERPROFILE": str(OPENML_CACHE_DIR),
})

# HOMEDRIVE / HOMEPATH are only overridden when the path actually carries a
# drive component (splitdrive returns an empty drive otherwise).
drive, tail = os.path.splitdrive(str(OPENML_CACHE_DIR))
if drive and tail:
    os.environ.update(HOMEDRIVE=drive, HOMEPATH=tail)

# Make Path.home() report the cache folder as well.
_path_home = OPENML_CACHE_DIR
pathlib.Path.home = staticmethod(lambda: _path_home)

# Re-running this cell must not stack wrappers: if os.path.expanduser is
# already our replacement, recover the genuine function it remembers.
_orig_expanduser = getattr(os.path.expanduser, "_unpatched", os.path.expanduser)

def _expanduser_g(path):
    """Drop-in replacement for ``os.path.expanduser`` that redirects the current user's home.

    Only a bare ``~`` or a ``~`` immediately followed by a path separator is
    rewritten to ``_path_home``.  Everything else -- ``~otheruser/...``,
    paths without a tilde, bytes paths -- is delegated to the original
    implementation, so a user name after the tilde is never glued onto the
    redirected home directory.

    Parameters
    ----------
    path : str | bytes | os.PathLike
        Path to expand.

    Returns
    -------
    str | bytes
        The expanded path.
    """
    try:
        text = os.fspath(path)
    except TypeError:
        # Not path-like at all: let the original raise its usual error.
        return _orig_expanduser(path)

    if isinstance(text, str) and text.startswith("~"):
        separators = tuple(s for s in (os.sep, os.altsep) if s)
        if text == "~" or text[1:2] in separators:
            return str(_path_home) + text[1:]
    return _orig_expanduser(path)

_expanduser_g._unpatched = _orig_expanduser
os.path.expanduser = _expanduser_g

# Forget any OpenML modules that were imported before the redirection above;
# presumably so that the next `import openml` initialises against the new home.
for m in [name for name in list(sys.modules) if name == "openml" or name.startswith("openml.")]:
    del sys.modules[m]

import logging

# Only errors from the OpenML loggers are shown.
for _logger_name in ("openml", "openml.datasets.functions"):
    logging.getLogger(_logger_name).setLevel(logging.ERROR)

# Echo the effective redirection so it can be checked by eye.
for _label, _value in (
    ("Path.home()            ->", Path.home()),
    ("os.path.expanduser('~') ->", os.path.expanduser("~")),
    ("ENV OPENML_HOME        ->", os.environ.get("OPENML_HOME")),
    ("ENV OPENML_CACHE_DIR   ->", os.environ.get("OPENML_CACHE_DIRECTORY")),
):
    print(_label, _value)

In [None]:
# Cell 2 

import os, gc, time
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import psutil
import openml
import sklearn

from sklearn.base import clone
from sklearn.utils import shuffle as sk_shuffle
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder as _SklearnOHE, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score

try:
    from xgboost import XGBClassifier
except Exception:
    XGBClassifier = None

try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import TensorDataset, DataLoader
except Exception:
    torch = None

import sklearn.preprocessing as _skp

# Bind both the module attribute and the notebook-level name to the encoder
# class imported from sklearn.preprocessing at the top of this cell.
_skp.OneHotEncoder = _SklearnOHE
OneHotEncoder = _SklearnOHE

# Point OpenML at the project-local cache.  The setter is only called when
# this openml version provides it; a failure is reported instead of being
# swallowed, and the plain attribute assignment below still runs either way.
try:
    if hasattr(openml.config, "set_cache_directory"):
        openml.config.set_cache_directory(str(OPENML_CACHE_DIR))
except Exception as exc:
    print("[warn] openml.config.set_cache_directory failed:", exc)
openml.config.cache_directory = str(OPENML_CACHE_DIR)

print("OpenML cache set to:", openml.config.cache_directory)
print("Verify home now    :", Path.home())
print("Verify expanduser  :", os.path.expanduser("~"))

print("Current OpenML server:", openml.config.server)

# =================== GLOBAL CONFIG ===================

# ---------- Task / dataset selection ----------
MAX_TASKS     = None
MAX_DATASETS  = None
ONLY_ACTIVE   = True
SHUFFLE_TASKS = True
RANDOM_SEED   = 42

# OpenML dataset ids and task ids that are excluded.
SKIP_DATASET_IDS = {
    41811, 40864, 46043, 46046, 46026, 45923, 41883, 44324,
    44310, 44296, 44284, 44312, 46056, 46040, 44250, 44313,
    44307, 44251, 44273, 46036, 44244, 44332,
}
SKIP_TASK_IDS = {362155, 362156, 361188, 167207}

# ---------- Candidate models ----------
ENABLED_MODELS = ["logreg", "rf", "xgboost", "cnn1d", "tiny_rnn", "mlp", "tinyconv"]

# Deep models are listed first, then the classical ones.
TRAINING_ORDER = ["cnn1d", "tiny_rnn", "mlp", "tinyconv"] + ["logreg", "rf", "xgboost"]

# ---------- GPU settings ----------
USE_GPU         = True
GPU_DEVICE      = "cuda:0"
FALLBACK_TO_CPU = True

# ---------- Landmarks ----------
LANDMARK_ENABLED            = True
LANDMARK_SUBSAMPLE_FRACTION = 0.15    # fraction of rows sampled for landmarking
LANDMARK_MAX_ROWS           = 1000    # upper bound on the landmark subsample
LANDMARK_MIN_ROWS           = 50      # below this many rows landmarks are skipped
LANDMARK_TIMEOUT_S          = 60.0    # overall budget for all landmarks, seconds

# ---------- Training limits ----------
TRAIN_TIMEOUT_S        = 1800.0
MAX_TRAIN_ROWS         = 100_000
MIN_TRAIN_ROWS         = 50
MAX_FEATURES           = 400
MIN_FEATURES           = 2
DL_MAX_EPOCHS          = 20
DL_BATCH_SIZE          = 128
DL_EARLY_STOP_PATIENCE = 3

# ---------- Score weights (trade-off metric) ----------
SCORE_WEIGHTS = dict(
    accuracy=0.50,
    trained_model_size_kb=0.25,
    inference_speed_ms=0.10,
    ram_usage_kb=0.15,
)

# ---------- Outputs, checkpoints, cache ----------
OUTPUT_CSV = "openml_metadata_created.csv"

CHECKPOINT_DIR      = Path("checkpoints")
CHECKPOINT_DATASETS = CHECKPOINT_DIR / "finished_datasets.txt"
RUN_LOG             = CHECKPOINT_DIR / "run_log.txt"

CACHE_DIR    = Path(openml.config.cache_directory)
CACHE_MAX_GB = 5.0

MAX_THREADS = 4  # None = all available

# ---------- OpenML fetch timeouts ----------
REQUEST_CONNECT_TIMEOUT = 300.0
REQUEST_READ_TIMEOUT    = 300.0
FETCH_RETRIES           = 3
RETRY_BACKOFF_S         = 3.0

# Shared random generator, seeded once from RANDOM_SEED.
RNG = np.random.default_rng(RANDOM_SEED)

print("sklearn version:", sklearn.__version__)
print("torch present? :", torch is not None)
print("XGB available? :", XGBClassifier is not None)

In [None]:
# Cell 3 

def _apply_thread_policy(max_threads: int):
    vars_ = ["OMP_NUM_THREADS", "OPENBLAS_NUM_THREADS", "MKL_NUM_THREADS", "NUMEXPR_NUM_THREADS"]
    if max_threads is None or int(max_threads) == -1:
        for v in vars_:
            os.environ.pop(v, None)
        eff = "unlimited (library default)"
    else:
        mt = str(int(max_threads))
        for v in vars_:
            os.environ[v] = mt
        eff = mt
    print(f"[threads] BLAS/OpenMP thread cap: {eff}")

_apply_thread_policy(MAX_THREADS)

# Pick the torch device once; `device` stays None when torch is unavailable,
# which the model factories use as the "deep models disabled" signal.
if torch is None:
    device = None
    print("[device] torch not installed → deep models disabled")
elif USE_GPU and torch.cuda.is_available():
    device = torch.device(GPU_DEVICE)
    print(f"[device] Using CUDA device: {device}")
else:
    # Reaching this branch with USE_GPU set means CUDA was not available.
    if USE_GPU:
        print("[device] USE_GPU=True but CUDA not available → using CPU")
    device = torch.device("cpu")

# ---------- logger ----------
RUN_LOG_PATH = Path(RUN_LOG)

def log(msg: str):
    """Print a timestamped message and append the same line to the run log.

    Writing to ``RUN_LOG_PATH`` is best effort: any error while creating the
    folder or writing the file is ignored so logging never interrupts a run.
    """
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] {msg}"
    print(entry)
    try:
        RUN_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
        with open(RUN_LOG_PATH, "a", encoding="utf-8") as handle:
            handle.write(f"{entry}\n")
    except Exception:
        # Deliberately silent: the console line above has already been emitted.
        pass

# ---------- Checkpoint helpers ----------
CHECKPOINT_DATASETS_PATH = Path(CHECKPOINT_DATASETS)

def load_finished_datasets():
    """Read the checkpoint file and return the set of finished dataset ids.

    Returns
    -------
    set[int]
        Empty when the checkpoint file does not exist.  Blank lines are
        skipped; lines that are not valid integers (for example a line cut
        short by an interrupted write) are logged and ignored instead of
        aborting the resume.
    """
    if not CHECKPOINT_DATASETS_PATH.exists():
        return set()

    finished = set()
    with CHECKPOINT_DATASETS_PATH.open("r", encoding="utf-8") as f:
        for lineno, raw in enumerate(f, start=1):
            token = raw.strip()
            if not token:
                continue
            try:
                finished.add(int(token))
            except ValueError:
                log(f"[checkpoint] ignoring malformed line {lineno} in {CHECKPOINT_DATASETS_PATH}: {token!r}")
    return finished

def mark_dataset_finished(dataset_id: int):
    """Append ``dataset_id`` (as an integer, one per line) to the checkpoint file and log it."""
    checkpoint_file = CHECKPOINT_DATASETS_PATH
    checkpoint_file.parent.mkdir(parents=True, exist_ok=True)
    with open(checkpoint_file, "a", encoding="utf-8") as handle:
        handle.write(f"{int(dataset_id)}\n")
    log(f"[checkpoint] did={dataset_id} marked as finished")

# ---------- Cache management ----------
def _cache_root() -> Path:
    """Return the cache directory currently configured in ``openml.config`` as a ``Path``."""
    configured = openml.config.cache_directory
    return Path(str(configured))

def _www_root() -> Path:
    """Locate the ``www`` sub-tree inside the OpenML cache, creating it if absent.

    Three layouts are probed in order; the first one that exists wins.  When
    none exists, ``<cache>/.openml/org/openml/www`` is created and returned.
    """
    base = _cache_root()
    fallback = base / ".openml" / "org" / "openml" / "www"
    layouts = (base / "org" / "openml" / "www", fallback, base / "www")

    found = next((layout for layout in layouts if layout.exists()), None)
    if found is not None:
        return found

    fallback.mkdir(parents=True, exist_ok=True)
    return fallback

def _cache_size_bytes() -> float:
    """Return the total size of all files under the cache root, in GiB.

    NOTE: despite the historical name, the value is NOT a byte count -- the
    byte total is divided by ``1024 ** 3`` before being returned, and the
    result is a float.  The name and unit are kept because callers compare
    the result directly against a limit expressed in GB.

    Files that cannot be inspected (removed concurrently, permission denied)
    are skipped.
    """
    total_bytes = 0
    for entry in _cache_root().rglob("*"):
        try:
            if entry.is_file():
                total_bytes += entry.stat().st_size
        except OSError:
            continue
    return total_bytes / (1024 ** 3)

def _purge_www_tree() -> tuple[int, int]:
    """Delete every file below the OpenML ``www`` cache tree, then prune empty folders.

    Returns
    -------
    tuple[int, int]
        ``(removed_file_count, removed_bytes)``.  Only files whose removal
        actually succeeded are counted.

    Removal is best effort: files or directories that cannot be deleted
    (in use, permission denied, already gone) are left in place.
    """
    root = _www_root()
    removed_count, removed_bytes = 0, 0

    for f in [p for p in root.rglob("*") if p.is_file()]:
        try:
            size = f.stat().st_size
            f.unlink()
        except OSError:
            continue
        removed_count += 1
        removed_bytes += size

    # Reverse-sorted paths put children before their parents, so nested
    # directories are emptied before rmdir is attempted on the parent.
    for d in sorted((p for p in root.rglob("*") if p.is_dir()), reverse=True):
        try:
            d.rmdir()
        except OSError:
            pass

    return removed_count, removed_bytes

def prune_cache_if_needed(cache_dir: Path = CACHE_DIR, max_gb: float = CACHE_MAX_GB):
    """Purge the OpenML ``www`` cache tree when the cache exceeds ``max_gb``.

    The size is measured with ``_cache_size_bytes()`` (which reports GB) before
    and after the purge, and both values are logged.  ``cache_dir`` is accepted
    but not referenced by this function body.
    """
    current_gb = _cache_size_bytes()
    if current_gb > max_gb:
        log(f"[cache] size={current_gb:.2f} GB > limit={max_gb:.2f} GB → pruning…")
        _purge_www_tree()
        current_gb = _cache_size_bytes()

        log(f"[cache] new size={current_gb:.2f} GB")

# ---------- Memory cleanup ----------
def free_memory():
    """Run the garbage collector and, when CUDA is usable, release cached GPU memory.

    GPU clean-up is best effort; any error raised by torch is ignored.
    """
    gc.collect()
    if torch is None or not torch.cuda.is_available():
        return
    try:
        torch.cuda.empty_cache()
        ipc_collect = getattr(torch.cuda, "ipc_collect", None)
        if ipc_collect is not None:
            ipc_collect()
    except Exception:
        pass

# ---------- CSV helper ----------
def _append_csv(path: str, rows: list[dict]):
    if not rows:
        return
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    write_header = not path.exists() or path.stat().st_size == 0
    pd.DataFrame(rows).to_csv(path, mode="a", index=False, header=write_header)

In [None]:
# Cell 4

from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from openml.exceptions import OpenMLServerException

def _with_timeout_and_close(kw: dict) -> dict:
    """Return request kwargs with the default timeout and a ``Connection: close`` header.

    A fresh dict (and a fresh headers dict) is built so that a ``headers``
    mapping owned by the caller is never modified in place.  Values the
    caller supplied explicitly are left untouched.
    """
    merged = dict(kw)
    merged.setdefault("timeout", (REQUEST_CONNECT_TIMEOUT, REQUEST_READ_TIMEOUT))
    headers = dict(merged.get("headers") or {})
    headers.setdefault("Connection", "close")
    merged["headers"] = headers
    return merged


# Install the monkeypatches only once per process; the marker lives on the
# `requests` module itself so re-running this cell is harmless.
if not getattr(requests, "_timeout_patched", False):
    _orig_session_init = requests.Session.__init__

    def _patched_session_init(self, *args, **kwargs):
        """Session.__init__ wrapper: mount a retrying adapter and disable keep-alive."""
        _orig_session_init(self, *args, **kwargs)

        # NOTE(review): POST is included in the retried methods; confirm that
        # is acceptable for every endpoint this notebook calls.
        retry_strategy = Retry(
            total=FETCH_RETRIES,
            backoff_factor=2.0,  # in seconds
            status_forcelist=[421, 429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "POST", "OPTIONS"],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)

        self.mount("https://", adapter)
        self.mount("http://", adapter)

        self.headers.update({"Connection": "close"})

    requests.Session.__init__ = _patched_session_init

    _orig_request = requests.sessions.Session.request

    def _request(self, method, url, **kw):
        """Session.request wrapper applying the default timeout / Connection header."""
        return _orig_request(self, method, url, **_with_timeout_and_close(kw))

    requests.sessions.Session.request = _request

    _orig_api_request = requests.api.request

    def _api_request(method, url, **kw):
        """requests.api.request wrapper applying the default timeout / Connection header."""
        return _orig_api_request(method, url, **_with_timeout_and_close(kw))

    requests.api.request = _api_request

    requests._timeout_patched = True

log("[init] Requests timeout patch installed")

from pandas.api.types import (
    CategoricalDtype,
    is_bool_dtype,
    is_numeric_dtype,
    is_datetime64_any_dtype,
)

# ---------- Dtype repair ----------

def _repair_categorical_dtypes(X: pd.DataFrame) -> pd.DataFrame:
    X = X.copy()

    for col in X.columns:
        dt = X[col].dtype

        if isinstance(dt, CategoricalDtype):
            X[col] = X[col].astype(str)
            continue

        if is_datetime64_any_dtype(dt):
            X[col] = X[col].astype(str)
            continue

        if dt == object:
            X[col] = X[col].astype(str)
            continue

        if not (
            is_numeric_dtype(dt)
            or is_bool_dtype(dt)
            or is_datetime64_any_dtype(dt)
        ):
            X[col] = X[col].astype(str)
            continue

    return X

# ---------- OpenML fetch ----------
class TaskNotSupported(Exception):
    """Raised by the task fetcher when the server reports an unknown or deprecated task."""

def _get_task_with_retry(task_id: int):
    """Fetch an OpenML task, retrying transient failures with a linear back-off.

    Parameters
    ----------
    task_id : int
        OpenML task id.

    Returns
    -------
    The object returned by ``openml.tasks.get_task``.

    Raises
    ------
    TaskNotSupported
        When the server answers with code 151 / 153 or a message containing
        "Unknown task" / "Deprecated task".  Not retried; the original server
        exception is chained as the cause.
    RuntimeError
        When ``FETCH_RETRIES`` is smaller than 1, so no attempt could be made
        (previously this path silently returned ``None``).
    Exception
        The last error is re-raised once ``FETCH_RETRIES`` attempts failed.
    """
    for attempt in range(1, FETCH_RETRIES + 1):
        try:
            log(f"[stage] get_task(tid={task_id}) attempt {attempt}/{FETCH_RETRIES}")
            t0 = time.time()
            task = openml.tasks.get_task(task_id)
            log(f"[stage] get_task tid={task_id} ✓ in {time.time()-t0:.2f}s")
            return task
        except OpenMLServerException as e:
            msg = str(e)
            code = getattr(e, "code", None)
            if code in (151, 153) or "Unknown task" in msg or "Deprecated task" in msg:
                log(f"[skip][task_not_supported] tid={task_id} code={code} msg={msg}")
                raise TaskNotSupported(msg) from e
            log(f"[warn] get_task tid={task_id} server error: {e}")
            if attempt == FETCH_RETRIES:
                raise
            # Wait longer after each failed attempt.
            time.sleep(RETRY_BACKOFF_S * attempt)
        except Exception as e:
            log(f"[warn] get_task tid={task_id} failed: {e}")
            if attempt == FETCH_RETRIES:
                raise
            time.sleep(RETRY_BACKOFF_S * attempt)

    # Only reachable when the loop body never ran.
    raise RuntimeError(
        f"get_task tid={task_id} was not attempted: FETCH_RETRIES={FETCH_RETRIES} must be >= 1"
    )

def _get_dataset_with_retry(task):
    """Resolve ``task.get_dataset()``, retrying up to ``FETCH_RETRIES`` times.

    A fixed pause of ``RETRY_BACKOFF_S`` seconds separates attempts; the last
    failure is re-raised unchanged.
    """
    ds_id = int(task.dataset_id)
    attempt = 0
    while attempt < FETCH_RETRIES:
        attempt += 1
        try:
            log(f"[stage] get_dataset(did={ds_id}) attempt {attempt}/{FETCH_RETRIES}")
            started = time.time()
            dataset = task.get_dataset()
            log(f"[stage] get_dataset did={ds_id} ✓ in {time.time()-started:.2f}s")
            return dataset
        except Exception as err:
            log(f"[warn] get_dataset did={ds_id} failed: {err}")
            if attempt == FETCH_RETRIES:
                raise
            time.sleep(RETRY_BACKOFF_S)

def _get_data_with_retry(dataset, target_name: str):
    """Load the dataset as dataframes via ``dataset.get_data``, with retries.

    Returns the 4-tuple ``(X, y, categorical_indicator, attribute_names)``
    produced by ``get_data``.  Attempts are separated by a fixed
    ``RETRY_BACKOFF_S`` pause; the last failure is re-raised unchanged.
    """
    attempt = 0
    while attempt < FETCH_RETRIES:
        attempt += 1
        try:
            log(f"[stage] get_data(target='{target_name}') attempt {attempt}/{FETCH_RETRIES}")
            started = time.time()
            result = dataset.get_data(target=target_name, dataset_format="dataframe")
            X, y, categorical_indicator, attribute_names = result
            elapsed = time.time() - started
            # Fall back to placeholders when X has no `shape` attribute.
            shape = getattr(X, "shape", ["?", "?"])
            log(f"[stage] get_data ✓ in {elapsed:.2f}s (n={shape[0]}, d={shape[1]})")
            return X, y, categorical_indicator, attribute_names
        except Exception as err:
            log(f"[warn] get_data failed: {err}")
            if attempt == FETCH_RETRIES:
                raise
            time.sleep(RETRY_BACKOFF_S)

def fetch_task_and_data(task_id: int):
    """Fetch a task, its dataset and its data, and normalise the feature dtypes.

    Returns
    -------
    tuple
        ``(task, dataset, X, y, categorical_indicator, attribute_names, dataset_name)``
        where ``X`` has been passed through ``_repair_categorical_dtypes``.
    """
    task = _get_task_with_retry(task_id)
    dataset = _get_dataset_with_retry(task)
    dataset_name = dataset.name

    raw_X, y, categorical_indicator, attribute_names = _get_data_with_retry(
        dataset, task.target_name
    )
    repaired_X = _repair_categorical_dtypes(raw_X)

    return (
        task,
        dataset,
        repaired_X,
        y,
        categorical_indicator,
        attribute_names,
        dataset_name,
    )

# ---------- Preprocessor builder ----------

def build_preprocessor(X_train: pd.DataFrame):
    """Build an (unfitted) ColumnTransformer for ``X_train`` plus a numeric-column mask.

    Numeric columns are median-imputed and standardised; every other column
    is imputed with its most frequent value and one-hot encoded (dense
    output, unknown categories ignored).

    Returns
    -------
    (ColumnTransformer, np.ndarray)
        The transformer and a boolean array, aligned with ``X_train.columns``,
        that is True for numeric columns.

    Raises
    ------
    ValueError
        If ``X_train`` has no columns at all.
    """
    numeric_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()
    cat_cols = [name for name in X_train.columns if name not in numeric_cols]

    # The dense-output keyword differs between scikit-learn versions:
    # try `sparse_output` first and fall back to `sparse` on TypeError.
    ohe_common = {"handle_unknown": "ignore"}
    try:
        cat_ohe = OneHotEncoder(sparse_output=False, **ohe_common)
    except TypeError:
        cat_ohe = OneHotEncoder(sparse=False, **ohe_common)

    numeric_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ])
    categorical_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", cat_ohe),
    ])

    # Keep only the branches that actually have columns to work on.
    branches = (
        ("num", numeric_transformer, numeric_cols),
        ("cat", categorical_transformer, cat_cols),
    )
    transformers = [branch for branch in branches if branch[2]]
    if not transformers:
        raise ValueError("No columns in X_train to build a preprocessor.")

    preprocessor = ColumnTransformer(transformers=transformers, remainder="drop")

    numeric_mask = np.array(
        [name in numeric_cols for name in X_train.columns],
        dtype=bool,
    )
    return preprocessor, numeric_mask

In [None]:
# Cell 5 

from sklearn.preprocessing import LabelEncoder

# ---------- Dataset meta-features ----------

def compute_dataset_features(X: pd.DataFrame, y) -> dict:
    """Compute descriptive meta-features of a dataset.

    The function is organised as five independent try/except groups; when a
    group fails, the error is logged and that group's keys receive fallback
    values (``None`` for the count / class groups, ``0.0`` for the others),
    so a dict is always returned.

    Parameters
    ----------
    X : pd.DataFrame
        Feature frame.
    y : array-like
        Target labels; converted with ``np.asarray`` and label-encoded.

    Returns
    -------
    dict
        Keys written: ``n_samples``, ``n_features``, ``n_numeric_features``,
        ``n_categorical_features``, ``n_binary_features``, ``n_classes``,
        ``class_balance_std``, ``class_entropy``, ``mean_feature_variance``,
        ``median_feature_variance``, ``mean_corr_abs``, ``max_corr_abs``,
        ``feature_skewness_mean``, ``feature_kurtosis_mean``,
        ``missing_percentage``, ``avg_cardinality_categorical``,
        ``complexity_ratio``, ``intrinsic_dim_estimate``.
    """
    meta = {}

    # --- Group 1: shape -------------------------------------------------
    try:
        n_samples, n_features = X.shape
    except Exception:
        n_samples, n_features = None, None
    meta["n_samples"]  = int(n_samples) if n_samples is not None else None
    meta["n_features"] = int(n_features) if n_features is not None else None

    # --- Group 2: column-type counts -------------------------------------
    try:
        dtypes = X.dtypes
        numeric_mask = [is_numeric_dtype(dt) for dt in dtypes]
        n_numeric = int(np.sum(numeric_mask))
        # Everything that is not numeric is counted as categorical.
        n_categorical = int(len(dtypes) - n_numeric)

        # A column is "binary" when it has exactly two distinct non-null
        # values, regardless of its dtype.
        n_binary = 0
        for col in X.columns:
            vals = pd.Series(X[col]).dropna().unique()
            if len(vals) == 2:
                n_binary += 1

        meta["n_numeric_features"]     = n_numeric
        meta["n_categorical_features"] = n_categorical
        meta["n_binary_features"]      = int(n_binary)
    except Exception as e:
        log(f"[meta] numeric/categorical error: {e}")
        # None tells the cardinality step further down to skip its computation.
        numeric_mask = None  
        meta["n_numeric_features"]     = None
        meta["n_categorical_features"] = None
        meta["n_binary_features"]      = None

    # --- Group 3: class distribution --------------------------------------
    try:
        y_arr = np.asarray(y)
        le = LabelEncoder()
        y_enc = le.fit_transform(y_arr)
        classes, counts = np.unique(y_enc, return_counts=True)
        n_classes = len(classes)
        probs = counts / counts.sum()
        # Standard deviation of the class proportions.
        class_balance_std = float(probs.std()) if n_classes > 0 else None
        # Entropy in bits (log base 2); the 1e-12 term keeps log2 finite.
        class_entropy = float(-(probs * np.log2(probs + 1e-12)).sum()) if n_classes > 0 else None
        meta["n_classes"]         = int(n_classes)
        meta["class_balance_std"] = class_balance_std
        meta["class_entropy"]     = class_entropy
    except Exception as e:
        log(f"[meta] class_stats error: {e}")
        meta["n_classes"]         = None
        meta["class_balance_std"] = None
        meta["class_entropy"]     = None

    # --- Group 4: variance and pairwise correlation of numeric columns ----
    try:
        num_cols = X.select_dtypes(include=[np.number])

        mean_var = 0.0
        med_var  = 0.0
        mean_corr = 0.0
        max_corr  = 0.0

        if num_cols.shape[1] > 0 and num_cols.shape[0] > 1:
            # Sample variance (ddof=1) per column.
            vars_ = num_cols.var(axis=0, ddof=1).values
            if np.isfinite(vars_).sum() > 0:
                mean_var = float(np.nanmean(vars_))
                med_var  = float(np.nanmedian(vars_))

            # Correlations use only the first (at most) 50 numeric columns.
            max_corr_features = min(num_cols.shape[1], 50)
            corr = num_cols.iloc[:, :max_corr_features].corr().abs().values

            # Strict upper triangle: each unordered pair once, no diagonal.
            upper = corr[np.triu_indices_from(corr, k=1)]
            finite_upper = upper[np.isfinite(upper)]
            if finite_upper.size > 0:
                mean_corr = float(finite_upper.mean())
                max_corr  = float(finite_upper.max())

        meta["mean_feature_variance"]   = mean_var
        meta["median_feature_variance"] = med_var
        meta["mean_corr_abs"]           = mean_corr
        meta["max_corr_abs"]            = max_corr

    except Exception as e:
        log(f"[meta] variance/corr error: {e}")
        meta["mean_feature_variance"]   = 0.0
        meta["median_feature_variance"] = 0.0
        meta["mean_corr_abs"]           = 0.0
        meta["max_corr_abs"]            = 0.0

    # --- Group 5: additional statistics ------------------------------------
    try:
        # 1) feature_skewness_mean: mean of the per-column skewness,
        #    with infinite values treated as missing.
        num_cols = X.select_dtypes(include=[np.number])
        if num_cols.shape[1] > 0:
            skews = num_cols.skew(axis=0, skipna=True)
            skews = skews.replace([np.inf, -np.inf], np.nan)
            feature_skewness_mean = float(skews.mean(skipna=True)) if not skews.isna().all() else 0.0
        else:
            feature_skewness_mean = 0.0
        meta["feature_skewness_mean"] = feature_skewness_mean

        # 2) feature_kurtosis_mean: same treatment for the per-column kurtosis.
        if num_cols.shape[1] > 0:
            kurts = num_cols.kurt(axis=0, skipna=True)
            kurts = kurts.replace([np.inf, -np.inf], np.nan)
            feature_kurtosis_mean = float(kurts.mean(skipna=True)) if not kurts.isna().all() else 0.0
        else:
            feature_kurtosis_mean = 0.0
        meta["feature_kurtosis_mean"] = feature_kurtosis_mean

        # 3) missing_percentage: despite the key name this is a fraction
        #    (missing cells / total cells); it is not multiplied by 100.
        if n_samples is not None and n_features is not None and n_samples > 0 and n_features > 0:
            total_cells = float(n_samples * n_features)
            missing_count = float(X.isna().sum().sum())
            missing_percentage = missing_count / total_cells
        else:
            missing_percentage = 0.0
        meta["missing_percentage"] = float(missing_percentage)

        # 4) avg_cardinality_categorical: mean number of distinct non-null
        #    values over the non-numeric columns (0.0 when there are none).
        avg_card = 0.0
        if numeric_mask is not None:
            cat_cols = [col for col, isnum in zip(X.columns, numeric_mask) if not isnum]
            if len(cat_cols) > 0:
                cards = []
                for col in cat_cols:
                    try:
                        cards.append(X[col].nunique(dropna=True))
                    except Exception:
                        continue
                if len(cards) > 0:
                    avg_card = float(np.mean(cards))
        meta["avg_cardinality_categorical"] = avg_card

        # 5) complexity_ratio: number of features divided by number of samples.
        if n_samples is not None and n_features is not None and n_samples > 0:
            complexity_ratio = float(n_features) / float(n_samples)
        else:
            complexity_ratio = 0.0
        meta["complexity_ratio"] = complexity_ratio

        # 6) intrinsic_dim_estimate: number of principal components needed for
        #    the cumulative explained-variance ratio to reach 0.95.
        intrinsic_dim = 0.0
        try:
            from sklearn.decomposition import PCA

            if num_cols.shape[1] >= 2 and num_cols.shape[0] >= 5:
                X_pca = num_cols.to_numpy(dtype=np.float32)
                # Replace NaNs with the mean of their column before fitting.
                col_means = np.nanmean(X_pca, axis=0)
                inds = np.where(np.isnan(X_pca))
                if inds[0].size > 0:
                    X_pca[inds] = np.take(col_means, inds[1])

                n_components = min(X_pca.shape[0], X_pca.shape[1])
                if n_components >= 1:
                    pca = PCA(n_components=n_components)
                    pca.fit(X_pca)
                    cumsum = np.cumsum(pca.explained_variance_ratio_)
                    # searchsorted gives a 0-based position, hence the +1.
                    k = int(np.searchsorted(cumsum, 0.95) + 1)
                    # Clamp to the range [1, n_components].
                    intrinsic_dim = float(max(1, min(k, n_components)))
        except Exception as e_pca:
            log(f"[meta] intrinsic_dim_estimate error: {e_pca}")
            intrinsic_dim = 0.0

        meta["intrinsic_dim_estimate"] = intrinsic_dim

    except Exception as e:
        log(f"[meta] extra_features error: {e}")
        # setdefault keeps any value already stored before the failure.
        meta.setdefault("feature_skewness_mean", 0.0)
        meta.setdefault("feature_kurtosis_mean", 0.0)
        meta.setdefault("missing_percentage", 0.0)
        meta.setdefault("avg_cardinality_categorical", 0.0)
        meta.setdefault("complexity_ratio", 0.0)
        meta.setdefault("intrinsic_dim_estimate", 0.0)

    return meta

# ---------- Landmarks  ----------

class LandmarkTimeout(Exception):
    """Raised when the landmark computations exceed their time budget."""

def _safe_stratify_or_none(y_enc: np.ndarray) -> np.ndarray | None:
    counts = np.unique(y_enc, return_counts=True)
    if counts.min() < 2:
        return None
    return y_enc

def compute_landmarks(X_train: pd.DataFrame, y_train, numeric_mask: np.ndarray | None) -> dict | str:
    """Compute fast "landmark" scores on a random subsample of the training data.

    Five values are produced: hold-out accuracy of a logistic regression, a
    depth-3 decision tree and a 3-nearest-neighbour classifier, the accuracy
    of predictions drawn at random from the class distribution, and a mean
    Fisher discriminant ratio.

    Parameters
    ----------
    X_train : pd.DataFrame
        Training features.  Only numeric columns are used; when there are
        none, every column is label-encoded instead.
    y_train : array-like
        Training labels.
    numeric_mask : np.ndarray | None
        Accepted for interface compatibility; not referenced in this body.

    Returns
    -------
    dict | str
        ``{}`` when landmarks are disabled; the string ``"SKIP"`` when there
        are fewer than ``LANDMARK_MIN_ROWS`` rows or the time budget ran out;
        otherwise a dict with the five landmark keys.  A landmark that failed
        for any other reason is stored as ``None``.

    Notes
    -----
    The time budget is only checked at the start of each landmark, so a
    single slow fit is not interrupted.  Row and column sampling draw from
    the shared module-level ``RNG``.
    """
    if not LANDMARK_ENABLED:
        return {}

    n_rows = X_train.shape[0]
    if n_rows < LANDMARK_MIN_ROWS:
        log(f"[landmarks] too few rows: {n_rows} < LANDMARK_MIN_ROWS={LANDMARK_MIN_ROWS}")
        return "SKIP"

    if isinstance(X_train, pd.DataFrame):
        X_num = X_train.select_dtypes(include=[np.number]).copy()

        if X_num.shape[1] == 0:
            # No numeric column at all: build an integer-coded stand-in frame.
            log("[landmarks] no numeric columns → label-encoding all features for landmarks")
            X_num = pd.DataFrame(index=X_train.index)
            for col in X_train.columns:
                s = X_train[col]
                if is_numeric_dtype(s):
                    X_num[col] = pd.to_numeric(s, errors="coerce").fillna(0)
                else:
                    le_col = LabelEncoder()
                    X_num[col] = le_col.fit_transform(s.astype(str).fillna("__NA__"))
    else:
        X_num = pd.DataFrame(X_train)

    # Subsample size: the configured fraction of the rows, capped at
    # LANDMARK_MAX_ROWS, raised to LANDMARK_MIN_ROWS, never above n_rows.
    n_sub = min(LANDMARK_MAX_ROWS, int(LANDMARK_SUBSAMPLE_FRACTION * n_rows))
    if n_sub < LANDMARK_MIN_ROWS:
        n_sub = LANDMARK_MIN_ROWS
    n_sub = min(n_sub, n_rows)

    # Positional sample without replacement; the same positions index X and y.
    idx = RNG.choice(n_rows, size=n_sub, replace=False)
    X_num_sub = X_num.iloc[idx].reset_index(drop=True).astype(np.float32)
    y_sub = np.asarray(y_train)[idx]

    # ---- Handle NaN / inf for LR/KNN/DT/FDR  ----
    X_num_sub = X_num_sub.replace([np.inf, -np.inf], np.nan)

    vals = X_num_sub.to_numpy(dtype=np.float32)

    if np.isnan(vals).any() or not np.isfinite(vals).all():
        vals[~np.isfinite(vals)] = np.nan

        col_means = np.nanmean(vals, axis=0)

        # A column that is entirely NaN has a NaN mean; use 0.0 for it.
        col_means = np.where(np.isnan(col_means), 0.0, col_means)

        inds = np.where(np.isnan(vals))
        if inds[0].size > 0:
            vals[inds] = np.take(col_means, inds[1])

        X_num_sub = pd.DataFrame(vals, columns=X_num_sub.columns)

        log("[landmarks] Missing/inf values imputed with column means (NaNs guaranteed removed) for LR/KNN/DT/FDR")

    le = LabelEncoder()
    y_enc = le.fit_transform(y_sub)
    strat_labels = _safe_stratify_or_none(y_enc)

    landmarks = {
        "landmark_lr_accuracy": None,
        "landmark_dt_depth3_accuracy": None,
        "landmark_knn3_accuracy": None,
        "landmark_random_noise_accuracy": None,
        "fisher_discriminant_ratio": None,
    }

    start = time.time()

    def check_timeout():
        # Raise once the total elapsed time exceeds the landmark budget.
        if time.time() - start > LANDMARK_TIMEOUT_S:
            raise LandmarkTimeout(f"Landmarks exceeded {LANDMARK_TIMEOUT_S:.1f}s")

    # 1) Logistic Regression: 80/20 split, accuracy on the held-out part.
    try:
        check_timeout()
        if X_num_sub.shape[1] == 0:
            raise RuntimeError("No numeric features for LR landmark")
        Xtr, Xte, ytr, yte = train_test_split(
            X_num_sub, y_enc, test_size=0.2,
            random_state=RANDOM_SEED,
            stratify=strat_labels,
        )
        clf = LogisticRegression(max_iter=50, C=0.1, solver="lbfgs")
        clf.fit(Xtr, ytr)
        acc = accuracy_score(yte, clf.predict(Xte))
        landmarks["landmark_lr_accuracy"] = float(acc)
        log(f"[landmark] lr_accuracy ✓ acc={acc:.4f}")
    except LandmarkTimeout:
        log("[landmarks] timeout during LR")
        return "SKIP"
    except Exception as e:
        log(f"[landmark] lr_accuracy error: {e}")
        landmarks["landmark_lr_accuracy"] = None

    # 2) DecisionTree: depth-3 tree on the same kind of split.
    try:
        check_timeout()
        if X_num_sub.shape[1] == 0:
            raise RuntimeError("No numeric features for DT landmark")
        Xtr, Xte, ytr, yte = train_test_split(
            X_num_sub, y_enc, test_size=0.2,
            random_state=RANDOM_SEED,
            stratify=strat_labels,
        )
        clf = DecisionTreeClassifier(max_depth=3, min_samples_leaf=5, random_state=RANDOM_SEED)
        clf.fit(Xtr, ytr)
        acc = accuracy_score(yte, clf.predict(Xte))
        landmarks["landmark_dt_depth3_accuracy"] = float(acc)
        log(f"[landmark] dt_depth3_accuracy ✓ acc={acc:.4f}")
    except LandmarkTimeout:
        log("[landmarks] timeout during DT")
        return "SKIP"
    except Exception as e:
        log(f"[landmark] dt_depth3_accuracy error: {e}")
        landmarks["landmark_dt_depth3_accuracy"] = None

    # 3) KNN-3: restricted to 30 randomly chosen columns when there are more.
    try:
        check_timeout()
        if X_num_sub.shape[1] == 0:
            raise RuntimeError("No numeric features for KNN landmark")
        X_knn = X_num_sub
        if X_knn.shape[1] > 30:
            cols = RNG.choice(X_knn.shape[1], size=30, replace=False)
            X_knn = X_knn.iloc[:, cols]
        Xtr, Xte, ytr, yte = train_test_split(
            X_knn, y_enc, test_size=0.2,
            random_state=RANDOM_SEED,
            stratify=strat_labels,
        )
        clf = KNeighborsClassifier(n_neighbors=3)
        clf.fit(Xtr, ytr)
        acc = accuracy_score(yte, clf.predict(Xte))
        landmarks["landmark_knn3_accuracy"] = float(acc)
        log(f"[landmark] knn3_accuracy ✓ acc={acc:.4f}")
    except LandmarkTimeout:
        log("[landmarks] timeout during KNN")
        return "SKIP"
    except Exception as e:
        log(f"[landmark] knn3_accuracy error: {e}")
        landmarks["landmark_knn3_accuracy"] = None

    # 4) Random noise baseline: predictions are sampled from the empirical
    #    class frequencies and scored against the true labels.
    try:
        check_timeout()
        probs = np.bincount(y_enc) / len(y_enc)
        preds = RNG.choice(np.arange(len(probs)), size=len(y_enc), p=probs)
        acc = accuracy_score(y_enc, preds)
        landmarks["landmark_random_noise_accuracy"] = float(acc)
        log(f"[landmark] random_noise_accuracy ✓ acc={acc:.4f}")
    except LandmarkTimeout:
        log("[landmarks] timeout during random baseline")
        return "SKIP"
    except Exception as e:
        log(f"[landmark] random_noise_accuracy error: {e}")
        landmarks["landmark_random_noise_accuracy"] = None

    # 5) Fisher Discriminant Ratio: per feature, the size-weighted squared
    #    distance of the class means from the overall mean divided by the
    #    size-weighted within-class variance; averaged over the features
    #    whose denominator is positive.
    try:
        check_timeout()
        if X_num_sub.shape[1] == 0:
            raise RuntimeError("No numeric features for FDR")
        fdr_values = []
        for j in range(X_num_sub.shape[1]):
            xj = X_num_sub.iloc[:, j].values.astype(float)
            mu = xj.mean()
            num = 0.0
            den = 0.0
            for c in np.unique(y_enc):
                mask_c = (y_enc == c)
                xc = xj[mask_c]
                if xc.size == 0:
                    continue
                nc = xc.size
                mu_c = xc.mean()
                # A single-sample class contributes zero variance.
                var_c = xc.var(ddof=1) if nc > 1 else 0.0
                num += nc * (mu_c - mu) ** 2
                den += nc * var_c
            if den > 0:
                fdr_values.append(num / (den + 1e-12))
        fdr = float(np.mean(fdr_values)) if fdr_values else None
        landmarks["fisher_discriminant_ratio"] = fdr
        log(f"[landmark] fisher_discriminant_ratio ✓ value={fdr}")
    except LandmarkTimeout:
        log("[landmarks] timeout during FDR")
        return "SKIP"
    except Exception as e:
        log(f"[landmark] fisher_discriminant_ratio error: {e}")
        landmarks["fisher_discriminant_ratio"] = None

    log(f"[landmarks] done in {time.time()-start:.2f}s")
    return landmarks

def compute_meta(X_train: pd.DataFrame, y_train, numeric_mask: np.ndarray):
    """Combine dataset meta-features and landmark scores into one dict.

    Returns ``None`` when the landmark step asks to skip the dataset (it
    returned ``"SKIP"``); otherwise the meta-feature dict, extended with the
    landmark values when those were returned as a dict.
    """
    features = compute_dataset_features(X_train, y_train)
    landmark_result = compute_landmarks(X_train, y_train, numeric_mask)

    if isinstance(landmark_result, dict):
        features.update(landmark_result)
        return features
    return None if landmark_result == "SKIP" else features

In [None]:
# Cell 6 

# Worker count handed to the estimators: -1 when MAX_THREADS is None or -1,
# otherwise MAX_THREADS as an int.
if MAX_THREADS is None or int(MAX_THREADS) == -1:
    _JOBS = -1
else:
    _JOBS = int(MAX_THREADS)

def make_logreg():
    """Create the logistic-regression candidate (lbfgs solver, up to 800 iterations).

    ``n_jobs`` is only set to ``_JOBS`` when a default-constructed estimator
    exposes that attribute; otherwise ``None`` is passed.
    """
    has_n_jobs = hasattr(LogisticRegression(), "n_jobs")
    workers = _JOBS if has_n_jobs else None
    return LogisticRegression(max_iter=800, solver="lbfgs", n_jobs=workers)

def make_rf():
    """Create the random-forest candidate: 200 unrestricted-depth trees, seeded, ``_JOBS`` workers."""
    settings = {
        "n_estimators": 200,
        "max_depth": None,
        "random_state": RANDOM_SEED,
        "n_jobs": _JOBS,
    }
    return RandomForestClassifier(**settings)

def make_xgboost():
    """Create the XGBoost candidate, or return ``None`` when xgboost cannot be imported."""
    try:
        import xgboost as xgb
    except ImportError:
        return None

    settings = {
        "tree_method": "hist",
        "n_estimators": 200,
        "max_depth": 6,
        "learning_rate": 0.1,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "eval_metric": "mlogloss",
        "n_jobs": _JOBS,
    }
    return xgb.XGBClassifier(**settings)

class MLPNet(nn.Module):
    def __init__(self, input_dim, n_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, n_classes),
        )

    def forward(self, x):
        return self.net(x)

class TinyConv1DNet(nn.Module):
    """One Conv1d layer (8 filters) + ReLU + adaptive max-pool to 16 positions + linear head.

    ``input_dim`` is accepted for a uniform constructor signature but is not
    used: the adaptive pooling fixes the flattened size at 8 * 16.
    """

    def __init__(self, input_dim, n_classes):
        super().__init__()
        n_filters, pooled_len = 8, 16
        self.conv = nn.Conv1d(1, n_filters, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(pooled_len)
        self.fc = nn.Linear(n_filters * pooled_len, n_classes)

    def forward(self, x):
        """Insert a channel axis at dim 1, convolve, pool, flatten and classify."""
        pooled = self.pool(self.relu(self.conv(x.unsqueeze(1))))
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

class TinyConvNet(nn.Module):
    """Smaller sibling of the 1-D CNN: 4 filters, adaptive max-pool to 8 positions, linear head.

    ``input_dim`` is accepted for a uniform constructor signature but is not
    used: the adaptive pooling fixes the flattened size at 4 * 8.
    """

    def __init__(self, input_dim, n_classes):
        super().__init__()
        n_filters, pooled_len = 4, 8
        self.conv = nn.Conv1d(1, n_filters, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(pooled_len)
        self.fc = nn.Linear(n_filters * pooled_len, n_classes)

    def forward(self, x):
        """Insert a channel axis at dim 1, convolve, pool, flatten and classify."""
        pooled = self.pool(self.relu(self.conv(x.unsqueeze(1))))
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

class TinyRNNNet(nn.Module):
    """Single-layer GRU (hidden size 32) that reads the input one scalar per step.

    ``input_dim`` is accepted for a uniform constructor signature but is not
    used by the layers; the GRU always has ``input_size=1``.
    """

    def __init__(self, input_dim, n_classes):
        super().__init__()
        self.hidden_dim = 32
        self.rnn = nn.GRU(input_size=1, hidden_size=self.hidden_dim, batch_first=True)
        self.fc = nn.Linear(self.hidden_dim, n_classes)

    def forward(self, x):
        """Append a trailing size-1 axis, run the GRU and classify its final hidden state."""
        _, final_hidden = self.rnn(x.unsqueeze(-1))
        return self.fc(final_hidden[-1])

def make_cnn1d(input_dim, n_classes):
    """Build a ``TinyConv1DNet`` on the global ``device``; ``None`` when no device is configured."""
    return None if device is None else TinyConv1DNet(input_dim, n_classes).to(device)

def make_tinyconv(input_dim, n_classes):
    """Build a ``TinyConvNet`` on the global ``device``; ``None`` when no device is configured."""
    return None if device is None else TinyConvNet(input_dim, n_classes).to(device)

def make_tiny_rnn(input_dim, n_classes):
    """Build a ``TinyRNNNet`` on the global ``device``; ``None`` when no device is configured."""
    return None if device is None else TinyRNNNet(input_dim, n_classes).to(device)

def make_mlp(input_dim, n_classes):
    """Build an ``MLPNet`` on the global ``device``; ``None`` when no device is configured."""
    return None if device is None else MLPNet(input_dim, n_classes).to(device)

# Registry of benchmarked models: name -> (kind, factory).
# "classic" factories take no arguments; "deep" factories take
# (input_dim, n_classes). Iteration order is the declaration order below.
MODELS = {
    name: (kind, factory)
    for name, kind, factory in (
        ("logreg",   "classic", make_logreg),
        ("rf",       "classic", make_rf),
        ("xgboost",  "classic", make_xgboost),
        ("cnn1d",    "deep",    make_cnn1d),
        ("tiny_rnn", "deep",    make_tiny_rnn),
        ("mlp",      "deep",    make_mlp),
        ("tinyconv", "deep",    make_tinyconv),
    )
}

# Numeric id per model name, assigned by position starting at 1.
# The ids are positional: append new names at the end, never reorder.
MODEL_IDS = {
    name: model_id
    for model_id, name in enumerate(
        ("logreg", "rf", "xgboost", "cnn1d", "tiny_rnn", "mlp", "tinyconv"),
        start=1,
    )
}

# ---------- Model capabilities & static model features ----------

# Baseline capability record, listing every field in the order shared by all
# model rows. The defaults describe a model with no tree structure and no
# neural architecture; each model below overrides only the fields that differ.
_CAPABILITY_DEFAULTS = {
    "model_id":          None,   # always filled in from MODEL_IDS
    "model_name":        None,   # always filled in
    "is_deep_learning":  False,
    "is_tree_based":     False,
    "is_linear":         False,
    "model_family":      None,   # always filled in

    "parameterization_type":        "linear-in-features",
    "complexity_training_big_o":    None,   # always filled in
    "complexity_inference_big_o":   None,   # always filled in
    "is_probabilistic":             True,
    "is_ensemble_model":            False,
    "regularization_supported":     "L2",
    "supports_multiclass_natively": True,
    "supports_online_learning":     False,

    "supports_multiple_trees":  False,
    "tree_growth_strategy":     "none",
    "default_max_depth":        0,
    "supports_pruning":         False,
    "splitting_criterion":      "none",

    "architecture_type":          "none",
    "supports_dropout":           False,
    "supports_batchnorm":         False,
    "default_activation":         "none",
    "supports_cuda_acceleration": False,

    "supports_non_linearity":        False,
    "supports_categorical_directly": False,
    "supports_missing_values":       False,
    "supports_gpu":                  False,

    "n_estimators":     0,
    "avg_tree_depth":   0.0,
    "max_tree_depth":   0,
    "n_leaves_mean":    0.0,

    "n_layers":             0,
    "hidden_units_mean":    0.0,
    "dropout_rate_mean":    0.0,
    "activation_type":      "none",
    "batch_size":           0,
    "epochs":               0,
}


def _capability_row(name, family, train_cost, infer_cost, **overrides):
    """Return a full capability record for ``name``: defaults plus ``overrides``.

    Only existing fields are updated, so the key order of
    ``_CAPABILITY_DEFAULTS`` is preserved in every row.
    """
    unknown = set(overrides) - set(_CAPABILITY_DEFAULTS)
    if unknown:
        raise KeyError(f"unknown capability field(s): {sorted(unknown)}")
    row = dict(_CAPABILITY_DEFAULTS)
    row.update(
        model_id=MODEL_IDS[name],
        model_name=name,
        model_family=family,
        complexity_training_big_o=train_cost,
        complexity_inference_big_o=infer_cost,
    )
    row.update(overrides)
    return row


def _deep_capability_row(name, family, train_cost, infer_cost, *,
                         architecture, activation, n_layers, hidden_units_mean):
    """Capability record for a torch model trained with DL_BATCH_SIZE / DL_MAX_EPOCHS."""
    return _capability_row(
        name, family, train_cost, infer_cost,
        is_deep_learning=True,
        architecture_type=architecture,
        default_activation=activation,
        supports_cuda_acceleration=True,
        supports_non_linearity=True,
        supports_gpu=True,
        n_layers=n_layers,
        hidden_units_mean=hidden_units_mean,
        activation_type=activation,
        batch_size=DL_BATCH_SIZE,
        epochs=DL_MAX_EPOCHS,
    )


MODEL_CAPABILITIES = {
    "logreg": _capability_row(
        "logreg", "Linear", "O(n · d)", "O(d)",
        is_linear=True,
    ),

    # For rf the zeros in default_max_depth / max_tree_depth stand for
    # "unlimited" (max_depth=None), and the zeros in avg_tree_depth /
    # n_leaves_mean stand for "dataset-dependent".
    "rf": _capability_row(
        "rf", "TreeEnsemble", "O(n · log n · trees)", "O(trees · depth)",
        is_tree_based=True,
        parameterization_type="fixed-per-estimator",
        is_ensemble_model=True,
        regularization_supported="None",
        supports_multiple_trees=True,
        tree_growth_strategy="depth-based",
        splitting_criterion="gini",
        supports_non_linearity=True,
        n_estimators=200,
    ),

    # NOTE(review): make_xgboost() does not set `grow_policy`, so the
    # "leaf-based" label below may not match the configured model — confirm
    # against the XGBoost parameter docs before relying on this feature.
    "xgboost": _capability_row(
        "xgboost", "BoostedTrees", "O(n · log n · trees)", "O(trees · depth)",
        is_tree_based=True,
        parameterization_type="fixed-per-estimator",
        is_ensemble_model=True,
        regularization_supported="L1/L2",
        supports_multiple_trees=True,
        tree_growth_strategy="leaf-based",
        default_max_depth=6,
        supports_pruning=True,
        splitting_criterion="gain",
        supports_cuda_acceleration=True,
        supports_non_linearity=True,
        supports_missing_values=True,
        supports_gpu=True,
        n_estimators=200,
        avg_tree_depth=6.0,
        max_tree_depth=6,
    ),

    "cnn1d": _deep_capability_row(
        "cnn1d", "CNN", "O(n · d · epochs)", "O(d · filters)",
        architecture="CNN1D",
        activation="relu",
        n_layers=2,
        hidden_units_mean=8.0,
    ),

    "tiny_rnn": _deep_capability_row(
        "tiny_rnn", "RNN", "O(n · d · hidden_dim · epochs)", "O(d · hidden_dim)",
        architecture="RNN-GRU",
        activation="tanh",
        n_layers=2,
        hidden_units_mean=32.0,
    ),

    "mlp": _deep_capability_row(
        "mlp", "MLP", "O(n · Σ(layer_dims) · epochs)", "O(Σ(layer_dims))",
        architecture="MLP",
        activation="relu",
        n_layers=3,
        hidden_units_mean=(128.0 + 64.0) / 2.0,
    ),

    "tinyconv": _deep_capability_row(
        "tinyconv", "CNN", "O(n · d · epochs)", "O(d · filters)",
        architecture="CNN1D",
        activation="relu",
        n_layers=2,
        hidden_units_mean=4.0,
    ),
}

# ---------- Utility ----------

def count_parameters(model) -> int:
    """Best-effort count of the learned parameters of a fitted model.

    - torch ``nn.Module``: total number of elements over ``model.parameters()``
      (weights and biases).
    - estimators exposing ``coef_``: number of coefficients plus, when the
      model fits an intercept, the number of ``intercept_`` entries. The
      intercept used to be left out, which under-counted linear models
      relative to the torch models, whose biases are counted.
    - ensembles exposing ``estimators_``: sum of ``tree_.node_count`` over the
      members that carry a ``tree_``.

    Returns:
        The count as an ``int``; ``0`` for unrecognised models or if anything
        goes wrong while inspecting the model (this helper never raises).
    """
    try:
        if torch is not None and isinstance(model, nn.Module):
            return sum(p.numel() for p in model.parameters())

        if hasattr(model, "coef_"):
            n_params = int(np.size(model.coef_))
            intercept = getattr(model, "intercept_", None)
            # Skip the intercept when the estimator was told not to fit one:
            # in that case `intercept_` is a placeholder, not a learned value.
            if intercept is not None and getattr(model, "fit_intercept", True):
                n_params += int(np.size(intercept))
            return n_params

        if hasattr(model, "estimators_"):
            trees = (getattr(est, "tree_", None) for est in model.estimators_)
            return sum(tree.node_count for tree in trees if tree is not None)

        return 0
    except Exception:
        # Deliberately best-effort: a failed count must not abort a benchmark run.
        return 0

# ---------- Training  ----------

def _serialized_size_kb(dump, suffix: str) -> float:
    """Return the size in KB of the file written by ``dump(path)``.

    The temporary file is removed even when ``dump`` raises, so a failed
    serialisation does not leave stray files in the temp directory.
    """
    import tempfile

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp_path = Path(tmp.name)
    try:
        dump(tmp_path)
        return tmp_path.stat().st_size / 1024.0
    finally:
        tmp_path.unlink(missing_ok=True)


def train_and_eval_model(
    model_name: str,
    preprocessor,
    X_train: pd.DataFrame,
    y_train,
    X_test: pd.DataFrame,
    y_test,
):
    """Train one registered model and measure its quality and resource cost.

    ``model_name`` is looked up in ``MODELS``. "classic" models are fitted as
    a ``Pipeline(preprocess -> clf)``; note that this fits the passed
    ``preprocessor`` instance itself. All other models are treated as torch
    networks: a clone of the preprocessor is fitted, the data is converted to
    dense float32 arrays, and the network is trained with Adam and
    cross-entropy for at most ``DL_MAX_EPOCHS`` epochs with early stopping.
    If training on CUDA runs out of memory, the deep path is retried on CPU.

    NOTE(review): early stopping and best-checkpoint selection in the deep
    path are driven by accuracy on the *test* split, so the reported test
    metrics of deep models are optimistically biased. Left as is so results
    stay comparable with earlier runs — confirm whether a separate validation
    split is wanted.

    Args:
        model_name: Key into ``MODELS``.
        preprocessor: Unfitted sklearn transformer for the feature columns.
        X_train, y_train: Training features and labels.
        X_test, y_test: Held-out features and labels.

    Returns:
        A dict with ``model_name``, ``accuracy``, ``f1_macro``,
        ``precision_macro``, ``trained_model_size_kb``, ``inference_speed_ms``,
        ``ram_usage_kb``, ``static_ram_kb``, ``dynamic_ram_kb``,
        ``training_time_seconds``, ``inference_time_ms_per_sample`` and
        ``model_n_parameters``; or ``None`` when the model is unavailable,
        has fewer than two classes to learn (xgboost), or fails. Errors are
        logged, never raised.
    """
    model_type, ctor = MODELS[model_name]
    if ctor is None:
        log(f"[model] {model_name} ctor=None → skipping")
        return None

    proc = psutil.Process(os.getpid())

    try:
        if model_type == "classic":
            base_model = ctor()
            if base_model is None:
                log(f"[model] {model_name} unavailable (e.g., xgboost missing)")
                return None

            pipe = Pipeline([("preprocess", preprocessor), ("clf", base_model)])

            y_train_local = np.asarray(y_train)
            y_test_local  = np.asarray(y_test)

            if model_name == "xgboost":
                # XGBoost is trained and scored on integer-encoded labels.
                le = LabelEncoder()
                y_train_local = le.fit_transform(y_train_local)
                y_test_local  = le.transform(y_test_local)

                classes = np.unique(y_train_local)
                n_classes = len(classes)

                if n_classes < 2:
                    log(
                        f"[warn][model] did model=xgboost skipped: "
                        f"only {n_classes} distinct class(es) in y_train"
                    )
                    return None

                # Pick the objective that matches the number of classes;
                # a failure here is logged and the default objective is kept.
                try:
                    if n_classes == 2:
                        pipe.set_params(clf__objective="binary:logistic")
                        try:
                            pipe.set_params(clf__num_class=None)
                        except Exception:
                            pass
                    else:
                        pipe.set_params(
                            clf__objective="multi:softprob",
                            clf__num_class=int(n_classes),
                        )
                except Exception as e:
                    log(f"[warn][model] xgboost set_params failed: {e}")

            t0 = time.time()
            try:
                pipe.fit(X_train, y_train_local)
            except Exception as e:
                log(f"[error][model_fit] model={model_name} error={type(e).__name__}: {e}")
                return None

            training_time = time.time() - t0

            # ---- Evaluate on test ----

            # Capture CPU RAM before inference
            rss_before = proc.memory_info().rss

            t1 = time.time()
            y_pred = pipe.predict(X_test)
            infer_time = time.time() - t1

            # Capture CPU RAM after inference
            rss_after = proc.memory_info().rss

            # Dynamic RAM: growth of the process RSS across inference, in KB
            dynamic_ram_kb = max(0.0, (rss_after - rss_before) / 1024.0)

            n_test = len(y_test_local)
            inf_ms_per_sample = (infer_time / max(1, n_test)) * 1000.0

            acc = accuracy_score(y_test_local, y_pred)
            f1m = f1_score(y_test_local, y_pred, average="macro", zero_division=0)
            pm = precision_score(y_test_local, y_pred, average="macro", zero_division=0)

            # Serialize the whole pipeline to measure its on-disk size
            import joblib
            size_kb = _serialized_size_kb(lambda path: joblib.dump(pipe, path), ".joblib")

            n_params = count_parameters(pipe.named_steps["clf"])

            # For classic models the serialized size stands in for static RAM.
            static_ram_kb = size_kb

            total_peak_ram_kb = static_ram_kb + dynamic_ram_kb

            result = {
                "model_name": model_name,
                "accuracy": acc,
                "f1_macro": f1m,
                "precision_macro": pm,
                "trained_model_size_kb": size_kb,
                "inference_speed_ms": inf_ms_per_sample,
                "ram_usage_kb": total_peak_ram_kb,
                "static_ram_kb": static_ram_kb,
                "dynamic_ram_kb": dynamic_ram_kb,
                "training_time_seconds": training_time,
                "inference_time_ms_per_sample": inf_ms_per_sample,
                "model_n_parameters": n_params,
            }

            log(
                f"[model] did model={model_name} "
                f"train={training_time:.2f}s inf={inf_ms_per_sample:.3f}ms/row "
                f"size={size_kb:.1f}KB acc={acc:.4f} f1={f1m:.4f}"
            )

            return result

        else:
            if torch is None or device is None:
                log(f"[model] {model_name} deep model but torch/device unavailable → skipping")
                return None

            from sklearn.base import clone as sk_clone
            preproc_dl = sk_clone(preprocessor)

            def _train_eval_deep_on_device(run_device: torch.device):
                t0_p = time.time()
                X_train_trans = preproc_dl.fit_transform(X_train)
                X_test_trans  = preproc_dl.transform(X_test)

                # Torch needs dense float32 arrays; sparse preprocessor output
                # is densified here. (A second, unconditional np.asarray
                # conversion used to follow and broke on sparse matrices.)
                if hasattr(X_train_trans, "toarray"):
                    X_train_arr = X_train_trans.astype(np.float32).toarray()
                    X_test_arr  = X_test_trans.astype(np.float32).toarray()
                else:
                    X_train_arr = np.asarray(X_train_trans, dtype=np.float32)
                    X_test_arr  = np.asarray(X_test_trans,  dtype=np.float32)

                log(f"[dl] preprocessor for {model_name} fit+transform on {run_device} in {time.time()-t0_p:.2f}s")

                le = LabelEncoder()
                y_train_enc = le.fit_transform(y_train)
                y_test_enc  = le.transform(y_test)

                n_classes = len(le.classes_)
                input_dim = X_train_arr.shape[1]

                mdl = ctor(input_dim, n_classes)
                if mdl is None:
                    log(f"[model] {model_name} ctor returned None → skipping")
                    return None
                mdl = mdl.to(run_device)

                criterion = nn.CrossEntropyLoss()
                optimizer = optim.Adam(mdl.parameters(), lr=1e-3)

                train_ds = TensorDataset(
                    torch.from_numpy(X_train_arr),
                    torch.from_numpy(y_train_enc.astype(np.int64)),
                )
                test_ds = TensorDataset(
                    torch.from_numpy(X_test_arr),
                    torch.from_numpy(y_test_enc.astype(np.int64)),
                )
                train_loader = DataLoader(train_ds, batch_size=DL_BATCH_SIZE, shuffle=True)
                test_loader  = DataLoader(test_ds, batch_size=DL_BATCH_SIZE, shuffle=False)

                best_val_acc = -np.inf
                best_state = None
                no_improve = 0
                t0 = time.time()

                for epoch in range(DL_MAX_EPOCHS):
                    mdl.train()
                    for xb, yb in train_loader:
                        xb = xb.to(run_device)
                        yb = yb.to(run_device)
                        optimizer.zero_grad()
                        out = mdl(xb)
                        loss = criterion(out, yb)
                        loss.backward()
                        optimizer.step()

                    # NOTE(review): "validation" accuracy is computed on the
                    # test loader — see the function docstring.
                    mdl.eval()
                    correct = 0
                    total = 0
                    with torch.no_grad():
                        for xb, yb in test_loader:
                            xb = xb.to(run_device)
                            yb = yb.to(run_device)
                            out = mdl(xb)
                            preds = out.argmax(dim=1)
                            correct += (preds == yb).sum().item()
                            total += yb.size(0)
                    val_acc = correct / max(1, total)
                    if val_acc > best_val_acc + 1e-4:
                        best_val_acc = val_acc
                        # Snapshot on CPU so the best weights survive further updates.
                        best_state = {k: v.cpu().clone() for k, v in mdl.state_dict().items()}
                        no_improve = 0
                    else:
                        no_improve += 1
                        if no_improve >= DL_EARLY_STOP_PATIENCE:
                            break

                training_time = time.time() - t0

                if best_state is not None:
                    mdl.load_state_dict(best_state)
                    mdl = mdl.to(run_device)

                # ---- Evaluate on test ----
                # CUDA peak-memory counters only make sense when this run is
                # actually on a CUDA device (not on the CPU fallback).
                track_vram = run_device.type == "cuda" and torch.cuda.is_available()

                rss_before = proc.memory_info().rss
                if track_vram:
                    try:
                        torch.cuda.reset_peak_memory_stats(run_device)
                    except Exception:
                        # Best effort: memory stats are a side measurement.
                        pass

                mdl.eval()
                all_preds = []
                all_true  = []
                t1 = time.time()
                with torch.no_grad():
                    for xb, yb in test_loader:
                        xb = xb.to(run_device)
                        yb = yb.to(run_device)
                        out = mdl(xb)
                        preds = out.argmax(dim=1).cpu().numpy()
                        all_preds.append(preds)
                        all_true.append(yb.cpu().numpy())
                infer_time = time.time() - t1

                rss_after = proc.memory_info().rss
                cpu_delta_kb = max(0.0, (rss_after - rss_before) / 1024.0)

                # Peak CUDA memory allocated since the reset above, in KB.
                vram_delta_kb = 0.0
                if track_vram:
                    try:
                        vram_delta_kb = torch.cuda.max_memory_allocated(run_device) / 1024.0
                    except Exception:
                        vram_delta_kb = 0.0

                dynamic_ram_kb = max(cpu_delta_kb, vram_delta_kb)

                y_pred_enc = np.concatenate(all_preds) if all_preds else np.array([], dtype=int)
                y_true_enc = np.concatenate(all_true) if all_true else np.array([], dtype=int)

                # Score on the original labels when they can be recovered;
                # otherwise fall back to the encoded ones.
                try:
                    y_pred = le.inverse_transform(y_pred_enc)
                    y_true = le.inverse_transform(y_true_enc)
                except Exception:
                    y_pred = y_pred_enc
                    y_true = y_true_enc

                n_test = len(y_true)
                inf_ms_per_sample = (infer_time / max(1, n_test)) * 1000.0

                acc = accuracy_score(y_true, y_pred)
                f1m = f1_score(y_true, y_pred, average="macro", zero_division=0)
                pm = precision_score(y_true, y_pred, average="macro", zero_division=0)

                n_params = count_parameters(mdl)

                size_kb = _serialized_size_kb(
                    lambda path: torch.save(mdl.state_dict(), path), ".pt"
                )

                # Static RAM estimate: 4 bytes per parameter.
                bytes_per_param = 4
                static_ram_kb = (n_params * bytes_per_param) / 1024.0

                total_peak_ram_kb = static_ram_kb + dynamic_ram_kb

                res = {
                    "model_name": model_name,
                    "accuracy": acc,
                    "f1_macro": f1m,
                    "precision_macro": pm,
                    "trained_model_size_kb": size_kb,
                    "inference_speed_ms": inf_ms_per_sample,
                    "ram_usage_kb": total_peak_ram_kb,
                    "static_ram_kb": static_ram_kb,
                    "dynamic_ram_kb": dynamic_ram_kb,
                    "training_time_seconds": training_time,
                    "inference_time_ms_per_sample": inf_ms_per_sample,
                    "model_n_parameters": n_params,
                }

                log(
                    f"[model] did model={model_name} (DL, device={run_device.type}) "
                    f"train={training_time:.2f}s inf={inf_ms_per_sample:.3f}ms/row "
                    f"size={size_kb:.1f}KB acc={acc:.4f} f1={f1m:.4f}"
                )

                return res

            try:
                if device is not None and device.type == "cuda":
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                    return _train_eval_deep_on_device(device)
                else:
                    return _train_eval_deep_on_device(torch.device("cpu"))

            except RuntimeError as e:
                err_msg = str(e).lower()

                if "out of memory" in err_msg:
                    log(f"[warn][model_oom] model={model_name} CUDA OOM on {device}, retrying on CPU")
                    try:
                        if torch.cuda.is_available():
                            torch.cuda.empty_cache()
                            if hasattr(torch.cuda, "ipc_collect"):
                                torch.cuda.ipc_collect()
                    except Exception:
                        pass
                    return _train_eval_deep_on_device(torch.device("cpu"))
                else:
                    log(f"[error][model_dl] model={model_name} RuntimeError: {e}")
                    return None

    except Exception as e:
        log(f"[error][model] model={model_name} error={type(e).__name__}: {e}")
        return None
    finally:
        # Always release cached CUDA memory, whatever path was taken above.
        if torch is not None and torch.cuda.is_available():
            try:
                torch.cuda.empty_cache()
                if hasattr(torch.cuda, "ipc_collect"):
                    torch.cuda.ipc_collect()
            except Exception:
                pass

# ---------- Per-dataset scoring helper ----------

def apply_scores_per_dataset(model_results: list[dict]) -> list[dict]:
    """Add a weighted ``"score"`` to every result dict of one dataset, in place.

    The three cost metrics (model size, inference time, RAM) are min-max
    normalised across the models of this dataset and inverted, so the cheapest
    model gets a reward of 1 and the most expensive a reward close to 0; when
    all models tie on a metric every one gets 1.0. The score is the
    ``SCORE_WEIGHTS``-weighted sum of accuracy and the three rewards.

    Returns:
        The same list object that was passed in (unchanged when empty).
    """
    if not model_results:
        return model_results

    cost_keys = ("trained_model_size_kb", "inference_speed_ms", "ram_usage_kb")

    # Materialise every metric column up front, so a missing or non-numeric
    # value fails before any row has been modified.
    columns = {
        key: np.array([row[key] for row in model_results], dtype=float)
        for key in cost_keys + ("accuracy",)
    }
    bounds = {key: (columns[key].min(), columns[key].max()) for key in cost_keys}

    def _reward(value, lowest, highest):
        """Inverted min-max position of ``value``: 1.0 = cheapest."""
        if highest <= lowest:
            return 1.0
        position = (value - lowest) / (highest - lowest + 1e-9)
        return float(1.0 - position)

    for row in model_results:
        total = SCORE_WEIGHTS["accuracy"] * row["accuracy"]
        for key in cost_keys:
            lowest, highest = bounds[key]
            total = total + SCORE_WEIGHTS[key] * _reward(row[key], lowest, highest)
        row["score"] = float(total)

    return model_results

In [None]:
# Cell 7

# Load the record of datasets that are already done.
# NOTE(review): presumably used further down to skip finished datasets when a
# run is resumed — confirm against load_finished_datasets() and its callers.
finished_datasets = load_finished_datasets()

# Announce the (network-bound) task listing that follows.
log("[init] Listing OpenML tasks…")

def list_tasks_with_retry(retries=3, backoff=3):
    """Fetch the unfiltered OpenML task listing, retrying on failure.

    Args:
        retries: Total number of attempts; must be at least 1.
        backoff: Base delay in seconds. After a failed attempt ``k`` the
            function sleeps ``backoff * k`` seconds before trying again.

    Returns:
        The object returned by
        ``openml.tasks.list_tasks(output_format="dataframe")``.

    Raises:
        ValueError: If ``retries`` is smaller than 1.
        Exception: The error raised by the final attempt is re-raised as is.
    """
    if retries < 1:
        # Without this guard the loop body never runs and the function would
        # silently return None, which only blows up later at the call site.
        raise ValueError(f"retries must be >= 1, got {retries!r}")

    for attempt in range(1, retries + 1):
        try:
            log(f"[stage] list_tasks attempt {attempt}/{retries}")
            # perf_counter is monotonic, so the reported duration cannot be
            # skewed by system clock adjustments.
            t0 = time.perf_counter()

            tasks = openml.tasks.list_tasks(output_format="dataframe")  # no filters

            log(f"[stage] list_tasks ✓ in {time.perf_counter() - t0:.2f}s, got {len(tasks)} tasks")
            return tasks

        except Exception as e:
            log(f"[warn] list_tasks failed ({type(e).__name__}): {e}")
            if attempt == retries:
                # Out of attempts: surface the last error to the caller.
                raise
            # Linear backoff: wait longer after each failed attempt.
            time.sleep(backoff * attempt)

def _filter_by_range(frame, column, lower, upper, what):
    """Keep the rows of ``frame`` whose ``column`` value lies in ``[lower, upper]``.

    A bound of ``None`` means "no limit on that side", matching how the
    processing loop further down treats ``MAX_TRAIN_ROWS``. When ``column`` is
    absent the frame is returned unchanged and a notice is logged.

    Args:
        frame: Task listing to filter.
        column: Name of the numeric column to test.
        lower: Inclusive lower bound, or ``None``.
        upper: Inclusive upper bound, or ``None``.
        what: Human-readable description used in the "column missing" log line.

    Returns:
        The filtered frame (a new object whenever a bound was applied).
    """
    if column not in frame.columns:
        log(f"[filter] {column} column missing → CANNOT filter by {what}")
        return frame

    n_before = len(frame)
    # Bounds are applied one at a time so that a None bound is simply skipped
    # instead of raising TypeError in the comparison.
    if lower is not None:
        frame = frame[frame[column] >= lower]
    if upper is not None:
        frame = frame[frame[column] <= upper]
    log(f"[filter] {lower} ≤ {column} ≤ {upper} → kept {len(frame)}/{n_before}")
    return frame


task_list = list_tasks_with_retry()

# `df` is narrowed step by step below; every filter rebinds it to a new frame,
# so `task_list` keeps referring to the unfiltered listing.
df = task_list

# Only classification tasks are relevant for the models trained later.
if "task_type" in df.columns:
    before = len(df)
    df = df[df["task_type"] == "Supervised Classification"]
    log(f"[filter] task_type='Supervised Classification' → kept {len(df)}/{before}")
else:
    log("[filter] WARNING: task_type column missing")

if ONLY_ACTIVE and "status" in df.columns:
    before = len(df)
    df = df[df["status"].str.lower() == "active"]
    log(f"[filter] status='active' → kept {len(df)}/{before}")

# Drop tasks whose dataset id is blacklisted.
if "did" in df.columns:
    before = len(df)
    df = df[~df["did"].isin(SKIP_DATASET_IDS)]
    log(f"[filter] dataset blacklist → kept {len(df)}/{before}")

df = _filter_by_range(df, "NumberOfInstances", MIN_TRAIN_ROWS, MAX_TRAIN_ROWS, "instance count")
df = _filter_by_range(df, "NumberOfFeatures", MIN_FEATURES, MAX_FEATURES, "feature count")

task_ids = [int(t) for t in df["tid"].tolist()]

if SHUFFLE_TASKS:
    # In-place shuffle driven by the notebook-wide RNG.
    RNG.shuffle(task_ids)

if MAX_TASKS is not None:
    task_ids = task_ids[:MAX_TASKS]

log(f"[init] Selected {len(task_ids)} tasks for processing")

def _build_result_row(task_id, dataset_id, name, meta, res):
    """Assemble one output row for a single (dataset, model) result.

    Args:
        task_id: OpenML task id of the dataset being processed.
        dataset_id: OpenML dataset id.
        name: Dataset name as returned by ``fetch_task_and_data``.
        meta: Mapping returned by ``compute_meta`` for the training split.
        res: Result dict of one model as returned by ``train_and_eval_model``,
            already carrying the ``score`` key.

    Returns:
        A dict whose key order defines the column order handed to
        ``_append_csv``; do not reorder the entries.
    """
    mname = res["model_name"]
    caps = MODEL_CAPABILITIES.get(mname, {})

    return {
        # --- identifiers ---
        "Task_id":          int(task_id),
        "dataset_id":       int(dataset_id),
        "dataset_name":     name,

        # --- dataset meta-features ---
        "n_samples":                    meta.get("n_samples"),
        "n_features":                   meta.get("n_features"),
        "n_numeric_features":           meta.get("n_numeric_features"),
        "n_categorical_features":       meta.get("n_categorical_features"),
        "n_binary_features":            meta.get("n_binary_features"),
        "n_classes":                    meta.get("n_classes"),
        "class_balance_std":            meta.get("class_balance_std"),
        "class_entropy":                meta.get("class_entropy"),
        "mean_feature_variance":        meta.get("mean_feature_variance"),
        "median_feature_variance":      meta.get("median_feature_variance"),
        "mean_corr_abs":                meta.get("mean_corr_abs"),
        "max_corr_abs":                 meta.get("max_corr_abs"),
        "feature_skewness_mean":        meta.get("feature_skewness_mean"),
        "feature_kurtosis_mean":        meta.get("feature_kurtosis_mean"),
        "missing_percentage":           meta.get("missing_percentage"),
        "avg_cardinality_categorical":  meta.get("avg_cardinality_categorical"),
        "complexity_ratio":             meta.get("complexity_ratio"),
        "intrinsic_dim_estimate":       meta.get("intrinsic_dim_estimate"),

        # --- landmark values taken from the same meta mapping ---
        "landmark_lr_accuracy":          meta.get("landmark_lr_accuracy"),
        "landmark_dt_depth3_accuracy":   meta.get("landmark_dt_depth3_accuracy"),
        "landmark_knn3_accuracy":        meta.get("landmark_knn3_accuracy"),
        "landmark_random_noise_accuracy": meta.get("landmark_random_noise_accuracy"),
        "fisher_discriminant_ratio":     meta.get("fisher_discriminant_ratio"),

        # --- static model description from MODEL_CAPABILITIES ---
        "model_id":                    caps.get("model_id"),
        "model_name":                  caps.get("model_name", mname),
        "model_family":                caps.get("model_family"),
        "is_deep_learning":            caps.get("is_deep_learning"),
        "is_tree_based":               caps.get("is_tree_based"),
        "is_linear":                   caps.get("is_linear"),

        "parameterization_type":        caps.get("parameterization_type"),
        "complexity_training_big_o":    caps.get("complexity_training_big_o"),
        "complexity_inference_big_o":   caps.get("complexity_inference_big_o"),
        "is_probabilistic":             caps.get("is_probabilistic"),
        "is_ensemble_model":            caps.get("is_ensemble_model"),
        "regularization_supported":     caps.get("regularization_supported"),
        "supports_multiclass_natively": caps.get("supports_multiclass_natively"),
        "supports_online_learning":     caps.get("supports_online_learning"),

        "supports_multiple_trees":  caps.get("supports_multiple_trees"),
        "tree_growth_strategy":     caps.get("tree_growth_strategy"),
        "default_max_depth":        caps.get("default_max_depth"),
        "supports_pruning":         caps.get("supports_pruning"),
        "splitting_criterion":      caps.get("splitting_criterion"),

        "architecture_type":          caps.get("architecture_type"),
        "supports_dropout":           caps.get("supports_dropout"),
        "supports_batchnorm":         caps.get("supports_batchnorm"),
        "default_activation":         caps.get("default_activation"),
        "supports_cuda_acceleration": caps.get("supports_cuda_acceleration"),

        "supports_non_linearity":        caps.get("supports_non_linearity"),
        "supports_categorical_directly": caps.get("supports_categorical_directly"),
        "supports_missing_values":       caps.get("supports_missing_values"),
        "supports_gpu":                  caps.get("supports_gpu"),

        "n_estimators":     caps.get("n_estimators", 0),
        "avg_tree_depth":   caps.get("avg_tree_depth", 0.0),
        "max_tree_depth":   caps.get("max_tree_depth", 0),
        "n_leaves_mean":    caps.get("n_leaves_mean", 0.0),

        "n_layers":             caps.get("n_layers", 0),
        "hidden_units_mean":    caps.get("hidden_units_mean", 0.0),
        "dropout_rate_mean":    caps.get("dropout_rate_mean", 0.0),
        "activation_type":      caps.get("activation_type", "none"),
        "batch_size":           caps.get("batch_size", 0),
        "epochs":               caps.get("epochs", 0),

        # --- measured results; note the RAM keys are renamed on output ---
        "accuracy":                res["accuracy"],
        "f1_macro":                res["f1_macro"],
        "precision_macro":         res["precision_macro"],
        "trained_model_size_kb":   res["trained_model_size_kb"],
        "inference_speed_ms":      res["inference_speed_ms"],
        "static_usage_ram_kb":     res["static_ram_kb"],
        "dynamic_usage_ram_kb":    res["dynamic_ram_kb"],
        "full_ram_usage_kb":       res["ram_usage_kb"],
        "model_n_parameters":      res["model_n_parameters"],
        "score":                   res["score"],
    }


# Position of every model in ENABLED_MODELS; used to order the output rows.
# It does not depend on the dataset, so it is built once instead of per loop.
model_order_map = {m: i for i, m in enumerate(ENABLED_MODELS)}


def sort_key(result):
    """Sort result dicts by ENABLED_MODELS position; unknown models go last."""
    return model_order_map.get(result["model_name"], float('inf'))


processed_count = 0

for idx, task_id in enumerate(task_ids, start=1):
    if task_id in SKIP_TASK_IDS:
        continue

    # Resolve the dataset id up front so finished or blacklisted datasets can
    # be skipped before fetch_task_and_data() runs. The name is not known yet,
    # so the placeholder is kept until the fetch succeeds.
    dataset_id = None
    name = "UNKNOWN"
    try:
        tmp_task = openml.tasks.get_task(task_id)
        dataset_id = int(tmp_task.dataset_id)
    except Exception as e:
        # Best effort only: the fetch below reports the decisive failure.
        log(f"[warn][task_lookup] tid={task_id} error={type(e).__name__}: {e}")
        dataset_id = None

    # NOTE(review): this assumes mark_dataset_finished() also updates
    # `finished_datasets` in memory; otherwise a dataset shared by several
    # tasks is processed once per task within a single run — confirm.
    if dataset_id is not None and dataset_id in finished_datasets:
        continue

    if dataset_id is not None and dataset_id in SKIP_DATASET_IDS:
        log(f"[skip][dataset_blacklist] did={dataset_id} (in SKIP_DATASET_IDS)")
        mark_dataset_finished(dataset_id)
        continue

    log(f"[dataset] #{idx}/{len(task_ids)} → tid={task_id} did={dataset_id} name='{name}'")

    # ---- fetch ----
    try:
        task, dataset, X, y, cat_ind, attr_names, dataset_name = fetch_task_and_data(task_id)
        dataset_id = int(dataset.dataset_id)
        name = dataset_name
        log(f"[fetch] did={dataset_id} name='{name}' n_samples={X.shape[0]} n_features={X.shape[1]}")
    except TaskNotSupported as e:
        log(f"[skip][task_type] tid={task_id} reason={e}")
        if dataset_id is not None:
            mark_dataset_finished(dataset_id)
        continue
    except Exception as e:
        log(f"[skip][fetch_error] tid={task_id} did={dataset_id} error={type(e).__name__}: {e}")
        if dataset_id is not None:
            mark_dataset_finished(dataset_id)
        continue

    # Re-check the blacklist: the pre-lookup above may have failed, in which
    # case the dataset id only became known through the fetch.
    if dataset_id in SKIP_DATASET_IDS:
        log(f"[skip][dataset_blacklist] did={dataset_id} (in SKIP_DATASET_IDS)")
        mark_dataset_finished(dataset_id)
        continue

    # ---- optional row subsampling ----
    if MAX_TRAIN_ROWS is not None and X.shape[0] > MAX_TRAIN_ROWS:
        log(f"[dataset] did={dataset_id} subsampling from n={X.shape[0]} to {MAX_TRAIN_ROWS}")
        idx_rows = RNG.choice(X.shape[0], size=MAX_TRAIN_ROWS, replace=False)
        X = X.iloc[idx_rows].reset_index(drop=True)
        y = np.asarray(y)[idx_rows]

    # ---- shuffle + train/test split ----
    try:
        X, y = sk_shuffle(X, y, random_state=RANDOM_SEED)
        y_array = np.asarray(y)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y_array, test_size=0.25, random_state=RANDOM_SEED,
            # Stratification needs at least two classes.
            stratify=y_array if np.unique(y_array).shape[0] > 1 else None
        )
    except Exception as e:
        log(f"[skip][split] did={dataset_id} error={type(e).__name__}: {e}")
        mark_dataset_finished(dataset_id)
        continue

    # ---- preprocessing pipeline ----
    try:
        preprocessor, numeric_mask = build_preprocessor(X_train)
    except Exception as e:
        log(f"[skip][preprocess] did={dataset_id} error={type(e).__name__}: {e}")
        mark_dataset_finished(dataset_id)
        continue

    # ---- meta-features ----
    try:
        meta = compute_meta(X_train, y_train, numeric_mask)
        if meta is None:
            log(f"[skip][meta] did={dataset_id} reason=landmark_timeout_or_too_few_rows")
            mark_dataset_finished(dataset_id)
            continue
        log(f"[meta] did={dataset_id} meta-features+landmarks computed")
    except Exception as e:
        log(f"[skip][meta_error] did={dataset_id} error={type(e).__name__}: {e}")
        mark_dataset_finished(dataset_id)
        continue

    # ---- train every model; one failure or a timeout discards the dataset ----
    t_start = time.time()
    model_results = []
    timed_out = False
    model_failed = False

    for i, model_name in enumerate(TRAINING_ORDER, start=1):
        if torch is not None and torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        # The budget is checked between models only; a model that is already
        # running is never interrupted by this check.
        if time.time() - t_start > TRAIN_TIMEOUT_S:
            timed_out = True
            break

        # The counter runs over TRAINING_ORDER, so that is the total shown.
        log(f"[model] did={dataset_id} [{i}/{len(TRAINING_ORDER)}] starting model={model_name}")
        result = train_and_eval_model(
            model_name=model_name,
            preprocessor=preprocessor,
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test,
        )

        if result is None:
            model_failed = True
            log(
                f"[warn][model] did={dataset_id} model={model_name} failed or unavailable "
                f"→ aborting remaining models for this dataset"
            )
            break

        model_results.append(result)

    if timed_out:
        log(f"[timeout] did={dataset_id} training exceeded {TRAIN_TIMEOUT_S:.1f}s → skipping dataset (no rows)")
        mark_dataset_finished(dataset_id)
        prune_cache_if_needed()
        free_memory()
        continue

    # Rows are written only when every enabled model produced a result, so
    # the per-dataset score normalisation always spans the same model set.
    if model_failed or len(model_results) != len(ENABLED_MODELS):
        log(
            f"[skip][models_incomplete] did={dataset_id} "
            f"success_models={len(model_results)}/{len(ENABLED_MODELS)}"
        )
        mark_dataset_finished(dataset_id)
        prune_cache_if_needed()
        free_memory()
        continue

    # ---- score, order and report ----
    model_results = apply_scores_per_dataset(model_results)
    model_results.sort(key=sort_key)

    for res in model_results:
        try:
            log(
                f"[score] did={dataset_id} model={res['model_name']} "
                f"acc={res['accuracy']:.4f} f1={res['f1_macro']:.4f} score={res['score']:.4f}"
            )
        except Exception:
            # Fallback without number formatting, e.g. when a metric is missing
            # or not numeric.
            log(
                f"[score] did={dataset_id} model={res.get('model_name','?')} "
                f"acc={res.get('accuracy')} f1={res.get('f1_macro')} score={res.get('score')}"
            )

    # ---- persist one row per model ----
    rows_to_append = [
        _build_result_row(task_id, dataset_id, name, meta, res)
        for res in model_results
    ]

    _append_csv(OUTPUT_CSV, rows_to_append)

    elapsed = time.time() - t_start
    best = max(model_results, key=lambda r: r["score"])
    log(
        f"[dataset] did={dataset_id} completed {len(rows_to_append)} models in {elapsed:.1f}s "
        f"| best_model={best['model_name']} acc={best['accuracy']:.4f} score={best['score']:.4f}"
    )

    mark_dataset_finished(dataset_id)
    processed_count += 1

    # Clean up before the early-exit check so the last processed dataset is
    # pruned/freed as well.
    prune_cache_if_needed()
    free_memory()

    if MAX_DATASETS is not None and processed_count >= MAX_DATASETS:
        log(f"[done] Reached MAX_DATASETS={MAX_DATASETS}, stopping early")
        break

log(f"[done] Processed {processed_count} datasets (full pipeline)")

In [None]:
# Cell 8 

from pathlib import Path

# Preview of the results file written by the processing loop, if one exists.
if not Path(OUTPUT_CSV).exists():
    print(f"No CSV found at {OUTPUT_CSV} yet.")
else:
    df = pd.read_csv(OUTPUT_CSV)
    print(f"CSV shape: {df.shape}")
    # Only the most recently appended rows are shown.
    display(df.tail(20))
    # Each dataset contributes one row per model, so count distinct ids.
    print("Unique dataset_ids:", df["dataset_id"].nunique())