In [None]:
import random
import itertools
import numpy as np
import pandas as pd
from tqdm import tqdm
from Deep_learning_subgroup import ConcreteAutoencoderFeatureSelector
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid

import tensorflow as tf
from tensorflow.keras.layers import Dense, ReLU
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")
# ------------------------------------------------------------------
# 0.  Utility: decoder‐factory so we can vary its hidden units
# ------------------------------------------------------------------
def make_decoder(units):
    """Build a decoder callable parameterised by its hidden-layer widths.

    The returned function takes a tensor ``x`` and, for every width in
    ``units`` (in order), applies a fresh ``Dense(width)`` layer followed by
    a ``ReLU`` layer.  A final ``Dense(1)`` layer produces the output.
    """
    def decoder(x):
        hidden = x
        for width in units:
            dense_out = Dense(width)(hidden)
            hidden = ReLU()(dense_out)
        # Single-unit output head.
        return Dense(1)(hidden)

    return decoder

# ------------------------------------------------------------------
# 1.  Hyper-parameter search space
# ------------------------------------------------------------------
# Candidate values for every hyper-parameter; ParameterGrid expands them
# into one dict per combination.
param_grid = dict(
    K=[10],                                          # number of features to select
    epochs=[10, 20, 30, 50],                         # with early-stopping 50 is plenty
    batch_size=[256, 512],
    learning_rate=[1e-3, 5e-3, 1e-2],
    start_temp=[5.0, 10.0],
    min_temp=[0.1, 0.5],
    decoder_units=[(128, 64, 32), (256, 128, 64)],   # hidden widths for make_decoder
)

# Materialise the grid so its size is known up front and it can be re-iterated.
grid = [cfg for cfg in ParameterGrid(param_grid)]
print(f"Total models to train: {len(grid)}\n")

# ------------------------------------------------------------------
# 2.  Data – wrap the load logic in a function so we can re-use it
# ------------------------------------------------------------------
def load_split(po, pre, train_idx, test_idx, data_dir=None):
    """Read one simulation CSV and return its train / test arrays.

    Parameters
    ----------
    po, pre
        Values interpolated into the file name
        ``{po}contious_intersection_{pre}.csv``.
    train_idx, test_idx
        Positional row indices (used with ``DataFrame.iloc``) for the two
        splits.
    data_dir : str, optional
        Directory that holds the CSV.  When omitted, the original hard-coded
        simulation directory is used, so existing four-argument calls are
        unaffected.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, trt_train_signed, pi_train,
        g_train, g_test)`` where

        * ``X_*``  are float32 arrays of the columns ``x1`` .. ``x10``;
        * ``y_*``  are float32 arrays of the single column ``y`` (2-D, since
          the column is selected with a list);
        * ``trt_train_signed`` is the training ``treatment`` column recoded
          to +1 where it equals 1 and -1 otherwise;
        * ``pi_train`` is the fitted logistic-regression probability
          (column 1 of ``predict_proba``) on the training rows;
        * ``g_*`` are the ``sigpo`` column as int8.
    """
    if data_dir is None:
        data_dir = "/ui/abv/liuzx18/deep learning/simulation_data/simulation_case1_0816"
    path = f"{data_dir}/{po}contious_intersection_{pre}.csv"

    # "Unnamed: 0" is the index column pandas writes by default; tolerate
    # files saved without it instead of failing with a KeyError.
    df = pd.read_csv(path).drop(columns=["Unnamed: 0"], errors="ignore")
    df_train, df_test = df.iloc[train_idx], df.iloc[test_idx]

    feature_cols = [f"x{i}" for i in range(1, 11)]
    X_train = df_train[feature_cols].to_numpy(np.float32)
    X_test = df_test[feature_cols].to_numpy(np.float32)

    y_train = df_train[["y"]].to_numpy(np.float32)
    y_test = df_test[["y"]].to_numpy(np.float32)

    trt_train = df_train["treatment"].to_numpy(np.float32)
    trt_train_signed = np.where(trt_train == 1, 1, -1).astype(np.float32)

    # Propensity scores π(x): fitted on, and predicted for, the training rows
    # only.  Column 1 is the second entry of the classifier's classes_
    # (presumably treatment == 1 — confirm the coding of the column).
    pi_train = (
        LogisticRegression()
        .fit(X_train, trt_train)
        .predict_proba(X_train)[:, 1]
        .astype(np.float32)
    )

    g_train = df_train["sigpo"].to_numpy(np.int8)
    g_test = df_test["sigpo"].to_numpy(np.int8)

    return X_train, y_train, X_test, y_test, trt_train_signed, pi_train, g_train, g_test

# ------------------------------------------------------------------
# 3.  Train / evaluate one configuration
# ------------------------------------------------------------------
def run_one_cfg(cfg, X_train, y_train, X_test, y_test, trt, pi,
                g_train=None, g_test=None):
    """Train one selector configuration and score it by ROC-AUC.

    Parameters
    ----------
    cfg : dict
        One grid point; must contain ``K``, ``epochs``, ``batch_size``,
        ``learning_rate``, ``start_temp``, ``min_temp`` and
        ``decoder_units``.
    X_train, y_train
        Arrays passed to ``selector.fit``.
    X_test
        Features used for the test-set predictions.
    y_test
        Accepted for signature compatibility; not used in this function.
    trt, pi
        Forwarded unchanged to the selector as ``trt`` and ``pi``.
    g_train, g_test : optional
        Label arrays the predictions are scored against.  When omitted, the
        module-level ``g_train`` / ``g_test`` are used, which is what the
        original seven-argument call relied on implicitly.

    Returns
    -------
    tuple
        ``(auc_train, auc_test, selector)``.
    """
    # Legacy call path: fall back to the module-level label arrays so callers
    # that do not pass them keep working.  Passing them explicitly avoids
    # silently scoring against stale globals from an earlier split.
    if g_train is None or g_test is None:
        module_vars = globals()
        if g_train is None:
            g_train = module_vars["g_train"]
        if g_test is None:
            g_test = module_vars["g_test"]

    decoder = make_decoder(cfg["decoder_units"])
    selector = ConcreteAutoencoderFeatureSelector(
        K             = cfg["K"],
        output_function = decoder,
        batch_size    = cfg["batch_size"],
        num_epochs    = cfg["epochs"],
        loss_name     = "A",
        learning_rate = cfg["learning_rate"],
        start_temp    = cfg["start_temp"],
        min_temp      = cfg["min_temp"],
        trt           = trt,
        pi            = pi,
        ver           = 0,         # silent fit
    )

    # No validation set is passed.  The original note says early stopping
    # handles overfitting; that logic lives in the selector and is not
    # visible here.
    selector.fit(X_train, y_train)
    y_pred_train = selector.model.predict(X_train, verbose=0)
    y_pred_test  = selector.model.predict(X_test,  verbose=0)

    # Labels are negated before scoring, which reverses their ordering and
    # therefore which class roc_auc_score treats as positive.
    # NOTE(review): presumably lower predictions indicate the g == 1 group —
    # confirm the sign convention of the selector's output.
    auc_train = roc_auc_score(-g_train,  y_pred_train)
    auc_test  = roc_auc_score(-g_test,   y_pred_test)
    return auc_train, auc_test, selector

# ------------------------------------------------------------------
# 4.  Main experiment loop
# ------------------------------------------------------------------
val_num = 1          # how many random splits; set >1 for CV
n_total = 1000       # row positions sampled from; the CSV must have at least this many rows
n_train = 800        # rows drawn for training; the remainder is the test set
results = []

for po in [0]:
    for pre in [0.7]:
        for split_id in range(val_num):
            # ----- random train/test split (without replacement) -----
            # NOTE: `random` is not seeded in this cell, so the split differs
            # between runs unless it is seeded elsewhere.
            train_idx = random.sample(range(n_total), n_train)
            train_set = set(train_idx)
            # Ascending order made explicit: iterating a set difference has
            # no guaranteed order.
            test_idx = [i for i in range(n_total) if i not in train_set]

            # ----- load data once for this split -----
            (X_train, y_train, X_test, y_test,
             trt, pi, g_train, g_test) = load_split(po, pre, train_idx, test_idx)

            # ----- iterate over the grid -----
            best_auc, best_cfg, best_selector = -np.inf, None, None

            for cfg in tqdm(grid, desc=f"split {split_id+1}", leave=False):
                auc_tr, auc_te, selector = run_one_cfg(
                    cfg, X_train, y_train, X_test, y_test, trt, pi,
                    g_train=g_train, g_test=g_test,
                )
                results.append({
                    "po": po, "pre": pre, "split": split_id,
                    **cfg,
                    "AUC_train": auc_tr,
                    "AUC_test":  auc_te,
                })

                # NOTE(review): the winner is picked on the test AUC, so the
                # reported "best" score is an optimistic estimate; a separate
                # validation split would be needed for an unbiased figure.
                if auc_te > best_auc:
                    best_auc, best_cfg, best_selector = auc_te, cfg, selector

            print(f"▲ Best AUC on split {split_id}: {best_auc:.4f}  cfg={best_cfg}")

# ------------------------------------------------------------------
# 5.  Save a tidy results table
# ------------------------------------------------------------------
df_results = pd.DataFrame(results)
df_results.to_csv("cae_grid_results.csv", index=False)
print("\nFull grid search results written to 'cae_grid_results.csv'")


Total models to train: 192



split 1:  57%|█████▋    | 110/192 [01:45<01:08,  1.19it/s]