In [None]:
!pip -q install numpy==1.26.4 pandas==2.2.2 scikit-learn==1.5.2 lightgbm==4.5.0 \
               sdv==1.16.0 ctgan==0.10.2

import os, time, numpy as np, pandas as pd
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score
from sklearn.pipeline import Pipeline
from sklearn.base import TransformerMixin, BaseEstimator

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import LinearSVC

from sdv.single_table import CTGANSynthesizer, TVAESynthesizer
from sdv.metadata import SingleTableMetadata

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

RSEED = 42
np.random.seed(RSEED)

PROJ = "/content/drive/MyDrive/dissertation"
DATA_DIR = f"{PROJ}/data"
OUT_ROOT = f"{PROJ}/outputs/synthetic_eval_numeric_only"
os.makedirs(OUT_ROOT, exist_ok=True)


Mounted at /content/drive


In [None]:
# Datasets: numeric-only baseline for 04C
DATASETS = {
    "adult": {
        "path": f"{DATA_DIR}/Adult_clean.csv",
        "target_candidates": ["income_bin","Income_bin","income","Income","target","class"],
        "use_ctgan": True,
        "use_tvae": True
    },
    "petfinder": {
        "path": f"{DATA_DIR}/Petfinder_clean.csv",
        "target_candidates": ["AdoptionSpeed_bin","adoption_bin","AdoptionSpeed"],
        "use_ctgan": True,
        "use_tvae": True
    },
    "breast": {
        "path": f"{DATA_DIR}/Breast_clean.csv",
        "target_candidates": ["OS5yr_bin","os5_bin","OS5yr"],
        "use_ctgan": False,
        "use_tvae": True
    }
}

CONFIG = {
    "train_frac": 0.6,
    "ctgan": {"epochs": 60, "batch_size": 256, "pac": 1, "verbose": True},
    "tvae":  {"epochs": 60, "batch_size": 256, "compress_dims": [256,128]},
    "models": {
        "LR":     {"type": "lr",     "params": {"max_iter": 1000, "n_jobs": -1}},
        "LinSVM": {"type": "linsvm", "params": {"C": 1.0}},
        "RF":     {"type": "rf",     "params": {"n_estimators": 400, "max_depth": None, "n_jobs": -1,
                                                "random_state": RSEED}},
        "LGBM":   {"type": "lgbm",   "params": {"n_estimators": 500, "num_leaves": 63,
                                                "random_state": RSEED}}
    }
}


In [None]:
RUNLOG = []

def _timed(name):
    def deco(fn):
        def inner(*a, **k):
            t0 = time.time()
            out = fn(*a, **k)
            dt = time.time() - t0
            RUNLOG.append({"step": name, "seconds": dt})
            print(f"⏱ {name}: {dt:.2f}s")
            return out
        return inner
    return deco

def save_runlog(out_dir, tag):
    p = f"{out_dir}/timings_{tag}.csv"
    pd.DataFrame(RUNLOG).to_csv(p, index=False)
    print(f"timings saved → {p}")

def pick_target(df, candidates):
    for c in candidates:
        if c in df.columns:
            vals = df[c]
            if set(pd.unique(vals.dropna())) <= {0, 1}:
                return c, df
            sv = vals.astype(str)
            if set(sv.unique()) <= {">50K","<=50K",">50K.","<=50K."}:
                mapped = sv.replace({">50K":1,"<=50K":0,">50K.":1,"<=50K.":0}).astype(int)
                df2 = df.copy(); df2[c] = mapped
                return c, df2
            try:
                uniq = set(pd.unique(pd.to_numeric(vals, errors="coerce").dropna()))
                if uniq <= {0.0, 1.0}:
                    df2 = df.copy(); df2[c] = pd.to_numeric(vals, errors="coerce").fillna(0).astype(int)
                    return c, df2
            except Exception:
                pass
            u = pd.Index(vals.dropna().unique())
            if len(u) == 2:
                lab = sorted(map(str, u))
                m = {lab[0]:0, lab[1]:1}
                df2 = df.copy(); df2[c] = vals.astype(str).map(m).astype(int)
                return c, df2
    raise AssertionError(f"No valid target found among candidates: {candidates}")

def infer_numeric_only(X):
    return [c for c in X.columns if np.issubdtype(X[c].dtype, np.number)]

class ToDense(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None): return self
    def transform(self, X): return X.toarray() if hasattr(X, "toarray") else X

def make_model(name):
    kind   = CONFIG["models"][name]["type"]
    params = CONFIG["models"][name]["params"]
    if kind == "lr":     return LogisticRegression(**params)
    if kind == "linsvm": return LinearSVC(**params)
    if kind == "rf":     return RandomForestClassifier(**params)
    if kind == "lgbm":   return LGBMClassifier(**params)
    raise ValueError(name)

def build_pipeline(model_name, num_cols):
    from sklearn.compose import ColumnTransformer
    pre = ColumnTransformer([("num", "passthrough", num_cols)])
    mdl = make_model(model_name)
    needs_dense = isinstance(mdl, (RandomForestClassifier, LGBMClassifier))
    steps = [("prep", pre)]
    if needs_dense: steps.append(("todense", ToDense()))
    steps.append(("clf", mdl))
    from sklearn.pipeline import Pipeline
    return Pipeline(steps)

def predict_proba_like(pipe, X):
    if hasattr(pipe, "predict_proba"):
        return pipe.predict_proba(X)[:, 1]
    if hasattr(pipe, "decision_function"):
        d = pipe.decision_function(X)
        return (d - d.min()) / (d.max() - d.min() + 1e-9)
    return pipe.predict(X).astype(float)

def score_suite(tag, X_train, y_train, X_eval, y_eval, num_cols, out_dir):
    rows = []
    for name in CONFIG["models"].keys():
        print(f"→ {tag} / {name}")
        pipe = build_pipeline(name, num_cols)
        pipe.fit(X_train, y_train)
        p = predict_proba_like(pipe, X_eval)
        pred = (p > 0.5).astype(int)
        rows.append(dict(
            Tag=tag, Model=name,
            AUC = roc_auc_score(y_eval, p),
            F1  = f1_score(y_eval, pred, zero_division=0),
            ACC = accuracy_score(y_eval, pred),
            PREC= precision_score(y_eval, pred, zero_division=0),
            REC = recall_score(y_eval, pred, zero_division=0)
        ))
    df = pd.DataFrame(rows)
    csv_path = f"{out_dir}/{tag}.csv"
    df.to_csv(csv_path, index=False)
    print(" saved:", csv_path)
    return df


In [None]:
@_timed("run_dataset")
def run_dataset(ds_name):
    ds_cfg  = DATASETS[ds_name]
    out_dir = f"{OUT_ROOT}/{ds_name}"
    os.makedirs(out_dir, exist_ok=True)

    # Load
    df = pd.read_csv(ds_cfg["path"], low_memory=False)

    # Target
    target, df = pick_target(df, ds_cfg["target_candidates"])
    print(f"\n[{ds_name}] Using target: {target}")
    y = df[target].astype(int)
    X = df.drop(columns=[target])

    # Numeric-only view
    all_num_cols = infer_numeric_only(X)
    print(f"Numeric columns kept ({len(all_num_cols)}):", all_num_cols[:10], "..." if len(all_num_cols) > 10 else "")
    X = X[all_num_cols].copy()
    for c in all_num_cols:
        X[c] = pd.to_numeric(X[c], errors="coerce")
    X = X.replace([np.inf, -np.inf], np.nan).fillna(X.median(numeric_only=True))

    # Splits
    X_tr, X_tmp, y_tr, y_tmp = train_test_split(
        X, y, train_size=CONFIG["train_frac"], stratify=y, random_state=RSEED
    )
    X_va, X_te, y_va, y_te = train_test_split(
        X_tmp, y_tmp, test_size=0.5, stratify=y_tmp, random_state=RSEED
    )
    print("Shapes (train / val / test):", X_tr.shape, X_va.shape, X_te.shape)

    # SDV metadata: numeric-only + target as CATEGORICAL
    @_timed(f"{ds_name}:build_metadata")
    def build_metadata_full(Xtrain, ytrain):
        train_full = Xtrain.copy()
        train_full[target] = ytrain.values
        meta = SingleTableMetadata()
        meta.detect_from_dataframe(train_full)
        for c in all_num_cols:
            meta.update_column(c, sdtype="numerical")
        meta.update_column(target, sdtype="categorical")  # critical for preserving class prior
        meta.validate()
        return meta, train_full

    meta_full, train_full = build_metadata_full(X_tr, y_tr)

    # Fit generators
    ctgan = None; tvae = None
    if ds_cfg.get("use_ctgan", True):
        @_timed(f"{ds_name}:fit_CTGAN")
        def fit_ctgan(train_full, meta):
            cfg = CONFIG["ctgan"]
            synth = CTGANSynthesizer(metadata=meta, epochs=cfg["epochs"],
                                     batch_size=cfg["batch_size"], pac=cfg["pac"],
                                     verbose=cfg["verbose"])
            synth.fit(train_full); return synth
        ctgan = fit_ctgan(train_full, meta_full)
    else:
        print(f"[{ds_name}] Skipping CTGAN by config.")

    if ds_cfg.get("use_tvae", True):
        @_timed(f"{ds_name}:fit_TVAE")
        def fit_tvae(train_full, meta):
            cfg = CONFIG["tvae"]
            synth = TVAESynthesizer(metadata=meta, epochs=cfg["epochs"],
                                    batch_size=cfg["batch_size"], compress_dims=cfg["compress_dims"])
            synth.fit(train_full); return synth
        tvae = fit_tvae(train_full, meta_full)
    else:
        print(f"[{ds_name}] Skipping TVAE by config.")

    # ---------- Conditioned sampling helpers ----------
    def _has_sample_conditions(synth):
        return hasattr(synth, "sample_conditions") or hasattr(synth, "sample_remaining_columns")

    def _real_label_mix(y_train, force_balanced=False):
        if force_balanced: return {0: 0.5, 1: 0.5}
        p1 = float((y_train == 1).mean()); return {0: 1 - p1, 1: p1}

    @_timed(f"{ds_name}:sample_conditioned")
    def sample_by_target(synth, n_rows, y_train, tag, force_balanced=False):
        """Try (1) sample_conditions, (2) sample_remaining_columns(df), (3) unconditional, (4) force labels."""
        if synth is None: return None
        mix = _real_label_mix(y_train, force_balanced=force_balanced)

        # (1) SDV >= 1.5: sample_conditions(list-of-dicts)
        if hasattr(synth, "sample_conditions"):
            try:
                parts = []
                for lbl, frac in mix.items():
                    k = max(1, int(round(n_rows * frac)))
                    conds = [{target: int(lbl)}] * k
                    parts.append(synth.sample_conditions(conds))
                return pd.concat(parts, axis=0, ignore_index=True)
            except Exception as e:
                print(f"[{tag}] sample_conditions failed: {e}")

        # (2) Older API: sample_remaining_columns(conditions_df) — positional arg only
        if hasattr(synth, "sample_remaining_columns"):
            try:
                parts = []
                for lbl, frac in mix.items():
                    k = max(1, int(round(n_rows * frac)))
                    cond_df = pd.DataFrame({target: [int(lbl)] * k})
                    parts.append(synth.sample_remaining_columns(cond_df))  # ← positional
                return pd.concat(parts, axis=0, ignore_index=True)
            except Exception as e:
                print(f"[{tag}] sample_remaining_columns failed: {e}")

        # (3) Fallback: unconditional attempts; accept first bi-class batch
        tries, pool = 0, []
        while tries < 5:
            try:
                pool.append(synth.sample(num_rows=n_rows))
            except Exception as e:
                print(f"[{tag}] unconditional sample failed: {e}")
                break
            tries += 1
            df_all = pd.concat(pool, axis=0, ignore_index=True)
            if df_all[target].nunique(dropna=False) >= 2:
                return df_all.sample(n=n_rows, replace=False, random_state=RSEED).reset_index(drop=True)

        # (4) Last-resort: unconditional one shot + force label column to required mix
        try:
            df_force = synth.sample(num_rows=n_rows)
            # overwrite target to match mix
            n1 = int(round(n_rows * mix[1])); idx = np.random.RandomState(RSEED).permutation(n_rows)
            y_forced = np.zeros(n_rows, dtype=int); y_forced[idx[:n1]] = 1
            df_force[target] = y_forced
            return df_force
        except Exception as e:
            print(f"[{tag}] final fallback failed: {e}")
            return None
    # --------------------------------------------------

    # Conditioned sampling (aim to preserve label mix)
    syn_ctgan = sample_by_target(ctgan, len(train_full), y_tr, "CTGAN")
    syn_tvae  = sample_by_target(tvae,  len(train_full), y_tr, "TVAE")

    # Split to X/y (numeric-only) + label summaries
    def split_xy(df_syn):
        y_syn = pd.to_numeric(df_syn[target], errors="coerce").round().clip(0,1).astype(int)
        X_syn = df_syn.drop(columns=[target])[all_num_cols].copy()
        for c in all_num_cols:
            X_syn[c] = pd.to_numeric(X_syn[c], errors="coerce")
        X_syn = X_syn.replace([np.inf, -np.inf], np.nan).fillna(X_syn.median(numeric_only=True))
        return X_syn, y_syn

    def _summarize_label(tag, yv):
        pos = int(pd.Series(yv).sum()); neg = int(len(yv) - pos)
        tot = pos + neg
        print(f"[{tag}] pos={pos} ({pos/tot:.3f}), neg={neg} ({neg/tot:.3f})")

    if syn_ctgan is not None:
        X_syn_ctgan, y_syn_ctgan = split_xy(syn_ctgan)
        _summarize_label("CTGAN", y_syn_ctgan)
    else:
        X_syn_ctgan = y_syn_ctgan = None

    if syn_tvae is not None:
        X_syn_tvae, y_syn_tvae = split_xy(syn_tvae)
        _summarize_label("TVAE", y_syn_tvae)
    else:
        X_syn_tvae = y_syn_tvae = None

    # Guards to skip single-class synthetic sets
    def _ok(yv): return yv is not None and pd.Series(yv).nunique(dropna=False) >= 2

    # TSTR/TRTS on numeric-only view
    summaries = []
    if _ok(y_syn_ctgan):
        summaries.append(score_suite("tstr_ctgan", X_syn_ctgan, y_syn_ctgan, X_te, y_te, all_num_cols, out_dir))
    else:
        print(" Skipping TSTR_CTGAN: synthetic labels not bi-class.")
    if _ok(y_syn_tvae):
        summaries.append(score_suite("tstr_tvae",  X_syn_tvae,  y_syn_tvae,  X_te, y_te, all_num_cols, out_dir))
    else:
        print(" Skipping TSTR_TVAE: synthetic labels not bi-class.")

    if _ok(y_syn_ctgan):
        summaries.append(score_suite("trts_ctgan", X_tr, y_tr, X_syn_ctgan, y_syn_ctgan, all_num_cols, out_dir))
    else:
        print(" Skipping TRTS_CTGAN: synthetic labels not bi-class.")
    if _ok(y_syn_tvae):
        summaries.append(score_suite("trts_tvae",  X_tr, y_tr, X_syn_tvae,  y_syn_tvae,  all_num_cols, out_dir))
    else:
        print(" Skipping TRTS_TVAE: synthetic labels not bi-class.")

    if len(summaries) == 0:
        print(" No valid summaries for this dataset (all generators collapsed labels).")
        summary = pd.DataFrame(columns=["Tag","Model","AUC","F1","ACC","PREC","REC"])
    else:
        summary = pd.concat(summaries, axis=0)

    summary.to_csv(f"{out_dir}/summary_baseline.csv", index=False)
    print(" summary →", f"{out_dir}/summary_baseline.csv")
    save_runlog(out_dir, f"{ds_name}_baseline")
    return summary

# Run subset + pretty tables (unchanged)
to_run = ["adult","petfinder","breast"]
all_summaries = []
for ds in to_run:
    print("\n" + "="*80)
    print(f"Running baseline (numeric-only) for: {ds.upper()}")
    print("="*80)
    RUNLOG.clear()
    all_summaries.append(run_dataset(ds))

combined = pd.concat(all_summaries, axis=0, ignore_index=True)
combined.to_csv(f"{OUT_ROOT}/summary_baseline_combined.csv", index=False)
print("\n Combined summary →", f"{OUT_ROOT}/summary_baseline_combined.csv")

def _fmt_setting(tag: str) -> str: return tag.upper()

def make_human_table(df: pd.DataFrame, include_dataset=False) -> pd.DataFrame:
    out = df.copy()
    if len(out) == 0:
        cols = (["Dataset"] if include_dataset else []) + ["Setting","Model","AUC","F1","ACC","PREC","REC"]
        return pd.DataFrame(columns=cols)
    out["Setting"] = out["Tag"].apply(_fmt_setting)
    cols = ["Setting","Model","AUC","F1","ACC","PREC","REC"]
    if include_dataset:
        out["Dataset"] = out.get("Dataset","")
        cols = ["Dataset"] + cols
    for c in ["AUC","F1","ACC","PREC","REC"]:
        out[c] = out[c].astype(float).round(3)
    out = out[cols].sort_values(["Setting","Model"]).reset_index(drop=True)
    return out

for ds_name, df_sum in zip(to_run, all_summaries):
    pretty = make_human_table(df_sum)
    path = f"{OUT_ROOT}/{ds_name}/summary_pretty.csv"
    pretty.to_csv(path, index=False)
    print(f" Pretty summary for {ds_name} → {path}")

stacked = []
for ds_name, df_sum in zip(to_run, all_summaries):
    tmp = df_sum.copy()
    if len(tmp): tmp["Dataset"] = ds_name.upper()
    stacked.append(tmp)
stacked = pd.concat(stacked, axis=0, ignore_index=True)

pretty_combined = make_human_table(stacked, include_dataset=True)
pretty_combined.to_csv(f"{OUT_ROOT}/summary_pretty_combined.csv", index=False)
print(" Pretty combined summary →", f"{OUT_ROOT}/summary_pretty_combined.csv")

example = pretty_combined[pretty_combined["Dataset"]=="ADULT"].drop(columns=["Dataset"], errors="ignore")
print("\nExample (ADULT):")
display(example)



Running baseline (numeric-only) for: ADULT

[adult] Using target: income
Numeric columns kept (5): ['age', 'education_num', 'capital_gain', 'capital_loss', 'hours_per_week'] 
Shapes (train / val / test): (29305, 5) (9768, 5) (9769, 5)
⏱ adult:build_metadata: 0.21s


Gen. (-0.99) | Discrim. (-0.02): 100%|██████████| 60/60 [08:40<00:00,  8.67s/it]


⏱ adult:fit_CTGAN: 573.52s
⏱ adult:fit_TVAE: 220.68s


Sampling remaining columns: 100%|██████████| 22293/22293 [00:02<00:00, 10144.30it/s]
Sampling remaining columns: 100%|██████████| 7012/7012 [00:00<00:00, 9930.65it/s] 


⏱ adult:sample_conditioned: 2.93s


Sampling remaining columns: 100%|██████████| 22293/22293 [00:00<00:00, 30865.02it/s]
Sampling remaining columns: 100%|██████████| 7012/7012 [00:00<00:00, 10027.98it/s]


⏱ adult:sample_conditioned: 1.45s
[CTGAN] pos=7012 (0.239), neg=22293 (0.761)
[TVAE] pos=7012 (0.239), neg=22293 (0.761)
→ tstr_ctgan / LR
→ tstr_ctgan / LinSVM
→ tstr_ctgan / RF
→ tstr_ctgan / LGBM
[LightGBM] [Info] Number of positive: 7012, number of negative: 22293
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002802 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 671
[LightGBM] [Info] Number of data points in the train set: 29305, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.239277 -> initscore=-1.156650
[LightGBM] [Info] Start training from score -1.156650
 saved: /content/drive/MyDrive/dissertation/outputs/synthetic_eval_numeric_only/adult/tstr_ctgan.csv
→ tstr_tvae / LR
→ tstr_tvae / LinSVM
→ tstr_tvae / RF
→ tstr_tvae / LGBM
[LightGBM] [Info] Number of positive: 7012, number of negative: 22293
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ove

Gen. (-1.10) | Discrim. (-0.05): 100%|██████████| 60/60 [01:53<00:00,  1.90s/it]


⏱ petfinder:fit_CTGAN: 117.99s
⏱ petfinder:fit_TVAE: 27.12s


Sampling remaining columns: 100%|██████████| 5265/5265 [00:00<00:00, 11374.19it/s]
Sampling remaining columns: 100%|██████████| 1657/1657 [00:00<00:00, 2934.52it/s]


⏱ petfinder:sample_conditioned: 1.04s


Sampling remaining columns: 100%|██████████| 5265/5265 [00:00<00:00, 27148.78it/s]
Sampling remaining columns: 100%|██████████| 1657/1657 [00:00<00:00, 8049.04it/s]


⏱ petfinder:sample_conditioned: 0.42s
[CTGAN] pos=1657 (0.239), neg=5265 (0.761)
[TVAE] pos=1657 (0.239), neg=5265 (0.761)
→ tstr_ctgan / LR
→ tstr_ctgan / LinSVM
→ tstr_ctgan / RF
→ tstr_ctgan / LGBM
[LightGBM] [Info] Number of positive: 1657, number of negative: 5265
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000298 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 307
[LightGBM] [Info] Number of data points in the train set: 6922, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.239382 -> initscore=-1.156072
[LightGBM] [Info] Start training from score -1.156072
 saved: /content/drive/MyDrive/dissertation/outputs/synthetic_eval_numeric_only/petfinder/tstr_ctgan.csv
→ tstr_tvae / LR
→ tstr_tvae / LinSVM
→ tstr_tvae / RF
→ tstr_tvae / LGBM
[LightGBM] [Info] Number of positive: 1657, number of negative: 5265
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the 



⏱ breast:fit_TVAE: 1026.53s
⏱ breast:sample_conditioned: 0.00s


Sampling remaining columns: 100%|██████████| 25088/25088 [00:04<00:00, 5941.92it/s]
Sampling remaining columns: 100%|██████████| 10782/10782 [00:05<00:00, 1883.84it/s]

⏱ breast:sample_conditioned: 9.98s
[TVAE] pos=10782 (0.301), neg=25088 (0.699)
⚠️  Skipping TSTR_CTGAN: synthetic labels not bi-class.
→ tstr_tvae / LR





→ tstr_tvae / LinSVM
→ tstr_tvae / RF
→ tstr_tvae / LGBM
[LightGBM] [Info] Number of positive: 10782, number of negative: 25088
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012820 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2540
[LightGBM] [Info] Number of data points in the train set: 35870, number of used features: 54
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.300585 -> initscore=-0.844512
[LightGBM] [Info] Start training from score -0.844512
 saved: /content/drive/MyDrive/dissertation/outputs/synthetic_eval_numeric_only/breast/tstr_tvae.csv
⚠️  Skipping TRTS_CTGAN: synthetic labels not bi-class.
→ trts_tvae / LR
→ trts_tvae / LinSVM
→ trts_tvae / RF
→ trts_tvae / LGBM
[LightGBM] [Info] Number of positive: 10782, number of negative: 25088
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing w

Unnamed: 0,Setting,Model,AUC,F1,ACC,PREC,REC
0,TRTS_CTGAN,LGBM,0.749,0.407,0.797,0.679,0.291
2,TRTS_CTGAN,LR,0.819,0.474,0.803,0.656,0.371
4,TRTS_CTGAN,LinSVM,0.817,0.273,0.794,0.885,0.162
6,TRTS_CTGAN,RF,0.806,0.511,0.798,0.608,0.441
8,TRTS_TVAE,LGBM,0.762,0.413,0.806,0.752,0.285
11,TRTS_TVAE,LR,0.865,0.436,0.815,0.802,0.299
14,TRTS_TVAE,LinSVM,0.864,0.231,0.791,0.977,0.131
17,TRTS_TVAE,RF,0.859,0.539,0.815,0.669,0.452
20,TSTR_CTGAN,LGBM,0.81,0.514,0.803,0.627,0.435
22,TSTR_CTGAN,LR,0.829,0.495,0.813,0.699,0.383
