In [1]:
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import StratifiedKFold

In [2]:
# Load the training and test tables; the first CSV column is used as the index.
train = pd.read_csv('../Unbalanced_c_Dataset.csv', index_col=0)
test_set = pd.read_csv('../testset.csv', index_col=0).drop(columns='id')

# Training features / target: everything except 'id' and the 'smoking' label.
train_copy = train.drop(columns='id')
y = train_copy['smoking']
X = train_copy.drop(columns='smoking')

# Same feature / target separation for the test table.
y_test_new = test_set['smoking']
X_test_new = test_set.drop(columns='smoking')

In [3]:
import optuna
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Shuffled, seeded 5-fold stratified splitter; read as a global by objective().
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

def objective(trial):
    """Optuna objective: mean cross-validated accuracy of one TabNet configuration.

    One set of hyper-parameters is drawn from ``trial``. For every split
    yielded by the module-level ``skf`` over the module-level ``X`` / ``y``,
    a fresh ``TabNetClassifier`` is trained on the training rows and scored
    on the validation rows.

    Parameters
    ----------
    trial
        Optuna trial; only its ``suggest_int`` / ``suggest_float`` methods
        are used.

    Returns
    -------
    The mean of the per-fold validation accuracies.
    """
    # Model-level hyper-parameters, forwarded unchanged to TabNetClassifier.
    # The suggest_* calls run in the same order as before (dict displays are
    # evaluated top to bottom).
    architecture = {
        "n_d": trial.suggest_int("n_d", 16, 64, step=8),
        "n_a": trial.suggest_int("n_a", 16, 64, step=8),
        "n_steps": trial.suggest_int("n_steps", 4, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "lambda_sparse": trial.suggest_float("lambda_sparse", 1e-6, 1e-4, log=True),
    }
    # Training-level hyper-parameters.
    learning_rate = trial.suggest_float("lr", 5e-4, 5e-3, log=True)
    n_epochs = trial.suggest_int("max_epochs", 50, 200)

    fold_accuracies = []
    for fit_rows, holdout_rows in skf.split(X, y):
        # The scaler is fitted on the training rows only and then applied to
        # the validation rows.
        fold_scaler = StandardScaler()
        features_fit = fold_scaler.fit_transform(X.iloc[fit_rows]).astype(np.float32)
        features_holdout = fold_scaler.transform(X.iloc[holdout_rows]).astype(np.float32)
        labels_fit = y.iloc[fit_rows].values.astype(np.int64)
        labels_holdout = y.iloc[holdout_rows].values.astype(np.int64)

        classifier = TabNetClassifier(
            optimizer_fn=torch.optim.Adam,
            optimizer_params={"lr": learning_rate},
            verbose=0,
            seed=42,
            **architecture,
        )
        # No eval_set / patience is passed to fit().
        classifier.fit(
            features_fit,
            labels_fit,
            max_epochs=n_epochs,
            batch_size=1024,
            virtual_batch_size=128,
        )

        fold_accuracies.append(
            accuracy_score(labels_holdout, classifier.predict(features_holdout))
        )

    return np.mean(fold_accuracies)


# Seeded TPE sampler; the study maximises the value returned by objective().
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(sampler=sampler, direction="maximize")
study.optimize(objective, n_trials=5)

# Report the winning configuration and its score.
print(f"best_params: {study.best_params}")
print(f"best_value: {study.best_value}")


[I 2025-08-23 17:24:21,164] A new study created in memory with name: no-name-f7460e5b-67b5-483f-b294-483d118da778
[I 2025-08-23 17:27:57,824] Trial 0 finished with value: 0.7263333333333334 and parameters: {'n_d': 32, 'n_a': 64, 'n_steps': 9, 'gamma': 1.5986584841970366, 'lambda_sparse': 2.05133826308745e-06, 'lr': 0.0007160849144555758, 'max_epochs': 58}. Best is trial 0 with value: 0.7263333333333334.
[I 2025-08-23 17:33:53,772] Trial 1 finished with value: 0.7666666666666666 and parameters: {'n_d': 64, 'n_a': 48, 'n_steps': 8, 'gamma': 1.0205844942958024, 'lambda_sparse': 8.706020878304853e-05, 'lr': 0.003399481210795565, 'max_epochs': 82}. Best is trial 1 with value: 0.7666666666666666.
[I 2025-08-23 17:39:49,475] Trial 2 finished with value: 0.7542666666666668 and parameters: {'n_d': 24, 'n_a': 24, 'n_steps': 6, 'gamma': 1.524756431632238, 'lambda_sparse': 7.30953983591291e-06, 'lr': 0.0009776854331372624, 'max_epochs': 142}. Best is trial 1 with value: 0.7666666666666666.
[I 2025

best_params: {'n_d': 48, 'n_a': 16, 'n_steps': 8, 'gamma': 1.1705241236872914, 'lambda_sparse': 1.3492834268013243e-06, 'lr': 0.004444833953509465, 'max_epochs': 195}
best_value: 0.7669333333333335


In [4]:

# Final fit using the best hyper-parameters printed by the Optuna study,
# then accuracy on the training split and on the separate test table.
X_train_f, X_val_f, y_train_f, y_val_f = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardise with statistics from the training split only and reuse them
# for the validation split and the test table.
scaler = StandardScaler().fit(X_train_f)
X_train_f_scaled = scaler.transform(X_train_f)
X_val_f_scaled = scaler.transform(X_val_f)
X_test_new_scaled = scaler.transform(X_test_new)

# Features as float32 arrays.
X_train_scaled_np = X_train_f_scaled.astype(np.float32)
X_val_scaled_np = X_val_f_scaled.astype(np.float32)
X_test_new_scaled_np = X_test_new_scaled.astype(np.float32)

# Labels as int64 arrays.
# NOTE(review): X_val_scaled_np / y_val_np are prepared but not evaluated in
# this cell.
y_train_np = y_train_f.values.astype(np.int64)
y_val_np = y_val_f.values.astype(np.int64)
y_test_new_np = y_test_new.values.astype(np.int64)

tabnet_model = TabNetClassifier(
    n_d=48,
    n_a=16,
    n_steps=8,
    gamma=1.1705241236872914,
    lambda_sparse=1.3492834268013243e-06,
    optimizer_fn=torch.optim.Adam,
    optimizer_params={"lr": 0.004444833953509465},
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    scheduler_params={"step_size": 10, "gamma": 0.9},
    verbose=10,
    seed=42,
)

tabnet_model.fit(
    X_train=X_train_scaled_np,
    y_train=y_train_np,
    max_epochs=195,
    batch_size=1024,
    virtual_batch_size=128,
    num_workers=4,
)

# Accuracy on the data the model was fitted on.
y_train_pred = tabnet_model.predict(X_train_scaled_np)
train_acc = accuracy_score(y_train_np, y_train_pred)
print(f"TabNet Train Accuracy: {train_acc:.4f}")

# Accuracy and per-class report on the separate test table.
y_pred = tabnet_model.predict(X_test_new_scaled_np)
acc = accuracy_score(y_test_new_np, y_pred)
print(f"\n TabNet Test Accuracy: {acc:.4f}")
print(classification_report(y_test_new_np, y_pred, digits=4))



epoch 0  | loss: 1.09108 |  0:00:26s
epoch 10 | loss: 0.57457 |  0:04:48s
epoch 20 | loss: 0.52331 |  0:09:09s
epoch 30 | loss: 0.51117 |  0:13:27s
epoch 40 | loss: 0.49713 |  0:17:45s
epoch 50 | loss: 0.49471 |  0:22:04s
epoch 60 | loss: 0.49091 |  0:26:21s
epoch 70 | loss: 0.48388 |  0:30:36s
epoch 80 | loss: 0.48212 |  0:34:51s
epoch 90 | loss: 0.48392 |  0:39:05s
epoch 100| loss: 0.47284 |  0:43:20s
epoch 110| loss: 0.47149 |  0:47:34s
epoch 120| loss: 0.46612 |  0:51:48s
epoch 130| loss: 0.46416 |  0:56:02s
epoch 140| loss: 0.46545 |  1:00:17s
epoch 150| loss: 0.46545 |  1:04:37s
epoch 160| loss: 0.45923 |  1:08:55s
epoch 170| loss: 0.45668 |  1:13:14s
epoch 180| loss: 0.455   |  1:17:33s
epoch 190| loss: 0.45099 |  1:21:51s
TabNet Train Accuracy: 0.7899

 TabNet Test Accuracy: 0.7825
              precision    recall  f1-score   support

           0     0.8343    0.7050    0.7642      1000
           1     0.7446    0.8600    0.7981      1000

    accuracy                       

In [None]:
# 5-fold stratified cross-validation of the tuned TabNet configuration.
#
# Fixes relative to the previous version of this cell:
#   * each fold's model is now fitted on that fold's own scaled training rows
#     (it was fitted on X_train_scaled_np / y_train_np left over from the
#     earlier hold-out cell, so every fold saw the same data, including rows
#     from its own validation split);
#   * train accuracy is computed with the fold's `model`, not the earlier
#     `tabnet_model`;
#   * per-fold validation accuracies are collected in `scores` and summarised.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = []

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    X_train_fold, X_val_fold = X.iloc[train_idx], X.iloc[val_idx]
    y_train_fold, y_val_fold = y.iloc[train_idx], y.iloc[val_idx]

    # Fit the scaler on the training rows only, then apply it to both splits.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_fold)
    X_val_scaled = scaler.transform(X_val_fold)

    # float32 features / int64 labels as NumPy arrays.
    X_train_fold = X_train_scaled.astype(np.float32)
    X_val_fold = X_val_scaled.astype(np.float32)
    y_train_fold = y_train_fold.values.astype(np.int64)
    y_val_fold = y_val_fold.values.astype(np.int64)

    model = TabNetClassifier(
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=0.004444833953509465),
        scheduler_params={"step_size": 10, "gamma": 0.9},
        scheduler_fn=torch.optim.lr_scheduler.StepLR,
        verbose=10,
        n_d=48, n_a=16, n_steps=8, gamma=1.1705241236872914,
        lambda_sparse=1.3492834268013243e-06,
        seed=42,
    )

    # Train on this fold's training rows.
    model.fit(
        X_train=X_train_fold, y_train=y_train_fold,
        batch_size=1024,
        virtual_batch_size=128,
        max_epochs=195,
        num_workers=4,
    )

    # Validation accuracy for this fold.
    y_pred_val = model.predict(X_val_fold)
    acc = accuracy_score(y_val_fold, y_pred_val)
    scores.append(acc)

    # Training accuracy from the same fold model.
    y_train_pred = model.predict(X_train_fold)
    train_acc = accuracy_score(y_train_fold, y_train_pred)

    print(f"TabNet Train Accuracy: {train_acc:.4f}")
    print(f"\n TabNet Test Accuracy: {acc:.4f}")

# Summary over all folds.
print(f"\n TabNet CV Accuracy: {np.mean(scores):.4f} +/- {np.std(scores):.4f}")
    


   


