In [1]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Warnings
import warnings

# Time
import time

# Progress Bars
from tqdm.notebook import tqdm

# Scikit-Learn
from sklearn.metrics import f1_score
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# TensorFlow
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential  
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.layers import Dense, Dropout 

# Optuna
import optuna

2025-07-14 21:54:38.908215: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-14 21:54:38.911593: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-14 21:54:38.944994: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-14 21:54:38.945111: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-14 21:54:38.946212: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [4]:
# Suppress every warning category from here on so cell outputs stay compact.
warnings.simplefilter("ignore")

In [7]:
# Quick check that the notebook progress bar renders: ten steps of 0.1 s each.
import time
from tqdm.notebook import tqdm

loading_bar = tqdm(range(10), desc="Loading")
for step in loading_bar:
    time.sleep(0.1)

Loading:   0%|          | 0/10 [00:00<?, ?it/s]

In [10]:
def create_model(trial, input_size):
    """Assemble an uncompiled Sequential binary classifier sampled from ``trial``.

    Args:
        trial: Object exposing ``suggest_int``, ``suggest_float`` and
            ``suggest_categorical`` (presumably an Optuna trial).
        input_size: Number of input features; also used as the width of the
            first Dense layer.

    Returns:
        The Sequential model: a ReLU input layer, 0-3 sampled
        (Dropout, Dense) pairs and a single sigmoid output unit.
    """
    network = Sequential()
    network.add(Dense(input_size, input_shape=(input_size,), activation='relu'))

    hidden_count = trial.suggest_int('num_layers', 0, 3)
    for idx in range(hidden_count):
        # The suggestion order (units, dropout, activation) is kept stable on purpose:
        # it determines the order in which the parameters are registered on the trial.
        width = trial.suggest_int(f'n_units_layer_{idx}', 10, 50, step=5)
        drop_prob = trial.suggest_float(f'dropout_rate_layer_{idx}', 0.0, 0.5)
        activation = trial.suggest_categorical(f'actv_func_layer_{idx}', ['relu', 'tanh', 'elu'])

        # Dropout is applied to the input of each sampled Dense layer.
        network.add(Dropout(drop_prob))
        network.add(Dense(width, activation=activation))

    network.add(Dense(1, activation='sigmoid'))
    return network

In [13]:
def create_optimizer(trial):
    """Sample an optimizer (Adam or SGD) and its hyperparameters from ``trial``.

    Args:
        trial: Object exposing ``suggest_categorical`` and ``suggest_float``
            (presumably an Optuna trial).

    Returns:
        An ``SGD`` instance with sampled learning rate and momentum, or an
        ``Adam`` instance with a sampled learning rate. All three ranges are
        log-uniform over [1e-5, 1e-1].
    """
    chosen = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])

    if chosen != 'SGD':
        adam_lr = trial.suggest_float('adam_lr', 1e-5, 1e-1, log=True)
        return Adam(learning_rate=adam_lr)

    # Learning rate is suggested before momentum, matching the keyword order.
    sgd_lr = trial.suggest_float('sgd_lr', 1e-5, 1e-1, log=True)
    sgd_momentum = trial.suggest_float('sgd_momentum', 1e-5, 1e-1, log=True)
    return SGD(learning_rate=sgd_lr, momentum=sgd_momentum)

In [16]:
@tf.function
def recall_m(y_true, y_pred):
    """Recall of the rounded predictions: true positives / actual positives.

    Both the element-wise product and the labels are clipped to [0, 1] and
    rounded before counting; ``K.epsilon()`` in the denominator avoids a
    division by zero when there are no positive labels.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    positive_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(positive_mask) + K.epsilon())

@tf.function
def precision_m(y_true, y_pred):
    """Precision of the rounded predictions: true positives / predicted positives.

    Both the element-wise product and the predictions are clipped to [0, 1]
    and rounded before counting; ``K.epsilon()`` in the denominator avoids a
    division by zero when nothing is predicted positive.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

@tf.function
def f1_m(y_true, y_pred):
    """F1 score built from ``precision_m`` and ``recall_m``.

    Computed as 2 * (P * R) / (P + R + epsilon); the epsilon keeps the ratio
    finite when both precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [19]:
def preprocessing(X, numeric_preprocessor, categorical_preprocessor, is_train=True):
    """Scale the numeric columns and one-hot encode the categorical columns of ``X``.

    Column names are read from the notebook-level lists ``numerical_feats`` and
    ``categorical_feats``. The input frame is copied, so the caller's data is
    left untouched.

    Args:
        X: Feature DataFrame containing the numeric and categorical columns.
        numeric_preprocessor: Transformer applied to the numeric columns.
        categorical_preprocessor: Encoder applied to the categorical columns;
            its output must support ``.toarray()`` and it must provide
            ``get_feature_names_out()``.
        is_train: When True the preprocessors are fitted on ``X``
            (``fit_transform``); otherwise only ``transform`` is called.

    Returns:
        Tuple ``(X_processed, numeric_preprocessor, categorical_preprocessor)``
        where ``X_processed`` has a fresh 0..n-1 index.
    """
    frame = X.copy()

    # Pick the fitting or the transform-only variant once, then apply uniformly.
    if is_train:
        apply_numeric = numeric_preprocessor.fit_transform
        apply_categorical = categorical_preprocessor.fit_transform
    else:
        apply_numeric = numeric_preprocessor.transform
        apply_categorical = categorical_preprocessor.transform

    frame[numerical_feats] = apply_numeric(frame[numerical_feats])
    encoded = apply_categorical(frame[categorical_feats]).toarray()

    one_hot = pd.DataFrame(encoded, columns=categorical_preprocessor.get_feature_names_out())
    # The index is reset so the remaining columns align row-by-row with ``one_hot``.
    remainder = frame.drop(columns=categorical_feats).reset_index(drop=True)
    combined = pd.concat([remainder, one_hot], axis=1)
    return combined, numeric_preprocessor, categorical_preprocessor

In [22]:
def train(trial, df_train, df_val=None, use_pruner=False):
    """Train one sampled network and report on it.

    Args:
        trial: Source of hyperparameters (model, optimizer and epoch count).
        df_train: Training DataFrame; the target column is named ``'y'``.
        df_val: Optional validation DataFrame with the same columns.
        use_pruner: When True and ``df_val`` is given, an Optuna Keras pruning
            callback monitoring ``'val_f1_m'`` is attached.

    Returns:
        The mean of the per-epoch ``'val_f1_m'`` values when ``df_val`` is
        given; otherwise the fitted model itself.
    """
    has_validation = df_val is not None

    features_train, target_train = df_train.drop(columns=['y']), df_train['y']

    # Preprocessors are fitted on the training split only and reused for validation.
    scaler = StandardScaler()
    encoder = OneHotEncoder(handle_unknown="ignore")
    features_train, scaler, encoder = preprocessing(features_train, scaler, encoder, is_train=True)

    validation_pair = None
    if has_validation:
        features_val, _, _ = preprocessing(df_val.drop(columns=['y']), scaler, encoder, is_train=False)
        validation_pair = (features_val, df_val['y'])

    model = create_model(trial, features_train.shape[1])
    optimizer = create_optimizer(trial)

    if use_pruner and has_validation:
        callbacks = [optuna.integration.TFKerasPruningCallback(trial, 'val_f1_m')]
    else:
        callbacks = []

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[f1_m])

    # The epoch count is suggested last, after the model and optimizer parameters.
    n_epochs = trial.suggest_int('epoch', 15, 50)
    history = model.fit(
        features_train, target_train,
        epochs=n_epochs,
        batch_size=64,
        validation_data=validation_pair,
        callbacks=callbacks,
        verbose=0
    )

    if not has_validation:
        return model
    return np.mean(history.history['val_f1_m'])

In [25]:
def objective(trial, df_train, use_pruner=False):
    """Optuna objective: validation score of one trial on a fixed 90/10 split.

    ``df_train`` is split with ``random_state=0`` so every trial sees the same
    validation rows; the value returned by ``train`` is passed through.
    """
    fit_part, holdout_part = train_test_split(df_train, test_size=0.1, random_state=0)
    return train(trial, fit_part, holdout_part, use_pruner)

In [28]:
def train_and_evaluate_final(df_train, df_test, **kwargs):
    """Fit the network described by ``kwargs`` on ``df_train`` and print its test F1.

    Args:
        df_train: Training DataFrame; the target column is named ``'y'``.
        df_test: Test DataFrame with the same columns.
        **kwargs: Hyperparameters keyed like the tuned trial parameters
            (``num_layers``, ``n_units_layer_{i}``, ``dropout_rate_layer_{i}``,
            ``actv_func_layer_{i}``, ``optimizer``, ``sgd_lr``, ``sgd_momentum``,
            ``adam_lr``, ``epoch``). Missing keys fall back to the defaults
            visible in the body.

    Returns:
        None. The F1 score on ``df_test`` is printed.
    """
    target_train = df_train['y']
    target_test = df_test['y']

    # Fit the preprocessors on the training data, then reuse them for the test data.
    scaler = StandardScaler()
    encoder = OneHotEncoder(handle_unknown="ignore")
    features_train, scaler, encoder = preprocessing(df_train.drop(columns=['y']), scaler, encoder, is_train=True)
    features_test, _, _ = preprocessing(df_test.drop(columns=['y']), scaler, encoder, is_train=False)

    n_features = features_train.shape[1]
    model = Sequential()
    model.add(Dense(n_features, input_shape=(n_features,), activation='relu'))

    hidden_count = kwargs.get('num_layers', 0)
    for layer_idx in range(hidden_count):
        drop_prob = kwargs.get(f'dropout_rate_layer_{layer_idx}', 0)
        width = kwargs.get(f'n_units_layer_{layer_idx}', 10)
        activation = kwargs.get(f'actv_func_layer_{layer_idx}', 'relu')
        model.add(Dropout(drop_prob))
        model.add(Dense(width, activation=activation))

    model.add(Dense(1, activation='sigmoid'))

    if kwargs.get('optimizer', 'Adam') == 'SGD':
        optimizer = SGD(
            learning_rate=kwargs.get('sgd_lr', 1e-5),
            momentum=kwargs.get('sgd_momentum', 1e-5)
        )
    else:
        optimizer = Adam(learning_rate=kwargs.get('adam_lr', 1e-5))

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[f1_m])
    n_epochs = kwargs.get('epoch', 15)
    model.fit(features_train, target_train, epochs=n_epochs, batch_size=64, verbose=1)

    # Threshold the predicted probabilities at 0.5 to obtain hard class labels.
    probabilities = model.predict(features_test)
    predicted_labels = [1 if prob > 0.5 else 0 for prob in probabilities]

    separator = "=" * 100
    print(separator)
    print("F1-Score on Test Data:", f1_score(target_test, predicted_labels))

### TPE (Tree-structured Parzen Estimator)

In [31]:
# Load the semicolon-delimited training table.
# NOTE(review): the absolute path ties this cell to the current environment.
df = pd.read_csv(
    "/work/train.csv",
    sep=";",
)

In [34]:
# Encode the target as 1/0. Already-encoded values map to themselves, so re-running
# this cell leaves the labels unchanged instead of turning every one of them into NaN.
df['y'] = df['y'].map({'yes': 1, 'no': 0, 1: 1, 0: 0})

In [37]:
# Hold out 10% of the rows as the final test set; the fixed seed makes the split repeatable.
df_train, df_test = train_test_split(
    df,
    test_size=0.1,
    random_state=0,
)

In [40]:
# Names of the numeric feature columns (target excluded); read by `preprocessing`.
# Iterating a DataFrame yields its column labels.
numerical_feats = list(
    df_train.drop(columns=['y']).select_dtypes(include=np.number)
)

In [43]:
# Names of the non-numeric feature columns (target excluded); read by `preprocessing`.
# Iterating a DataFrame yields its column labels.
categorical_feats = list(
    df_train.drop(columns=['y']).select_dtypes(exclude=np.number)
)

### Performing Hyperparameter Tuning with TPE

In [46]:
# In-memory study that maximizes the objective using a seeded TPE sampler.
tpe_sampler = optuna.samplers.TPESampler(seed=0)
study = optuna.create_study(sampler=tpe_sampler, direction='maximize')

[I 2025-07-14 21:58:24,920] A new study created in memory with name: no-name-180c7249-dc20-4e5b-ac4a-ebb44e36f087


In [49]:
# Run 50 trials on the training split; n_jobs=-1 lets Optuna run trials in parallel.
# NOTE(review): with parallel trials the sampler seed alone presumably does not make
# the search reproducible — confirm whether that matters for this analysis.
def _tpe_objective(trial):
    """Bind the training frame so the callable matches Optuna's one-argument objective."""
    return objective(trial, df_train)


study.optimize(_tpe_objective, n_trials=50, n_jobs=-1)

[I 2025-07-14 22:01:23,301] Trial 11 finished with value: 0.19522500932216644 and parameters: {'num_layers': 0, 'optimizer': 'SGD', 'sgd_lr': 3.558370973165931e-05, 'sgd_momentum': 0.03672585775701328, 'epoch': 15}. Best is trial 11 with value: 0.19522500932216644.
[I 2025-07-14 22:02:00,578] Trial 2 finished with value: 0.41338187593378517 and parameters: {'num_layers': 0, 'optimizer': 'Adam', 'adam_lr': 0.00011585285852973606, 'epoch': 19}. Best is trial 2 with value: 0.41338187593378517.
[I 2025-07-14 22:02:06,425] Trial 12 finished with value: 0.0026843562281053317 and parameters: {'num_layers': 3, 'n_units_layer_0': 15, 'dropout_rate_layer_0': 0.18851756731215935, 'actv_func_layer_0': 'tanh', 'n_units_layer_1': 30, 'dropout_rate_layer_1': 0.16723541425753158, 'actv_func_layer_1': 'relu', 'n_units_layer_2': 15, 'dropout_rate_layer_2': 0.15129208053764825, 'actv_func_layer_2': 'elu', 'optimizer': 'SGD', 'sgd_lr': 2.031560431309805e-05, 'sgd_momentum': 0.005417695169143028, 'epoch': 

In [52]:
# Summarize the best trial: its objective value followed by one line per hyperparameter.
best_trial = study.best_trial

print("Best Trial:")
print("    Value: ", best_trial.value)
print("    Hyperparameters: ")
for param_name, param_value in best_trial.params.items():
    print(f"        {param_name}: {param_value}")

Best Trial:
    Value:  0.5573793289562067
    Hyperparameters: 
        num_layers: 2
        n_units_layer_0: 40
        dropout_rate_layer_0: 0.018129944380239387
        actv_func_layer_0: elu
        n_units_layer_1: 45
        dropout_rate_layer_1: 0.48471631771715173
        actv_func_layer_1: elu
        optimizer: Adam
        adam_lr: 0.001991827840760064
        epoch: 24


In [55]:
# Display the best trial's hyperparameters as a dict (the form unpacked into the final training call).
best_trial.params

{'num_layers': 2,
 'n_units_layer_0': 40,
 'dropout_rate_layer_0': 0.018129944380239387,
 'actv_func_layer_0': 'elu',
 'n_units_layer_1': 45,
 'dropout_rate_layer_1': 0.48471631771715173,
 'actv_func_layer_1': 'elu',
 'optimizer': 'Adam',
 'adam_lr': 0.001991827840760064,
 'epoch': 24}

In [58]:
# Refit on the full training split with the best hyperparameters and print the F1 score on the held-out test split.
train_and_evaluate_final(df_train, df_test, **best_trial.params)

Epoch 1/24
Epoch 2/24
Epoch 3/24
Epoch 4/24
Epoch 5/24
Epoch 6/24
Epoch 7/24
Epoch 8/24
Epoch 9/24
Epoch 10/24
Epoch 11/24
Epoch 12/24
Epoch 13/24
Epoch 14/24
Epoch 15/24
Epoch 16/24
Epoch 17/24
Epoch 18/24
Epoch 19/24
Epoch 20/24
Epoch 21/24
Epoch 22/24
Epoch 23/24
Epoch 24/24
F1-Score on Test Data: 0.5915221579961464


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b0bd6d5e-ce51-4cdb-a0ff-8c5849b40992' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>