In [1]:
import pandas as pd
import numpy as np

from keras import models, layers, regularizers, optimizers, callbacks, utils, losses, metrics
# from keras.metrics import BinaryAccuracy, AUC, BinaryCrossentropy
from tensorflow.keras.backend import clear_session
from tensorflow import convert_to_tensor

from ray import train, tune
from ray.tune.search.optuna import OptunaSearch
from ray.tune.search import ConcurrencyLimiter
from sklearn.preprocessing import StandardScaler
# utils.set_random_seed(1)

In [2]:
fn_x = '/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/train/x.parquet'
fn_y = '/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/train/y.parquet'
fn_id = '/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/train/id.parquet'

dir_hyperparameters = '/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/train'
fn_out = '/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/train/ann/grid_search.csv'

In [3]:
def np_cleaning(X):
    X = np.clip(X, a_min=-3, a_max=3)
    X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)
    return X

In [4]:
space = {
    'dropout_1': tune.uniform(0, 0.95),
    'dropout_2': tune.uniform(0, 0.95),
    'relu_1': tune.randint(1, 59),
    'relu_2': tune.randint(1, 30)
}

search_alg = OptunaSearch(metric=["binary_crossentropy"], mode=["min"])
search_alg = ConcurrencyLimiter(search_alg, max_concurrent=2)

In [5]:
def fit_mod(space):
    X = pd.read_parquet(fn_x)
    Y = pd.read_parquet(fn_y)
    ID = pd.read_parquet(fn_id)
    
    # Split data into training and validation
    fold_variable = 1
    is_train_mask = (ID['fold_num'] != fold_variable).values
    
    XTrain = X.loc[is_train_mask]
    XVal = X.loc[~is_train_mask]
    y_train = Y.loc[is_train_mask, 'is_match']
    y_val = Y.loc[~is_train_mask, 'is_match']
    
    # X value processing
    standard_scaler = StandardScaler()
    standard_scaler.fit(XTrain)
    XTrain = standard_scaler.transform(XTrain)
    XVal  = standard_scaler.transform(XVal)
    
    XTrain = np_cleaning(XTrain)
    XVal  = np_cleaning(XVal)
    
    XTrain = convert_to_tensor(XTrain)
    XVal = convert_to_tensor(XVal)

    # Fit model
    clear_session()
    model = models.Sequential()
    model.add(layers.Dropout(rate=space["dropout_1"]))
    model.add(layers.Dense(units=space["relu_1"], activation='relu'))    
    model.add(layers.Dropout(rate=space["dropout_2"]))
    model.add(layers.Dense(units=space["relu_2"], activation='relu'))   
    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        loss=losses.BinaryCrossentropy(),
        metrics=[
            metrics.BinaryCrossentropy(),
            metrics.BinaryAccuracy(), 
            metrics.AUC()
        ]
    )
        
    history = model.fit(
        XTrain, y_train, epochs=500, batch_size=128,  # hard-coded here
        validation_data=(XVal, y_val), 
        callbacks=callbacks.EarlyStopping(patience=5, start_from_epoch=10),
        verbose=0
    )

    train.report(
        {
            "binary_crossentropy": np.min(history.history['val_binary_crossentropy'][10:]),
            "auc": np.min(history.history['val_auc'][10:]),
            'binary_accuracy': np.min(history.history['val_binary_accuracy'][10:])
        }
    )

In [None]:
tuner = tune.Tuner(
    fit_mod,
    tune_config=tune.TuneConfig(
        num_samples=250,
        search_alg=search_alg,
    ),
    param_space=space,
    run_config=train.RunConfig(
        storage_path=dir_hyperparameters, 
        name="ann"
    )
)
results = tuner.fit()

In [12]:
Grid = results.get_dataframe().copy()
Grid.index.name = 'order'
RankedGrid = Grid.sort_values(['binary_crossentropy', 'auc'], ascending=[True, False]).reset_index()
RankedGrid.index.name = 'rank'
RankedGrid.to_csv(fn_out)
RankedGrid.head()

Unnamed: 0_level_0,order,binary_crossentropy,auc,binary_accuracy,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,...,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/dropout_1,config/dropout_2,config/relu_1,config/relu_2,logdir
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,224,0.000642,0.981097,0.99984,1735937757,,False,1,bddace64,2025-01-03_12-55-57,...,41572,Macmini,127.0.0.1,681.699654,1,0.001017,0.151072,50,27,bddace64
1,153,0.000646,0.981911,0.999872,1735917884,,False,1,c5fab502,2025-01-03_07-24-44,...,26403,Macmini,127.0.0.1,521.569143,1,0.001477,0.066359,55,29,c5fab502
2,195,0.000654,0.978697,0.999855,1735929055,,False,1,3eb5cf5b,2025-01-03_10-30-55,...,35103,Macmini,127.0.0.1,492.462615,1,0.003402,0.133375,53,28,3eb5cf5b
3,206,0.000673,0.985119,0.999831,1735931930,,False,1,611b1e1d,2025-01-03_11-18-50,...,37206,Macmini,127.0.0.1,498.71186,1,0.001111,0.129905,51,26,611b1e1d
4,164,0.000681,0.984718,0.999844,1735920503,,False,1,c7eb9a38,2025-01-03_08-08-23,...,28187,Macmini,127.0.0.1,585.967197,1,0.015476,0.131372,58,29,c7eb9a38
