https://docs.ray.io/en/latest/tune/examples/tune_mnist_keras.html

In [3]:
from keras import models, layers, regularizers, optimizers, callbacks, utils
from keras.metrics import BinaryAccuracy, AUC, BinaryCrossentropy
from tensorflow.keras.backend import clear_session

import pandas as pd
import numpy as np
import os

from tensorflow import convert_to_tensor
utils.set_random_seed(1)
# from sklearn.metrics import confusion_matrix, accuracy_score, precision_score

# import matplotlib.pyplot as plt

In [2]:
from ray import train, tune
from ray.tune.search.optuna import OptunaSearch
# from ray.tune.schedulers import ASHAScheduler
from ray.tune.search import ConcurrencyLimiter

In [1]:
fn_train_x = '/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/train/train_x.parquet'
fn_train_y = '/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/train/train_y.parquet'

fn_test_x = '/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/train/test_x.parquet'
fn_test_y = '/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/train/test_y.parquet'

In [5]:
TrainX = pd.read_parquet(fn_train_x)
TrainX

Unnamed: 0,dist_plant_name_lv,dist_plant_name_lcs,dist_utility_name_lv,dist_utility_name_lcs,dist_plant_name_refined_lv,dist_plant_name_refined_lcs,dist_utility_name_refined_lv,dist_utility_name_refined_lcs,does_plant_name_ferc_contain_digits,does_plant_name_eia_contain_digits,...,prime_mover_code_eia_X.Missing.,plant_part_eia_plant,plant_part_eia_plant_ferc_acct,plant_part_eia_plant_gen,plant_part_eia_plant_match_ferc1,plant_part_eia_plant_operating_year,plant_part_eia_plant_prime_fuel,plant_part_eia_plant_prime_mover,plant_part_eia_plant_technology,plant_part_eia_plant_unit
0,-1.846154,-1.344828,0.096774,0.000000,-1.35,-1.730769,0.692308,0.636364,3.0,-3.0,...,-3.0,-3.0,-3.0,3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0
1,-1.846154,-1.034483,0.193548,0.648649,-1.35,-1.730769,0.923077,0.363636,3.0,-3.0,...,-3.0,3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0
2,-0.923077,-0.310345,0.000000,0.891892,-1.50,-1.153846,0.923077,1.181818,3.0,-3.0,...,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0,3.0,-3.0,-3.0
3,-1.730769,-1.344828,0.000000,0.162162,-1.35,-1.153846,0.692308,0.181818,3.0,-3.0,...,-3.0,-3.0,-3.0,3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0
4,-1.038462,-0.310345,0.000000,0.405405,-1.05,-0.461538,0.576923,0.636364,3.0,-3.0,...,-3.0,-3.0,-3.0,3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10193089,-1.269231,-1.241379,-0.967742,-0.486486,-1.95,-1.961538,-0.461538,-0.727273,-3.0,-3.0,...,-3.0,-3.0,-3.0,3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0
10193090,-1.038462,-0.827586,-0.870968,-0.324324,-1.20,-1.384615,-0.692308,-0.818182,-3.0,-3.0,...,-3.0,-3.0,-3.0,3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0
10193091,-1.615385,-1.241379,-0.774194,-0.324324,-1.80,-1.730769,-0.346154,-0.272727,-3.0,-3.0,...,-3.0,-3.0,-3.0,-3.0,-3.0,3.0,-3.0,-3.0,-3.0,-3.0
10193092,-0.461538,-0.620690,-1.064516,-0.648649,-1.80,-1.500000,-0.115385,-0.727273,-3.0,-3.0,...,-3.0,3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0,-3.0


In [14]:
def fit_mod(space):
    # Load data
    os.chdir('/Users/andrewbartnof/Documents/rmi/rematch_ferc_eia1/clean_data/')
    TrainX = convert_to_tensor(pd.read_parquet('train_x.parquet'))
    train_y = pd.read_parquet('train_y.parquet').pop('is_match')
    ValX = convert_to_tensor(pd.read_parquet('validation_x.parquet'))
    val_y = pd.read_parquet('validation_y.parquet').pop('is_match')

    # Establish class weights
    pos = train_y.sum()
    neg = (train_y == 0).sum()
    total = len(train_y)
    weight_for_0 = (1 / neg) * (total / 2.0)
    weight_for_1 = (1 / pos) * (total / 2.0)
    class_weight = {0: weight_for_0, 1: weight_for_1}

    # Fit model
    clear_session()
    model = models.Sequential()
    model.add(layers.Dropout(rate=space["dropout_1"]))
    model.add(layers.Dense(units=int(space["relu_1"]), activation='relu'))    
    model.add(layers.Dropout(rate=space["dropout_2"]))
    model.add(layers.Dense(units=int(space["relu_2"]), activation='relu'))   
    model.add(layers.Dense(1, activation='sigmoid'))
        
    model.compile(optimizer=optimizers.SGD(),
        loss='binary_crossentropy',
        metrics=[BinaryCrossentropy(), AUC()]
        )
        
    history = model.fit(
        TrainX, train_y, epochs=500, batch_size=128,  # hard-coded here
        validation_data=(ValX, val_y), 
        class_weight=class_weight,
        callbacks=callbacks.EarlyStopping(patience=5, start_from_epoch=10),
        verbose=0
    )

    best_binary_crossentropy = np.min(history.history['val_binary_crossentropy'][10:])
    best_auc = np.max(history.history['val_auc'][10:])
    train.report(
        {
            "binary_crossentropy": best_binary_crossentropy,
            "auc": best_auc
        }
    )

In [11]:
search_alg = OptunaSearch(metric="binary_crossentropy", mode="min")
search_alg = ConcurrencyLimiter(search_alg, max_concurrent=1)

space = {
    'relu_1': tune.randint(1, 57),
    'relu_2': tune.randint(1, 57),
    'dropout_1': tune.uniform(0.0001, 1),
    'dropout_2': tune.uniform(0.0001, 1),
    # 'objective':'binary',
    'metrics':['binary_logloss', 'auc']
}

In [12]:
tuner = tune.Tuner(
    fit_mod,
    tune_config=tune.TuneConfig(
        # scheduler=asha_scheduler,
        search_alg=search_alg,
        num_samples=500
    ),
    param_space=space,
    run_config=train.RunConfig(
        storage_path=hp_dir, 
        name="full_model_neural_network_hp_search"
    )
)
results = tuner.fit()

0,1
Current time:,2024-09-26 19:29:06
Running for:,00:00:31.53
Memory:,6.4/8.0 GiB

Trial name,# failures,error file
fit_mod_29bc5f86,1,"/tmp/ray/session_2024-09-26_19-22-34_336301_41696/artifacts/2024-09-26_19-28-34/full_neural_network_raytune_hp_search/driver_artifacts/fit_mod_29bc5f86_1_dropout_1=0.3849,dropout_2=0.7292,metrics=binary_logloss_auc,objective=binary,relu_1=54,relu_2=46_2024-09-26_19-28-34/error.txt"
fit_mod_91ff1745,1,"/tmp/ray/session_2024-09-26_19-22-34_336301_41696/artifacts/2024-09-26_19-28-34/full_neural_network_raytune_hp_search/driver_artifacts/fit_mod_91ff1745_2_dropout_1=0.6157,dropout_2=0.5117,metrics=binary_logloss_auc,objective=binary,relu_1=7,relu_2=25_2024-09-26_19-28-45/error.txt"

Trial name,status,loc,dropout_1,dropout_2,relu_1,relu_2
fit_mod_acd89ee4,RUNNING,127.0.0.1:41788,0.513131,0.487093,33,32
fit_mod_29bc5f86,ERROR,127.0.0.1:41773,0.384946,0.729172,54,46
fit_mod_91ff1745,ERROR,127.0.0.1:41783,0.615677,0.511665,7,25


2024-09-26 19:28:45,420	ERROR tune_controller.py:1331 -- Trial task failed for trial fit_mod_29bc5f86
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.12/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/opt/miniconda3/lib/python3.12/site-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/opt/miniconda3/lib/python3.12/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/opt/miniconda3/lib/python3.12/site-packages/ray/_private/worker.py", line 2656, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/miniconda3/lib/python3.12/site-packages/ray/_priv

In [16]:
# !jupyter nbconvert --to script full_model_neural_network_hp_search.ipynb

[NbConvertApp] Converting notebook full_model_neural_network_hp_search.ipynb to script
[NbConvertApp] Writing 3396 bytes to full_model_neural_network_hp_search.py
