In [2]:
import pandas as pd
import numpy as np
import os

from keras import models, layers, regularizers, optimizers, callbacks, utils, losses, metrics
# from keras.metrics import BinaryAccuracy, AUC, BinaryCrossentropy
from tensorflow.keras.backend import clear_session
from tensorflow import convert_to_tensor

from sklearn.preprocessing import StandardScaler
# utils.set_random_seed(1)

In [3]:
# Root directory of the project data. Defaults to the original external-drive
# location; set the REMATCH_DATA_DIR environment variable to run this notebook
# on a machine where the data lives somewhere else.
data_dir = os.environ.get(
    'REMATCH_DATA_DIR', '/Volumes/Extreme SSD/rematch_eia_ferc1_docker'
)
# Working directory for the Model A training files, located under data_dir.
dir_working_model_a_training = os.path.join(data_dir, 'working_data/model_a/model_a_training')
dir_working_model_a_training

'/Volumes/Extreme SSD/rematch_eia_ferc1_docker/working_data/model_a/model_a_training'

In [4]:
# File paths inside the Model A training directory: the three parquet tables
# that are read later, and the .keras file the trained model is saved to.
fn_x, fn_y, fn_id, fn_model = (
    os.path.join(dir_working_model_a_training, basename)
    for basename in ('x.parquet', 'y.parquet', 'id.parquet', 'model_a_ann.keras')
)

In [20]:
# Read the hyperparameter CSV and keep only the first row's value for each
# column, giving a flat {name: value} dict.
fn_params = os.path.join(dir_working_model_a_training, 'model_a_ann_hp.csv')
hp_columns = pd.read_csv(fn_params).to_dict(orient='list')
params = {name: values[0] for name, values in hp_columns.items()}
print(params)

{'dropout_1': 0.00012044868222029, 'dropout_2': 0.0632962786094702, 'relu_1': 33, 'relu_2': 20, 'epochs': 20}


In [23]:
def np_cleaning(X):
    """Clip values to [-3, 3] and replace NaN with 0.

    Clipping runs first, so +inf/-inf are already turned into +3/-3 before
    ``nan_to_num`` is applied; in practice only NaN entries are mapped to 0.

    Parameters
    ----------
    X : array-like
        Numeric values to clean.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``X`` with every value in [-3, 3].
    """
    bounded = np.clip(X, a_min=-3, a_max=3)
    return np.nan_to_num(bounded, nan=0.0, posinf=0.0, neginf=0.0)

In [24]:
# Load the three training tables, read in the order x, y, id.
# X and Y are later passed to model.fit as inputs and targets.
X, Y, ID = (pd.read_parquet(path) for path in (fn_x, fn_y, fn_id))

In [25]:
# The R script that creates the new data tranches already does all of this
# automatically; it only has to be repeated here for the final model training.
# Steps: standardize X, clip/clean the result, then convert it to a tensor.
standard_scaler = StandardScaler().fit(X)
XClean = convert_to_tensor(np_cleaning(standard_scaler.transform(X)))

In [26]:
# Reset Keras global state so re-running this cell starts from a clean session.
clear_session()

# Network: dropout -> ReLU dense -> dropout -> ReLU dense -> single sigmoid
# output unit. Dropout rates and layer widths come from the hyperparameter dict.
model = models.Sequential([
    layers.Dropout(rate=params["dropout_1"]),
    layers.Dense(units=int(params["relu_1"]), activation='relu'),
    layers.Dropout(rate=params["dropout_2"]),
    layers.Dense(units=int(params["relu_2"]), activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# No optimizer is passed, so Keras falls back to its compile() default.
model.compile(
    loss=losses.BinaryCrossentropy(),
    metrics=[
        metrics.BinaryCrossentropy(),
        metrics.BinaryAccuracy(),
        metrics.AUC(),
    ],
)

# Batch size was previously hard-coded to 128. It is now taken from the
# hyperparameter file when a 'batch_size' column is present and otherwise
# falls back to the same value of 128.
batch_size = int(params.get('batch_size', 128))

# NOTE(review): no random seed is set in the visible cells, so the fitted
# weights will differ from run to run.
history = model.fit(
    XClean, Y,
    epochs=int(params['epochs']),
    batch_size=batch_size,
    verbose=1,
)

Epoch 1/20
[1m49800/49800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 418us/step - auc: 0.9179 - binary_accuracy: 0.9988 - binary_crossentropy: 0.0051 - loss: 0.0051
Epoch 2/20
[1m49800/49800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 407us/step - auc: 0.9598 - binary_accuracy: 0.9996 - binary_crossentropy: 0.0020 - loss: 0.0020
Epoch 3/20
[1m49800/49800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 396us/step - auc: 0.9616 - binary_accuracy: 0.9997 - binary_crossentropy: 0.0017 - loss: 0.0017
Epoch 4/20
[1m49800/49800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 395us/step - auc: 0.9634 - binary_accuracy: 0.9997 - binary_crossentropy: 0.0019 - loss: 0.0019
Epoch 5/20
[1m49800/49800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 394us/step - auc: 0.9685 - binary_accuracy: 0.9997 - binary_crossentropy: 0.0016 - loss: 0.0016
Epoch 6/20
[1m49800/49800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 453us/step - auc: 0.9688 - b

In [27]:
# Persist the fitted network to the .keras file path held in fn_model.
model.save(fn_model)

In [28]:
# Export this notebook as a plain Python script (model_a_ann_fit.py) via nbconvert.
!jupyter nbconvert --to script model_a_ann_fit.ipynb

[NbConvertApp] Converting notebook model_a_ann_fit.ipynb to script
[NbConvertApp] Writing 2445 bytes to model_a_ann_fit.py
