In [1]:
import numpy as np
from typing import Tuple, Dict
np.set_printoptions(edgeitems=30, linewidth=100000, 
    formatter=dict(float=lambda x: "%.3g" % x))
import matplotlib.pyplot as plt
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint
from einops import rearrange, repeat
from ToApps.to_apps import to_slack
import tensorflow as tf
from tensorflow.math import logical_not
from builderfuncs import build_transformer, save_whole_model, restore_model
from builderfuncs import restore_model, EarlyStopAndSave
from parameterdicts import TransformerParameters
import os
import datetime


# Load the U2 dataset; the first CSV column becomes the index.
df = pd.read_csv('data/scaled_U2_data.csv', index_col=0)
# Remove the "UNNAMED: 0" column (presumably a stray index column from an
# earlier export -- TODO confirm). Raises KeyError if the column is absent.
df.drop("UNNAMED: 0", axis=1, inplace=True)

devices = tf.config.list_physical_devices('GPU')
# Enable memory growth on every visible GPU. Iterating (instead of indexing
# devices[0]) keeps the cell runnable on a machine with no GPU and applies the
# same setting to all GPUs when more than one is present.
for device in devices:
    tf.config.experimental.set_memory_growth(device, True)

print("Num GPUs Available: ", len(devices))

Num GPUs Available:  1


In [2]:
def ttv_split(ds: "tf.data.Dataset", ds_size: int, 
              train_split: float = 0.8, 
              val_split: float = 0.1, 
              test_split: float = 0.1) -> "Tuple[tf.data.Dataset, tf.data.Dataset, tf.data.Dataset]":
    """Split a dataset into consecutive train / validation / test parts.

    The first ``int(train_split * ds_size)`` elements form the training set,
    the next ``int(val_split * ds_size)`` elements the validation set, and
    every remaining element the test set (so the test set also absorbs any
    rounding remainder and any elements beyond ``ds_size``).

    Args:
        ds: Object exposing ``take(n)`` and ``skip(n)``.
        ds_size: Number of elements used to size the train and validation parts.
        train_split: Fraction of ``ds_size`` assigned to training.
        val_split: Fraction of ``ds_size`` assigned to validation.
        test_split: Fraction nominally assigned to testing; only used to
            validate that the three fractions sum to 1.

    Returns:
        ``(train_ds, val_ds, test_ds)``.

    Raises:
        AssertionError: If the three fractions do not sum to 1.
    """
    total = train_split + test_split + val_split
    # Compare with a tolerance: exact float equality rejects valid splits such
    # as train=0.6, test=0.3, val=0.1, whose float sum is 0.9999999999999999.
    assert abs(total - 1.0) < 1e-9, (
        f"train/val/test fractions must sum to 1, got {total}")

    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)
    # Everything after the train and validation parts goes to the test set.
    test_ds = ds.skip(train_size).skip(val_size)

    return train_ds, val_ds, test_ds


def mask_window(features: Tuple[Dict[tf.Tensor, tf.Tensor], tf.Tensor], 
                mask: tf.Tensor) -> Tuple[Dict[tf.Tensor, tf.Tensor], 
                                          tf.Tensor]:
    """Attach masked decoder labels and a square attention mask to one sample.

    Args:
        features: ``(inputs, labels)`` pair; ``inputs`` is a dict that is
            updated in place, ``labels`` is a 2-D tensor indexed ``[t, f]``.
        mask: 1-D tensor with one entry per row ``t`` of ``labels``.

    Returns:
        ``(inputs, labels)`` where ``inputs`` additionally holds
        ``'decoder_labels'`` (each label row scaled by its mask entry) and
        ``'attention_mask'`` (shape ``(1, t, t)``, entry ``[0, i, j]`` equal to
        ``min(mask[i], mask[j])``).
    """
    inputs, labels = features[0], features[1]

    # Scale every label row t by mask[t]; static shape is pinned to 12 rows.
    masked_labels = tf.einsum('tf,t->tf', labels, mask)
    masked_labels.set_shape([12, None])

    # Broadcast the 1-D mask along a new last axis, then combine it with its
    # transpose so an entry survives only if both positions are unmasked.
    tiled = repeat(mask, 'i -> 1 i j', j=labels.shape[-2])
    transposed = rearrange(tiled, '1 i j -> 1 j i')
    attention_mask = tf.math.minimum(tiled, transposed)
    attention_mask.set_shape([None, 12, 12])

    inputs['decoder_labels'] = masked_labels
    inputs['attention_mask'] = attention_mask
    return inputs, labels


def split_window(features: tf.Tensor) -> Tuple[Dict[tf.Tensor, tf.Tensor],
                                               tf.Tensor]:
    """Slice one 2-D window into encoder inputs, decoder inputs and labels.

    Column 0 supplies the labels; columns ``1:`` feed the encoder and columns
    ``1:75`` feed the decoder. The static shapes set below fix the window at
    23 rows and 248 columns (1 label column + 247 feature columns).

    Returns:
        ``({'encoder_inputs': (23, 247), 'decoder_inputs': (12, 74)}, labels)``
        with ``labels`` of shape ``(12, 1)``.
    """
    # Rows 0..11 with every non-label column.
    history = features[:12, 1:]

    # Rows 12.. restricted to columns 1:75, right-padded with 173 zero columns
    # so they can be stacked under the 247-column history block.
    future_block = features[12:, 1:75]
    pad_spec = tf.constant([[0, 0,], [0, 173]])
    padded_future = tf.pad(future_block, pad_spec, "CONSTANT")
    encoder_input = tf.concat([history, padded_future], axis=0)
    encoder_input.set_shape([23, 247])

    # Decoder inputs and labels both start one row earlier (row 11).
    decoder_input = features[11:, 1:75]
    decoder_input.set_shape([12, 74])

    labels = rearrange(features[11:, 0], 'a -> a 1')
    labels.set_shape([12, None])

    model_inputs = {'encoder_inputs':encoder_input, 'decoder_inputs':decoder_input}
    return model_inputs, labels

                        
def make_dataset(data: pd.DataFrame, length: int, 
                 batch_size: int = 64, multistep: bool = True,
                 n_windows: int = 170680) -> Tuple[tf.data.Dataset, 
                                                   tf.data.Dataset, 
                                                   tf.data.Dataset]:
    """Build batched train / validation / test datasets of masked windows.

    Pipeline: sliding windows of ``length`` rows (stride 1, shuffled with a
    fixed seed) -> drop windows containing NaN -> ``split_window`` -> zip with
    one mask per window -> ``mask_window`` -> batch -> ``ttv_split`` (80/10/10).

    Args:
        data: Frame whose values are converted to a float32 array.
        length: Number of rows per window.
        batch_size: Samples per batch.
        multistep: If true every mask is all zeros; otherwise each mask has a
            random number of leading ones (drawn with the unseeded global
            NumPy RNG, so it differs between runs).
        n_windows: Number of masks generated, and therefore the number of
            samples after zipping. The default is the value that used to be
            hard-coded here (presumably the count of NaN-free windows in
            data/scaled_U2_data.csv -- TODO confirm).

    Returns:
        ``(train_ds, val_ds, test_ds)`` of batched ``(inputs, labels)``.
    """
    values = np.array(data.iloc[:, :], dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(data=values,
                                                        targets=None,
                                                        sequence_length=length,
                                                        sequence_stride=1,
                                                        shuffle=True,
                                                        seed=1,
                                                        batch_size=None)
    # NOTE(review): stride-1 windows overlap and are shuffled before the
    # take/skip split below, so neighbouring windows can land in different
    # splits -- confirm this is acceptable for the evaluation.

    # Keep only windows without any NaN.
    ds = ds.filter(lambda x: tf.reduce_all(logical_not(tf.math.is_nan(x))))
    ds = ds.map(split_window)

    if multistep:
        nums = np.zeros(n_windows)
    else:
        nums = np.random.randint(0, length/2-1, n_windows)
    # NOTE(review): length/2 is a float (11.5 for length=23). mask_window pins
    # the mask to 12 entries, so do not switch to integer division without
    # checking the resulting mask width.
    mask = tf.sequence_mask(nums, length/2, dtype=tf.float32)
    maskds = tf.data.Dataset.from_tensor_slices(mask)

    # Zipping stops at the shorter dataset, i.e. at most n_windows samples.
    ds = tf.data.Dataset.zip((ds, maskds))
    ds = ds.map(mask_window).batch(batch_size)

    # filter() makes the cardinality unknown, so it is re-asserted here. Use
    # ceiling division: the previous `n // batch_size + 1` over-counted by one
    # batch whenever batch_size divided the sample count exactly.
    n_batches = -(-n_windows // batch_size)
    ds = ds.apply(tf.data.experimental.assert_cardinality(n_batches))

    # The split is sized on full batches only (floor division), as before;
    # a trailing partial batch ends up in the test set.
    train_ds, val_ds, test_ds = ttv_split(ds, n_windows//batch_size, train_split=0.8, val_split=0.1, test_split=0.1)
    return train_ds, val_ds, test_ds

In [3]:
# park_logdir = os.path.join("logs", 'park-logs')
# park_tensorboard_callback = tf.keras.callbacks.TensorBoard(park_logdir, histogram_freq=1)
# park_earlystopper = EarlyStopAndSave(filepath="model_folder/park_folder", patience=15, quickstop="val_r2")
# Window length 23 matches the 23-row encoder input pinned in split_window.
train_data, val_data, test_data = make_dataset(data=df, length=23, batch_size=64)

In [4]:
from builderfuncs import save_whole_model
from keras_tuner.engine import tuner_utils
import GPUtil
# NOTE(review): no code visible in this notebook reads `gpu`;
# _build_and_fit_model queries GPUtil.getGPUs() itself -- confirm it is needed.
gpu = GPUtil.getGPUs()[0]
from time import sleep
from ToApps.to_apps import to_slack

def _build_and_fit_model(self, trial, *args, **kwargs):
    """Build, fit, save and report a single tuner trial.

    Written to be bound onto a Keras Tuner instance as its
    ``_build_and_fit_model`` method. Besides building and fitting the model it
    saves the fitted model under ``parkdir2/trial_<trial_id>``, pauses while
    the first GPU reports a temperature above 89.0, and posts a summary of the
    hyperparameters and metrics to Slack.

    Args:
        self: The tuner instance; ``_try_build``, ``hypermodel`` and
            ``oracle`` are read from it.
        trial: Trial whose ``trial_id`` and ``hyperparameters`` are used.
        *args: Positional arguments forwarded to ``self.hypermodel.fit``.
        **kwargs: Keyword arguments forwarded to ``self.hypermodel.fit``.

    Returns:
        The value returned by ``self.hypermodel.fit``.
    """
    separator = "------------------------------------------------\n"

    def _notify(message):
        # Slack is best-effort: a failed notification must never discard a
        # trial that has already been trained and saved.
        try:
            to_slack(message)
        except Exception:
            print("cannot connect to slack")

    string = f"Park_trial_{trial.trial_id}\n"
    string += separator
    if trial.hyperparameters.values:
        for hp_name, hp_value in trial.hyperparameters.values.items():
            string += f"{hp_name}: {hp_value}\n"

    hp = trial.hyperparameters
    model = self._try_build(hp)
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
    tuner_utils.validate_trial_results(
        results, self.oracle.objective, "HyperModel.fit()"
    )

    # makedirs also creates a missing "parkdir2" parent; a plain mkdir would
    # raise FileNotFoundError here, after the whole trial had been trained.
    save_dir = f"parkdir2/trial_{trial.trial_id}"
    os.makedirs(save_dir, exist_ok=True)
    save_whole_model(model, save_dir)

    string += separator
    result_dict = tuner_utils.convert_to_metrics_dict(results, self.oracle.objective)
    string += f"loss: {result_dict['loss']:.6f}, r2: {result_dict['r2']:.6f}\n"
    string += f"val_loss: {result_dict['val_loss']:.6f}, val_r2: {result_dict['val_r2']:.6f}\n"

    string += separator

    # Hold back the next trial while the first GPU is too hot, polling every
    # 10 seconds and reporting each reading.
    temperature = GPUtil.getGPUs()[0].temperature
    while temperature > 89.0:
        _notify(str(temperature))
        sleep(10)
        temperature = GPUtil.getGPUs()[0].temperature

    _notify(string)

    return results

In [5]:
import types
import keras_tuner as kt
from ktfuncs import kt_park
# Bayesian search over the kt_park hypermodel, minimising 'val_loss'.
search_objective = kt.Objective('val_loss', 'min')
tuner = kt.BayesianOptimization(
    kt_park,
    objective=search_objective,
    max_trials=100,
    num_initial_points=54,
    directory='directory',
    project_name="park_trial_2",
    seed=1,
    max_consecutive_failed_trials=1,
)
# Bind this notebook's _build_and_fit_model onto the tuner instance so it is
# used in place of the tuner's own method.
tuner._build_and_fit_model = types.MethodType(_build_and_fit_model, tuner)

In [6]:
# Print the tuner's hyperparameter search space before starting the search.
tuner.search_space_summary()

Search space summary
Default search space size: 20
num_heads (Int)
{'default': None, 'conditions': [], 'min_value': 4, 'max_value': 16, 'step': 2, 'sampling': 'linear'}
lookback (Fixed)
{'conditions': [], 'value': 23}
n_features (Fixed)
{'conditions': [], 'value': 247}
n_manips (Fixed)
{'conditions': [], 'value': 74}
n_targs (Fixed)
{'conditions': [], 'value': 1}
horizon (Fixed)
{'conditions': [], 'value': 12}
ff_dim (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 16, 'step': 4, 'sampling': 'linear'}
ff_activ (Choice)
{'default': 'linear', 'conditions': [], 'values': ['linear', 'relu', 'elu', 'selu', 'gelu', 'sigmoid', 'tanh'], 'ordered': False}
num_encoders (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 2, 'step': 1, 'sampling': 'linear'}
num_decoders (Fixed)
{'conditions': [], 'value': 0}
mlp_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 4, 'step': 1, 'sampling': 'linear'}
mlp_units (Int)
{'default': None,

In [7]:
import traceback
try:
    tuner.search(train_data,
                epochs=25,
                verbose=2,
                validation_data=val_data,
                callbacks=[EarlyStopAndSave(filepath="ffolder", patience=3, quickstop="val_r2", lim=0.5)])
except Exception:
    # `except Exception` (not a bare except) lets a manual kernel interrupt
    # propagate instead of being reported as a search failure.
    # Print the stack trace first so it is not lost if Slack is unreachable.
    traceback.print_exc()
    for _ in range(5):
        try:
            to_slack("PARK ERROR")
        except Exception:
            print("cannot connect to slack")
            break

Trial 32 Complete [00h 02m 45s]
val_loss: 0.027632107958197594

Best val_loss So Far: 0.0019061993807554245
Total elapsed time: 17h 24m 23s

Search: Running Trial #33

Value             |Best Value So Far |Hyperparameter
8                 |6                 |num_heads
23                |23                |lookback
247               |247               |n_features
74                |74                |n_manips
1                 |1                 |n_targs
12                |12                |horizon
16                |12                |ff_dim
gelu              |tanh              |ff_activ
1                 |2                 |num_encoders
0                 |0                 |num_decoders
1                 |4                 |mlp_layers
64                |128               |mlp_units
sigmoid           |gelu              |mlp_activ
0.4               |0.3               |mlp_dropout
0.5               |0.3               |dropout
gelu              |gelu              |out_activ
0.0001       

Traceback (most recent call last):
  File "C:\Users\Owner\AppData\Local\Temp\ipykernel_10788\4106867355.py", line 3, in <module>
    tuner.search(train_data,
  File "c:\Users\Owner\anaconda3\envs\plant_proj_venv\lib\site-packages\keras_tuner\engine\base_tuner.py", line 230, in search
    self._try_run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "c:\Users\Owner\anaconda3\envs\plant_proj_venv\lib\site-packages\keras_tuner\engine\base_tuner.py", line 270, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "c:\Users\Owner\anaconda3\envs\plant_proj_venv\lib\site-packages\keras_tuner\engine\base_tuner.py", line 235, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "c:\Users\Owner\anaconda3\envs\plant_proj_venv\lib\site-packages\keras_tuner\engine\tuner.py", line 287, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
  File "C:\Users\Owner\AppData\Local\T

In [None]:
import json
# Collect the trial.json record of trials 000..099 from the tuner's project
# directory; trials without a trial.json are reported and skipped.
hyperparams = []
for trial_index in range(100):
    trial_name = f"trial_{trial_index:03}"
    try:
        with open(f"directory/park_trial_2/{trial_name}/trial.json") as trial_file:
            trial_record = json.load(trial_file)
    except FileNotFoundError:
        print(f"{trial_name} not found")
    else:
        hyperparams.append(trial_record)

In [None]:
# Remove trials whose "score" is None, in a single pass instead of repeated
# list.pop() calls. For each removed trial, print the position it would have
# had among the trials kept so far.
scored_trials = []
for trial_record in hyperparams:
    if trial_record["score"] is None:
        print(len(scored_trials))
    else:
        scored_trials.append(trial_record)
# Slice assignment keeps `hyperparams` the same list object, updated in place.
hyperparams[:] = scored_trials

In [None]:
# Order the trial records by ascending score (lowest score first).
hyperparams = sorted(hyperparams, key=lambda trial_record: trial_record["score"])

In [None]:
from tabulate import tabulate

# Tabulate the first 25 entries of `hyperparams` (sorted by score in an
# earlier cell). Slicing tolerates fewer than 25 entries, where indexing with
# range(25) raised IndexError.
top_trials = hyperparams[:25]

# Column order is taken from the first listed trial; every row is looked up by
# name so values stay under the right header even if key order differs.
hp_names = list(top_trials[0]['hyperparameters']['values'].keys()) if top_trials else []

table = [
    [trial_record['trial_id'],
     *(trial_record['hyperparameters']['values'].get(name) for name in hp_names),
     trial_record['score']]
    for trial_record in top_trials
]
# Name the leading trial_id column too, so there is one header per column.
headers = ['trial_id', *hp_names, 'score']
print(tabulate(table, headers=headers))

In [None]:
# Restore the model saved for trial 052 and print its evaluation on test_data.
# NOTE(review): this reads from "parkdir/", while _build_and_fit_model in this
# notebook saves to "parkdir2/" -- confirm which directory is intended.
model = restore_model("parkdir/trial_052", modeltype="transformer")
print(model.evaluate(test_data))

In [None]:
# model.fit(train_data,
#           epochs=200,
#           verbose=1,
#           validation_data=val_data,
#           callbacks=[park_earlystopper, park_tensorboard_callback])

In [None]:
# from builderfuncs import save_whole_model
# save_whole_model(model, "model_folder/park_folder")