# Test Automation

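Runs the type-A (`run_test_A`) and type-B (`run_test_B`) model experiments in a loop over model names and alpha values, saving one pickled report per run.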

In [1]:
import numpy as np
import pandas as pd
import myLibrary as mL
import pickle
import os
import time

# Short aliases for the myLibrary components, so the cells below can refer to
# them without the `mL.` prefix.
# NOTE(review): NDBC and ERA5 are not referenced in the cells visible here.
NDBC = mL.NDBC_lib
ERA5 = mL.ERA5_lib
Models = mL.Models          # used via Models.get_model(...)
DP = mL.DataProcessor       # used for data_to_stationary / data_to_supervised / train_test_split
Experiment = mL.Experiment  # report object that gets pickled to data/reports/

In [2]:
def run_test_A(MODEL_NAME, ALPHA, filename, report_description,
               DATAFILE = "dataset_GOM_1_A_A.pickle", STATIONARY_SHIFT = 1, N_TEST_HOURS=24):
    """Train one model on a type-A dataset, forecast the test window and pickle a report.

    The dataset is read from ``data/datasets/type_A/<DATAFILE>``, differenced
    with ``DP.data_to_stationary``, framed with ``DP.data_to_supervised`` and
    split with ``DP.train_test_split``. The model's predictions are deltas, so
    they are added back onto the observed value ``STATIONARY_SHIFT`` rows
    earlier to obtain absolute forecasts. The resulting ``Experiment`` is
    written to ``data/reports/<report.name>.pickle``.

    Args:
        MODEL_NAME: Model identifier handed to ``Models.get_model``.
        ALPHA: Passed through unchanged to ``Models.get_model``.
        filename: Report name; stored as ``Experiment.name``.
        report_description: Free-text description stored in the report.
        DATAFILE: File name of the pickled dataset inside ``data/datasets/type_A``.
        STATIONARY_SHIFT: Differencing lag ``n`` used for the stationary
            transform and for undoing it.
        N_TEST_HOURS: Passed to ``DP.train_test_split`` and recorded in the report.

    Returns:
        None. The report is written to disk and the path is printed.
    """
    # Read data from file.
    # NOTE: pickle.load can execute arbitrary code; only load dataset files
    # that were produced by this project.
    with open(f'data/datasets/type_A/{DATAFILE}', 'rb') as f:
        dataset = pickle.load(f)
    data = dataset["data"]

    # Preprocessing
    data_stationary = DP.data_to_stationary(data, n = STATIONARY_SHIFT)
    data_supervised = DP.data_to_supervised(data_stationary, n_in=3)

    train_X, train_y, test_X, test_y = DP.train_test_split(data_supervised, N_TEST_HOURS)

    # Training (only the call to get_model is timed)
    start_time = time.time()
    model = Models.get_model(MODEL_NAME, train_X, train_y, ALPHA)
    TRAINING_TIME = time.time() - start_time

    # One-shot forecasting
    # NOTE(review): the result of this first predict call is discarded;
    # presumably it warms up / primes the model on the training data - confirm.
    model.predict(train_X, batch_size=1)
    yhat = model.predict(test_X)

    # Create evaluation dataframes
    output_cols = data.columns.tolist()
    yhat_df = pd.DataFrame(yhat, columns=[name + "_pred" for name in output_cols])
    yhat_df.set_index(data.tail(len(yhat)).index, inplace=True)

    # Keep STATIONARY_SHIFT extra leading rows: each forecast row needs the
    # observed value STATIONARY_SHIFT rows before it for de-differencing.
    # (A fixed "+1" is only enough when STATIONARY_SHIFT == 1.)
    evaluation_1 = data.tail(len(yhat) + STATIONARY_SHIFT).copy()

    # De-differencing: prediction = previous observation + predicted delta
    for col in output_cols:
        evaluation_1[f"{col}_pred"] = evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_df[f"{col}_pred"]

    # Remove the first n entries since there is no delta value for them.
    # .copy() so the assignment below writes to an independent frame.
    evaluation_1 = evaluation_1.iloc[STATIONARY_SHIFT:].copy()

    # Correct wind direction (modulo 360)
    wdir_columns = [col for col in evaluation_1.columns if col.startswith("WDIR")]
    evaluation_1[wdir_columns] = evaluation_1[wdir_columns] % 360

    # CREATE REPORT
    # Capture the model summary as a string instead of printing it.
    summary_lines = []
    model.summary(print_fn=summary_lines.append)
    model_summary = "\n".join(summary_lines)

    report = Experiment(
        name=filename,
        description=report_description,

        stations=dataset["stations"],
        years=dataset["years"],
        nan_threshold=dataset["nan_threshold"],
        features=dataset["features"],
        era5=dataset["add_era5"],

        stationary_shift=STATIONARY_SHIFT,

        n_test_hours=N_TEST_HOURS,

        scaler=None,  # scaling is not applied in this run

        model_name=MODEL_NAME,
        model_summary=model_summary,
        training_time=TRAINING_TIME,

        one_shot_forecast=evaluation_1,
        recursive_forecast=None,  # no recursive forecast is produced here
    )

    # Write the report object to disk in binary mode.
    filepath = f'data/reports/{report.name}.pickle'
    with open(filepath, 'wb') as f:
        pickle.dump(report, f)
        print(f"File successfully saved:{filepath}")

In [None]:
def run_test_B(MODEL_NAME, ALPHA, filename, report_description,
               DATAFILE = "dataset_GOM_1_B_B.pickle", STATIONARY_SHIFT = 1, N_TEST_HOURS=24):
    """Train one model on a type-B dataset, forecast its test set and pickle a report.

    The dataset is read from ``data/datasets/type_B/<DATAFILE>`` and already
    contains separate, supervised ``data_train`` / ``data_test`` frames. Both
    are differenced with ``DP.data_to_stationary``; the model's predicted
    deltas are then added back onto the observed value ``STATIONARY_SHIFT``
    rows earlier. The resulting ``Experiment`` is written to
    ``data/reports/<filename>.pickle``.

    Args:
        MODEL_NAME: Model identifier handed to ``Models.get_model``.
        ALPHA: Passed through unchanged to ``Models.get_model``.
        filename: Report name; also used for the output file name.
        report_description: Free-text description stored in the report.
        DATAFILE: File name of the pickled dataset inside ``data/datasets/type_B``.
        STATIONARY_SHIFT: Differencing lag ``n`` used for the stationary
            transform and for undoing it.
        N_TEST_HOURS: Not used by this function; the report records
            ``dataset["num_test_hours"]`` instead. Kept so the signature
            matches ``run_test_A``.

    Returns:
        None. The report is written to disk and the path is printed.
    """
    # Read data from file.
    # NOTE: pickle.load can execute arbitrary code; only load dataset files
    # that were produced by this project.
    with open(f'data/datasets/type_B/{DATAFILE}', 'rb') as f:
        dataset = pickle.load(f)

    train = dataset["data_train"]
    test = dataset["data_test"]

    # Preprocessing
    train_stationary = DP.data_to_stationary(train, n = STATIONARY_SHIFT)
    test_stationary = DP.data_to_stationary(test, n = STATIONARY_SHIFT)

    # Data is already supervised, so train_test_split is only used to separate
    # X from y: everything from the train frame goes to the train side and
    # everything from the test frame to the test side.
    train_X, train_y, _, _ = DP.train_test_split(train_stationary, -len(train_stationary))
    _, _, test_X, test_y = DP.train_test_split(test_stationary, len(test_stationary))

    # Training (only the call to get_model is timed)
    start_time = time.time()
    model = Models.get_model(MODEL_NAME, train_X, train_y, ALPHA)
    TRAINING_TIME = time.time() - start_time

    # One-shot forecasting
    # NOTE(review): the result of this first predict call is discarded;
    # presumably it warms up / primes the model on the training data - confirm.
    model.predict(train_X, batch_size=1)
    yhat = model.predict(test_X)

    # Create evaluation dataframes
    # Columns whose name contains 't-' are the lagged inputs (X); the rest are
    # the targets (y).
    is_lag_col = test.columns.str.contains('t-')
    output_cols = test.columns[~is_lag_col]

    yhat_df = pd.DataFrame(yhat)
    yhat_df.columns = [name + "_pred" for name in output_cols]
    yhat_df.set_index(test.tail(len(yhat)).index, inplace=True)

    # Ground truth: just y, without X. .copy() so adding the prediction
    # columns below does not write into a slice of `test`.
    evaluation_1 = test.loc[:, ~is_lag_col].copy()

    # De-differencing: prediction = previous observation + predicted delta
    for col in output_cols:
        shifted = evaluation_1[col].shift(STATIONARY_SHIFT)
        evaluation_1[f"{col}_pred"] = shifted + yhat_df[f"{col}_pred"]

    # Remove the first n entries since there is no delta value for them.
    evaluation_1 = evaluation_1.iloc[STATIONARY_SHIFT:].copy()

    # Correct wind direction (modulo 360)
    wdir_columns = [col for col in evaluation_1.columns if col.startswith("WDIR")]
    evaluation_1[wdir_columns] = evaluation_1[wdir_columns] % 360

    # CREATE REPORT
    # Capture the model summary as a string instead of printing it.
    summary_lines = []
    model.summary(print_fn=summary_lines.append)
    model_summary = "\n".join(summary_lines)

    report = Experiment(
            name=filename,
            description=report_description,

            stations=dataset["files"],
            years=["not available"],
            nan_threshold=dataset["nan_threshold"],
            features=dataset["features"],
            era5=dataset["add_era5"],

            stationary_shift=STATIONARY_SHIFT,
            n_test_hours=dataset["num_test_hours"],

            scaler=None,  # scaling is not applied in this run

            model_name=MODEL_NAME,
            model_summary=model_summary,
            # Record the measured training time, as run_test_A does.
            training_time=TRAINING_TIME,

            one_shot_forecast=evaluation_1,
            recursive_forecast=None,
    )

    # Write the report object to disk in binary mode.
    filepath = f'data/reports/{filename}.pickle'
    with open(filepath, 'wb') as f:
        pickle.dump(report, f)
        print("File successfully saved:")
        print(filepath)

In [5]:
# Experiment sweep: run the selected test function once per (model, alpha)
# combination and save one report per run.
approach = "SSUD"   # "MLM" ... Multi Location Modelling, "SSUD" ... Station Specific Unified Dataset
model_names = ["CNN"]  # options listed by the author: "LSTM", "GRU", "CNN", "TCN"
# 11 evenly spaced values 0.0, 0.1, ..., 1.0. linspace fixes the number of
# points explicitly instead of relying on float-step arithmetic in arange.
alpha_values = np.linspace(0, 1, 11)
report_description = "Executed with automated script. Corrected wind direction. excluded scaling."

# approach -> (test function, tag used in the report file name)
runners = {
    "MLM": (run_test_A, "A"),
    "SSUD": (run_test_B, "B"),
}

# Fail loudly on a typo instead of silently running nothing.
if approach not in runners:
    raise ValueError(f"Unknown approach {approach!r}; expected one of {sorted(runners)}")

run_test, report_tag = runners[approach]
for model in model_names:
    for alpha in alpha_values:
        filename = f'report_{report_tag}_{model}_{format(alpha, ".1f")}'
        run_test(model, alpha, filename, report_description)


  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)
  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)
  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)
  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)


Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_22 (Conv1D)          (None, 2, 128)            26752     
                                                                 
 max_pooling1d_22 (MaxPoolin  (None, 2, 128)           0         
 g1D)                                                            
                                                                 
 conv1d_23 (Conv1D)          (None, 1, 64)             16448     
                                                                 
 max_pooling1d_23 (MaxPoolin  (None, 1, 64)            0         
 g1D)                                                            
                                                                 
 flatten_11 (Flatten)        (None, 64)                0         
                                                                 
 dense_22 (Dense)            (None, 50)              

KeyboardInterrupt: 

In [4]:
# Completion marker: prints "DONE" when this cell is executed.
print("DONE")

DONE
