In [1]:
import os
import pandas as pd
import numpy as np

from tensorflow import keras
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV

from sklearn.model_selection import train_test_split

In [2]:
def build_model(n_hidden=1,
                n_neurons=30,
                learning_rate=3e-3,
                input_shape=[1]):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    
    model.add(keras.layers.Dense(1))
    optimizer = keras.optimizers.SGD(lr=learning_rate)
    model.compile(loss="mse", optimizer=optimizer)
    
    return model

In [3]:
SOURCE_PATH = os.path.dirname(os.getcwd())

TRAIN_SIZE = 0.7 # size of training data (in %)
VALIDATION_SIZE = 0.2
N_JOBS = 1  # number of jobs to run in parallel
N_SPLITS = 10  # number of splits (k) to be made within the k fold cv
N_ITER = 3  # number of parameter settings that are sampled
SEED = 2294
VERBOSE = True
INPUTS_PATH = os.path.join(SOURCE_PATH, "data", "inputs")
OUTPUTS_PATH = os.path.join(SOURCE_PATH, "data", "outputs")
DATASET_NAMES = ["betadgp_covdgp_data", "betadgp_beta2x2_data", "betadgp_data"]
TARGET_NAME = "betas_dgp"
MODEL_TAG = "FFNN"
STANDARDIZE = True
DIR_NAMES = os.listdir(os.path.join(INPUTS_PATH))

dgp_name = DIR_NAMES[0]
ds = DATASET_NAMES[0] 

In [4]:
data = pd.read_csv(os.path.join(INPUTS_PATH, dgp_name, ds + ".csv"))
y = data["betas_dgp"].to_numpy()
X = data[["cov_dgp"]].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=TRAIN_SIZE)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=(1 - VALIDATION_SIZE))

In [5]:
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)

In [6]:
keras_reg.fit(X_train,
              y_train,
              epochs=100,
              validation_data=(X_valid, y_valid),
              callbacks=[keras.callbacks.EarlyStopping(patience=10)]) 
mse_test = keras_reg.score(X_test,
                           y_test)

Train on 18000 samples, validate on 4500 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/10

In [7]:
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV

In [9]:
param_distribs = {"n_hidden": [0, 1, 2, 3],
                  "n_neurons": np.arange(1, 100),
                  "learning_rate": reciprocal(3e-4, 3e-2),}
rnd_search_cv = RandomizedSearchCV(keras_reg,
                                   param_distribs,
                                   n_iter=2,
                                   cv=3)
output_rnd_search_cv = rnd_search_cv.fit(X_train,
                                         y_train,
                                         epochs=100,
                                         validation_data=(X_valid, y_valid),
                                         callbacks=[keras.callbacks.EarlyStopping(patience=10)])

Train on 12000 samples, validate on 4500 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Train on 12000 samples, validate on 4500 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/1

In [12]:
output_rnd_search_cv.best_params_

{'learning_rate': 0.0013113346214630906, 'n_hidden': 1, 'n_neurons': 25}