In [1]:
import os
import re
import joblib
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import multiprocessing
import gc
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import Input, Model
from joblib import Parallel, delayed, parallel_backend




In [7]:
# Folder prefix used for every file this notebook writes (simulated data sets
# and the saved result arrays). It must end with a slash because file names
# are appended to it by plain string concatenation.
path = "drive/MyDrive/"

# Show the GPUs TensorFlow can see; an empty list means the run is CPU-only.
tf.config.list_physical_devices(device_type='GPU')

[]

In [12]:
def generate_data(n, it = 1,
                  p=100, err = 0.5,
                  a=1, b=2, ## X ~ Unif[a,b]
                  columns=[0, 5, 10, 15, 20]):
    """Simulate one regression data set and write it to a tab-separated file.

    Parameters
    ----------
    n : int
        Number of observations (rows).
    it : int
        Replicate index. Used both as the NumPy random seed and in the
        output file name.
    p : int
        Number of covariates (columns of ``X``).
    err : float
        Standard deviation of the Gaussian noise added to the response.
    a, b : float
        Lower and upper bound of the uniform distribution of the covariates.
    columns : sequence of int
        Column indices of ``X`` that enter the response; the first five
        entries are used.

    Returns
    -------
    X : ndarray of shape (n, p)
        Covariate matrix.
    y : ndarray of shape (n,)
        Noise-free response.
    y_obs : ndarray of shape (n,)
        Response with noise added.

    Side effects
    ------------
    Re-seeds the global NumPy random generator with ``it`` and writes the
    array ``[y, y_obs, X]`` to ``path + 'df_<it>.txt'`` (``path`` is the
    module-level output prefix).
    """
    # Seeding with the replicate index makes every data set reproducible.
    np.random.seed(it)

    # n x p matrix of iid Unif[a, b] covariates.
    X = np.random.uniform(low=a, high=b, size=(n, p))

    # Pull out the covariates that actually drive the response.
    active = X[:, columns]
    x0, x1, x2, x3, x4 = (active[:, k] for k in range(5))

    # Scenario 1 in paper, assembled term by term (same summation order).
    sine_term = -np.sin(np.pi * x0)
    quadratic_term = 2 * (x1 - 0.5)**2
    interaction_term = 1.5 * x2 * x3
    reciprocal_term = 5 / x4
    y = sine_term + quadratic_term + interaction_term - reciprocal_term

    # Observed response = signal + N(0, err^2) noise.
    noise = np.random.normal(0, err, size=y.shape)
    y_obs = y + noise

    # Persist the full data set: first column y, second y_obs, then X.
    table = np.column_stack((y, y_obs, X))
    out_file = ''.join([path, 'df_', str(it), '.txt'])
    np.savetxt(out_file, table, delimiter='\t')

    return X, y, y_obs

In [13]:
# --- Simulation configuration -------------------------------------------
n = 600        # training-set size of every simulated replicate
ntest = 100    # size of the shared test set
p = 100        # number of covariates

# One fixed test set (seed 999) shared by all replicates.
X_test, y_test, y_test_obs = generate_data(n = ntest, it = 999, p= p)

nsim = 101     # number of simulated replicates
Brep = 102     # number of random train/validation splits per replicate

# Result containers, indexed [replicate, split, ...].
# They are initialised with NaN instead of np.empty: the simulation loop saves
# these arrays to disk while they are only partly filled, and NaN makes the
# not-yet-computed entries recognisable instead of holding arbitrary memory.
y_preds_arr = np.full((nsim, Brep, n+ntest), np.nan)
ntrain_epochs = np.full((nsim, Brep), np.nan)
val_losses = np.full((nsim, Brep), np.nan)

In [19]:
def one_split(X, y, X_test, y_test, # model,
              it = 1, b = 1,
              th_val_loss = 0.3,
              test_size=0.5,
              model_dense_layer_1_activation='relu',
              model_dense_layer_2_activation='relu',
              early_stop_patience = 100, early_stop_min_delta = 0.001,
              model_epochs=200, model_batch_size=32, model_loss='mse',
              model_optimizer='adam',
              verbose=False,
              plot_res = False,
              del_weights = True):
    """Fit a small dense network on one random train/validation split.

    The data are split with ``train_test_split`` (seed ``1000*it + b``), a
    128-64-1 network with dropout is trained with early stopping on the
    validation loss, and predictions are produced for the validation rows and
    for ``X_test``.

    Parameters
    ----------
    X, y : array-like
        Covariates and response that are split into training and validation.
    X_test : array-like
        Covariates of the test set to predict.
    y_test : array-like
        Only its length is used (number of test rows).
    it, b : int
        Replicate and split index; together they determine the split seed.
    th_val_loss : float
        Currently unused; kept so existing callers keep working.
    test_size : float or int
        Passed to ``train_test_split`` (share of rows used for validation).
    model_dense_layer_1_activation, model_dense_layer_2_activation : str
        Activations of the two hidden layers.
    early_stop_patience, early_stop_min_delta :
        ``patience`` and ``min_delta`` of the ``EarlyStopping`` callback.
    model_epochs, model_batch_size :
        Maximum number of epochs and mini-batch size passed to ``fit``.
    model_loss :
        Loss passed to ``compile``.
    model_optimizer :
        ``'adam'`` (default) builds Adam with learning rate 0.01, which is
        what this function has always trained with. Any other value is
        handed to ``compile`` unchanged.
    verbose :
        Verbosity passed to ``fit``.
    plot_res : bool
        If True, plot training and validation loss per epoch, save the figure
        to ``training_errs.png`` and show it.
    del_weights : bool
        If True, drop the model and clear the Keras session before returning,
        so repeated calls do not accumulate Keras global state.

    Returns
    -------
    train_loss : float
        Smallest training loss over all epochs.
    val_loss : float
        Smallest validation loss over all epochs.
    y_vec : ndarray of shape (n + ntest,)
        Predictions: validation predictions at the positions of the
        validation rows, test predictions in the last ``ntest`` positions,
        and 0 at the positions of the training rows.
    train_epochs : int
        Number of epochs that were actually run.
    """
    p = X.shape[1]
    n = len(y)
    ntest = len(y_test)

    # Split the data; the row indices are split alongside so the validation
    # predictions can be written back to their original positions.
    indices = range(n)
    X_train, X_val, y_train, y_val, _, indices_val = train_test_split(
        X, y, indices,
        test_size = test_size,
        random_state = 1000*it+b)

    es = EarlyStopping(monitor='val_loss', min_delta=early_stop_min_delta,
                       patience=early_stop_patience,
                       verbose=0, mode='min',
                       restore_best_weights = True)

    # Define the neural network.
    model = Sequential()
    model.add(Dense(128, activation=model_dense_layer_1_activation, input_shape=(p,)))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation=model_dense_layer_2_activation))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='linear'))

    # Keep the historical default (Adam, lr = 0.01) for 'adam'; honour any
    # other optimizer the caller supplies instead of silently ignoring it.
    if isinstance(model_optimizer, str) and model_optimizer.lower() == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
    else:
        optimizer = model_optimizer
    model.compile(optimizer=optimizer, loss=model_loss)

    history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                        epochs=model_epochs, batch_size=model_batch_size,
                        callbacks=[es], verbose = verbose)

    val_loss = min(history.history['val_loss'])
    train_loss = min(history.history['loss'])
    train_epochs = len(history.history['val_loss'])

    # verbose=0: this function is called many times in a loop, and progress
    # bars from predict would flood the notebook output.
    y_pred = model.predict(X_test, verbose=0)
    y_val_pred = model.predict(X_val, verbose=0)

    y_vec = np.zeros(n+ntest)
    y_vec[indices_val] = y_val_pred.flatten()
    y_vec[-ntest:] = y_pred.flatten()

    if plot_res:
        # Plot the training and validation errors versus epochs.
        fig, ax = plt.subplots()
        ax.plot(history.history['loss'], label='Training Error')
        ax.plot(history.history['val_loss'], label='Validation Error')
        ax.set_xlabel('Epochs')
        ax.set_ylabel('Error')
        ax.legend()

        # Save the plot to a file, then release the figure.
        fig.savefig('training_errs.png')
        plt.show()
        plt.close(fig)

    if del_weights:
        # A fresh model is built on every call; release it and the Keras
        # global state so memory does not grow over many calls.
        del model
        tf.keras.backend.clear_session()

    return train_loss, val_loss, y_vec, train_epochs

In [None]:
# Main simulation: for every replicate, simulate a training set and fit the
# network on Brep different random train/validation splits.
for it in range(nsim):

    gc.collect()

    start_time = time.time()
    print(it)
    # Pass p explicitly so the training data always have the same number of
    # covariates as the test set built in the setup cell.
    X, y, y_obs = generate_data(n, it, p=p)

    for b in range(Brep):

        train_loss, val_loss, y_vec, train_epochs = one_split(X, y_obs, X_test, y_test, it = it, b = b)
        print(train_epochs)

        # Write straight into the preallocated result arrays instead of
        # growing a temporary matrix with np.vstack on every split.
        y_preds_arr[it, b, :] = y_vec
        val_losses[it, b] = val_loss
        ntrain_epochs[it, b] = train_epochs

    # Average number of epochs over the splits of this replicate.
    print(float(np.mean(ntrain_epochs[it, :])))

    # Checkpoint every 10 replicates AND after the last one. Without the
    # second condition the replicates after the final "it % 10 == 1" would be
    # computed but never written to disk.
    if it % 10 == 1 or it == nsim - 1:

        print(val_losses[it, :])
        # Only rows up to `it` have been filled so far.
        print(np.mean(ntrain_epochs[:it + 1], axis=1))

        np.save(path+'_val_losses.npy', val_losses)
        np.save(path+'_ntrain_epochs.npy', ntrain_epochs)

        np.save(path+'_y_preds_arr.npy', y_preds_arr)

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Elapsed time: {elapsed_time:.2f} seconds")

0
170
154
132
168
134
129
129
135
188
137
129
130
127
140
184
125
159
122
132
123
117
126
151
127
117
154
133
132
128
101
135
163
132
133
125
119
139
200
145
171
134
124
200
122
122
187
137
152
120
126
129
136
122
101
120
164
131
175
124
131
121
153
114
148
200
200
122
127
128
175
162
125
147
126
124
156
143
141
131
133
128
200
109
134
130
122
162
135
117
136
142
129
130
132
141
136
120
125
139
200
128
144
139.87254901960785
Elapsed time: 708.16 seconds
1
153
132
130
172
124
124
123
145
132
126
200
134
127
127
131
123
122
200
124
129
125
119
126
122
148
126
134
148
142
200
136
159
159
114
139
127
140
129
140
156
122
118
119
140
122
128
128
129
130
138
194
141
128
132
125
184
128
124
128
127
175
148
121
133
119
139
137
176
143
125
199
128
121
118
111
101
138
135
127
183
119
126
164
139
139
168
118
135
124
119
149
139
132
123
142
139
126
138
136
132
183
140
137.83333333333334
[2.11630249 2.78354931 2.71325111 2.95798135 2.7269125  2.74558663
 2.70827198 2.15484595 2.80142903 3.10838437 1