In [16]:
import pandas as pd
import tensorflow as tf 
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import RandomizedSearchCV
#from pandas_profiling import ProfileReport

In [4]:
df = pd.read_csv('all_train.csv')

In [5]:
#Install the below libaries before importing

#EDA using pandas-profiling
#profile = ProfileReport(pd.read_csv('all_train.csv'), explorative=True)

#Saving results to a HTML file
#profile.to_file("output.html")


### EDA is complete in the output file. There is no missing data. The response variable, label, is pretty closeley even, 3500879 to 3499121.

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7000000 entries, 0 to 6999999
Data columns (total 29 columns):
 #   Column   Dtype  
---  ------   -----  
 0   # label  float64
 1   f0       float64
 2   f1       float64
 3   f2       float64
 4   f3       float64
 5   f4       float64
 6   f5       float64
 7   f6       float64
 8   f7       float64
 9   f8       float64
 10  f9       float64
 11  f10      float64
 12  f11      float64
 13  f12      float64
 14  f13      float64
 15  f14      float64
 16  f15      float64
 17  f16      float64
 18  f17      float64
 19  f18      float64
 20  f19      float64
 21  f20      float64
 22  f21      float64
 23  f22      float64
 24  f23      float64
 25  f24      float64
 26  f25      float64
 27  f26      float64
 28  mass     float64
dtypes: float64(29)
memory usage: 1.5 GB


In [7]:
# change # label to "response"
df.rename(columns ={'# label': 'response'}, inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7000000 entries, 0 to 6999999
Data columns (total 29 columns):
 #   Column    Dtype  
---  ------    -----  
 0   response  float64
 1   f0        float64
 2   f1        float64
 3   f2        float64
 4   f3        float64
 5   f4        float64
 6   f5        float64
 7   f6        float64
 8   f7        float64
 9   f8        float64
 10  f9        float64
 11  f10       float64
 12  f11       float64
 13  f12       float64
 14  f13       float64
 15  f14       float64
 16  f15       float64
 17  f16       float64
 18  f17       float64
 19  f18       float64
 20  f19       float64
 21  f20       float64
 22  f21       float64
 23  f22       float64
 24  f23       float64
 25  f24       float64
 26  f25       float64
 27  f26       float64
 28  mass      float64
dtypes: float64(29)
memory usage: 1.5 GB


### Split Data into test and train while scaling

In [12]:
scaler = StandardScaler()

X = df.loc[:, df.columns != 'response'].values

y = df['response'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42)

scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Grid Search CV

In [14]:
log_root = os.path.join(os.curdir, "my_training_logs")

def get_log_dir():
    import time
    timestamp = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(log_root, timestamp)

log_dir = get_log_dir()

# # Print current working directory and log directory
# print(os.getcwd())
# print(log_dir)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir)

def create_model(num_hidden_layers=1, num_neurons=30, learning_rate=3e-3, input_shape=[28]):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=input_shape))
    for layer in range(num_hidden_layers):
        model.add(tf.keras.layers.Dense(num_neurons, activation="relu"))
        model.add(tf.keras.layers.Dropout(.2, input_shape=(2,)))
    model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
    optimizer = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.BinaryCrossentropy()
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    return model

regressor = tf.keras.wrappers.scikit_learn.KerasRegressor(create_model)

param_distributions = {
    "num_hidden_layers": [0, 1, 2, 3],
    "num_neurons": np.arange(1, 500)
}

early_stop = EarlyStopping(monitor='val_loss', patience=3, min_delta=2e-4)

random_search_cv = RandomizedSearchCV(regressor, param_distributions, n_iter=5, cv=3)
random_search_cv.fit(X_train, y_train, epochs=500,
                      validation_data=(X_test, y_test),
                      callbacks=[tensorboard_callback, early_stop], batch_size=1000)


  regressor = tf.keras.wrappers.scikit_learn.KerasRegressor(create_model)


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500


Epoch 20/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500


Epoch 11/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 1/500
Epoch 2/500
Epoch 3/500


Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500


In [18]:
print(random_search_cv.best_params_)
print(random_search_cv.best_score_)

{'num_neurons': 343, 'num_hidden_layers': 2}
-0.2609912157058716
