# Dense Neural Network on Binary Classification with Hyperparameter Tuning

Galen Wilkerson

In [323]:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

%matplotlib notebook

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


In [324]:
df = pd.read_csv('./Train.csv')

## Understand data

In [325]:
df.shape

(147072, 11)

In [326]:
df.describe()

Unnamed: 0,variable_0,variable_1,variable_2,variable_3,variable_4,variable_5,variable_6,variable_7,variable_8,variable_9,class_col
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,12.107321,0.013599,0.43397,500.308938,0.167428,0.079292,0.27734,0.010735,0.25183,0.74036,0.06651
std,528.035137,0.671062,4.324017,2181.510949,1.843019,0.860352,4.301489,0.571463,4.287297,1.108458,0.249173
min,-0.291358,-2.363636,0.0,-0.528734,-0.744872,-1.333333,0.0,-0.5,0.0,0.0,0.0
25%,-0.234323,-0.5,0.0,-0.275459,-0.471368,-0.5,0.0,-0.5,0.0,0.0,0.0
50%,0.001486,0.0,0.0,0.000119,0.00453,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.759917,0.5,0.0,0.721373,0.520085,0.5,0.0,0.5,0.0,1.0,0.0
max,95823.65222,2.590909,98.0,318121.7754,305.932051,8.333333,98.0,26.5,98.0,20.0,1.0


In [327]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147072 entries, 0 to 147071
Data columns (total 11 columns):
variable_0    100000 non-null float64
variable_1    100000 non-null float64
variable_2    100000 non-null float64
variable_3    100000 non-null float64
variable_4    100000 non-null float64
variable_5    100000 non-null float64
variable_6    100000 non-null float64
variable_7    100000 non-null float64
variable_8    100000 non-null float64
variable_9    100000 non-null float64
class_col     100000 non-null float64
dtypes: float64(11)
memory usage: 12.3 MB


In [328]:
df.class_col.unique()

array([  0.,   1.,  nan])

### Take a look at the data distribution, including balance of classes

In [336]:
df.hist(bins=30);

<IPython.core.display.Javascript object>

In [330]:
df

Unnamed: 0,variable_0,variable_1,variable_2,variable_3,variable_4,variable_5,variable_6,variable_7,variable_8,variable_9,class_col
0,1.616623,-0.909091,3.0,-0.041629,0.284188,0.166667,0.0,0.0,1.0,2.0,0.0
1,-0.029310,1.500000,0.0,0.551353,-0.515812,0.166667,0.0,-0.5,0.0,0.0,0.0
2,1.598362,0.227273,0.0,-0.280171,-0.385897,-1.166667,0.0,-0.5,0.0,0.0,0.0
3,0.355677,0.954545,0.0,-0.101158,0.158718,-0.333333,0.0,0.0,0.0,0.0,0.0
4,-0.291358,0.590909,0.0,-0.299727,0.696838,0.000000,0.0,0.5,0.0,0.0,0.0
5,-0.014016,0.000000,0.0,1.195264,0.537179,1.333333,0.0,1.0,0.0,1.0,0.0
6,-0.130741,0.227273,0.0,-0.518843,0.451709,-0.833333,0.0,-0.5,0.0,0.0,0.0
7,0.427274,0.045455,0.0,0.046668,0.810684,1.333333,0.0,0.5,0.0,2.0,0.0
8,-0.257348,0.545455,0.0,3874.367005,-0.744872,-0.166667,0.0,0.0,0.0,0.0,0.0
9,-0.257157,1.318182,0.0,-0.390526,0.896496,-0.166667,0.0,-0.5,0.0,0.0,0.0


## Are there NaN values, and where?

In [331]:
df.isna().sum()

variable_0    47072
variable_1    47072
variable_2    47072
variable_3    47072
variable_4    47072
variable_5    47072
variable_6    47072
variable_7    47072
variable_8    47072
variable_9    47072
class_col     47072
dtype: int64

In [332]:
vals = df.isna().values.astype(int)
vals

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1]])

In [333]:
df_vals = pd.DataFrame(vals)
df_vals

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,0


In [334]:
df_vals.shape

(147072, 11)

In [335]:
# Try to understand where the NaN values occur
df_vals.plot();

<IPython.core.display.Javascript object>

## Clean up NaN values

In [284]:
# since the NaN values occur in a block in the latter part of the data, we can just drop them
df.dropna(inplace=True)

In [285]:
df.shape

(100000, 11)

In [286]:
df

Unnamed: 0,variable_0,variable_1,variable_2,variable_3,variable_4,variable_5,variable_6,variable_7,variable_8,variable_9,class_col
0,1.616623,-0.909091,3.0,-0.041629,0.284188,0.166667,0.0,0.0,1.0,2.0,0.0
1,-0.029310,1.500000,0.0,0.551353,-0.515812,0.166667,0.0,-0.5,0.0,0.0,0.0
2,1.598362,0.227273,0.0,-0.280171,-0.385897,-1.166667,0.0,-0.5,0.0,0.0,0.0
3,0.355677,0.954545,0.0,-0.101158,0.158718,-0.333333,0.0,0.0,0.0,0.0,0.0
4,-0.291358,0.590909,0.0,-0.299727,0.696838,0.000000,0.0,0.5,0.0,0.0,0.0
5,-0.014016,0.000000,0.0,1.195264,0.537179,1.333333,0.0,1.0,0.0,1.0,0.0
6,-0.130741,0.227273,0.0,-0.518843,0.451709,-0.833333,0.0,-0.5,0.0,0.0,0.0
7,0.427274,0.045455,0.0,0.046668,0.810684,1.333333,0.0,0.5,0.0,2.0,0.0
8,-0.257348,0.545455,0.0,3874.367005,-0.744872,-0.166667,0.0,0.0,0.0,0.0,0.0
9,-0.257157,1.318182,0.0,-0.390526,0.896496,-0.166667,0.0,-0.5,0.0,0.0,0.0


In [287]:
df.describe()

Unnamed: 0,variable_0,variable_1,variable_2,variable_3,variable_4,variable_5,variable_6,variable_7,variable_8,variable_9,class_col
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,12.107321,0.013599,0.43397,500.308938,0.167428,0.079292,0.27734,0.010735,0.25183,0.74036,0.06651
std,528.035137,0.671062,4.324017,2181.510949,1.843019,0.860352,4.301489,0.571463,4.287297,1.108458,0.249173
min,-0.291358,-2.363636,0.0,-0.528734,-0.744872,-1.333333,0.0,-0.5,0.0,0.0,0.0
25%,-0.234323,-0.5,0.0,-0.275459,-0.471368,-0.5,0.0,-0.5,0.0,0.0,0.0
50%,0.001486,0.0,0.0,0.000119,0.00453,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.759917,0.5,0.0,0.721373,0.520085,0.5,0.0,0.5,0.0,1.0,0.0
max,95823.65222,2.590909,98.0,318121.7754,305.932051,8.333333,98.0,26.5,98.0,20.0,1.0


In [288]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 0 to 99999
Data columns (total 11 columns):
variable_0    100000 non-null float64
variable_1    100000 non-null float64
variable_2    100000 non-null float64
variable_3    100000 non-null float64
variable_4    100000 non-null float64
variable_5    100000 non-null float64
variable_6    100000 non-null float64
variable_7    100000 non-null float64
variable_8    100000 non-null float64
variable_9    100000 non-null float64
class_col     100000 non-null float64
dtypes: float64(11)
memory usage: 9.2 MB


In [289]:
df.isna().sum()

variable_0    0
variable_1    0
variable_2    0
variable_3    0
variable_4    0
variable_5    0
variable_6    0
variable_7    0
variable_8    0
variable_9    0
class_col     0
dtype: int64

In [290]:
df.hist(bins = 30);

<IPython.core.display.Javascript object>

## Prepare data for machine learning (scale, center), pop y

In [307]:
y = df.pop('class_col').astype(int).values

In [308]:
y

array([0, 0, 0, ..., 0, 0, 0])

In [341]:
# understand class imbalance
(y == 0).sum() / len(y)

0.93349000000000004

In [342]:
(y == 1).sum() / len(y)

0.06651

In [309]:
df

Unnamed: 0,variable_0,variable_1,variable_2,variable_3,variable_4,variable_5,variable_6,variable_7,variable_8,variable_9
0,1.616623,-0.909091,3.0,-0.041629,0.284188,0.166667,0.0,0.0,1.0,2.0
1,-0.029310,1.500000,0.0,0.551353,-0.515812,0.166667,0.0,-0.5,0.0,0.0
2,1.598362,0.227273,0.0,-0.280171,-0.385897,-1.166667,0.0,-0.5,0.0,0.0
3,0.355677,0.954545,0.0,-0.101158,0.158718,-0.333333,0.0,0.0,0.0,0.0
4,-0.291358,0.590909,0.0,-0.299727,0.696838,0.000000,0.0,0.5,0.0,0.0
5,-0.014016,0.000000,0.0,1.195264,0.537179,1.333333,0.0,1.0,0.0,1.0
6,-0.130741,0.227273,0.0,-0.518843,0.451709,-0.833333,0.0,-0.5,0.0,0.0
7,0.427274,0.045455,0.0,0.046668,0.810684,1.333333,0.0,0.5,0.0,2.0
8,-0.257348,0.545455,0.0,3874.367005,-0.744872,-0.166667,0.0,0.0,0.0,0.0
9,-0.257157,1.318182,0.0,-0.390526,0.896496,-0.166667,0.0,-0.5,0.0,0.0


In [314]:
from sklearn.preprocessing import scale

In [315]:
X = df.values

In [316]:
X = scale(X)

In [319]:
df_temp = pd.DataFrame(X)
df_temp.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,7.058278e-18,3.296186e-16,3.272753e-15,-1.817296e-17,-8.086143000000001e-17,7.357448e-18,8.474457e-16,3.115435e-16,2.559908e-15,-6.4726e-18
std,1.000005,1.000005,1.000005,1.000005,1.000005,1.000005,1.000005,1.000005,1.000005,1.000005
min,-0.0234809,-3.542518,-0.1003632,-0.2295841,-0.4950055,-1.641924,-0.06447567,-0.8937361,-0.05873894,-0.6679221
25%,-0.02337289,-0.765357,-0.1003632,-0.229468,-0.3466045,-0.673323,-0.06447567,-0.8937361,-0.05873894,-0.6679221
50%,-0.02292631,-0.02026514,-0.1003632,-0.2293416,-0.08838695,-0.09216239,-0.06447567,-0.0187852,-0.05873894,-0.6679221
75%,-0.02148997,0.7248267,-0.1003632,-0.229011,0.1913488,0.4889982,-0.06447567,0.8561657,-0.05873894,0.2342365
max,181.4501,3.840666,22.56386,145.5977,165.9051,9.593848,22.71845,46.35361,22.7996,17.37525


In [321]:
df_temp.hist(bins = 30);

<IPython.core.display.Javascript object>

In [38]:
X

array([[-0.01986752, -1.37497766,  0.59343952, ..., -0.0187852 ,
         0.17450943,  1.13639505],
       [-0.02298463,  2.21501051, -0.10036319, ..., -0.8937361 ,
        -0.05873894, -0.66792214],
       [-0.01990211,  0.31841298, -0.10036319, ..., -0.8937361 ,
        -0.05873894, -0.66792214],
       ..., 
       [-0.0226886 ,  0.38614861, -0.10036319, ..., -0.0187852 ,
        -0.05873894,  2.03855365],
       [-0.02290489, -1.10403516, -0.10036319, ..., -0.0187852 ,
        -0.05873894, -0.66792214],
       [-0.02236639,  1.1312405 , -0.10036319, ..., -0.0187852 ,
        -0.05873894, -0.66792214]])

## Prepare test data in same way

In [39]:
df_test = pd.read_csv('./Eval.csv')

df_test

Unnamed: 0,variable_0,variable_1,variable_2,variable_3,variable_4,variable_5,variable_6,variable_7,variable_8,variable_9
0,-0.235544,0.090909,0,-0.408420,0.759402,0.000000,0,-0.5,0,0
1,-0.273905,0.681818,1,-0.526768,0.132906,-0.666667,0,-0.5,0,0
2,0.258914,0.045455,0,0.035590,-0.097863,0.333333,0,0.0,0,2
3,-0.291358,0.227273,0,-0.520809,0.032906,-0.500000,0,0.0,0,0
4,1.060935,-0.681818,0,-0.210981,-0.232051,-0.833333,0,-0.5,0,4
5,-0.291358,-0.136364,0,-0.479704,-0.317521,-1.000000,0,-0.5,0,0
6,-0.290537,1.136364,0,-0.528734,-0.744872,-0.666667,0,-0.5,0,0
7,-0.075091,-0.772727,0,0.533724,0.430342,-0.500000,0,0.0,0,0
8,1.598362,1.045455,0,3.799147,-0.744872,-0.833333,0,-0.5,0,0
9,-0.140467,0.318182,0,-0.138854,0.670513,0.166667,0,0.0,0,0


In [40]:
df_test.isna().sum()

variable_0    0
variable_1    0
variable_2    0
variable_3    0
variable_4    0
variable_5    0
variable_6    0
variable_7    0
variable_8    0
variable_9    0
dtype: int64

In [41]:
X_test = scale(df_test.values)

## Split into training, validation sets (this is not needed for all methods)

In [42]:

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=1)

## Build Baseline Neural Network

In [43]:
from keras import models
from keras import layers
from keras import losses
from keras import metrics
from keras import optimizers

model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

In [44]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 16)                176       
_________________________________________________________________
dense_5 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 17        
Total params: 465
Trainable params: 465
Non-trainable params: 0
_________________________________________________________________


In [97]:
num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))

Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/3

Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 

Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [100]:
def plot_loss(history, num):
    '''
    input: history from training keras sequential model (with validation data)
    output:  plot validation and training loss
    return:  tuple (list of training loss, list of validation loss)
    '''
    history_dict = history.history
    loss_values = history_dict['loss']
    val_loss_values = history_dict['val_loss']
    epochs = range(1, num_epochs+1)

    plt.figure()
    plt.plot(epochs, loss_values, 'bo', label='Training loss')
    plt.plot(epochs, val_loss_values, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
    
    return(loss_values, val_loss_values)

In [101]:
loss_values,val_loss_values = plot_loss(history, num_epochs)

<IPython.core.display.Javascript object>

In [102]:
np.min(val_loss_values)

0.1825619833469391

In [103]:
np.argmin(val_loss_values)

208

In [104]:
def plot_accuracy(history, num):
    '''
    input: history from training keras sequential model (with validation data)
    output:  plot validation and training accuracy
    return:  tuple (list of training accuracy, list of validation accuracy)
    '''
    history_dict = history.history
    epochs = range(1, num_epochs+1)
    plt.figure()
    acc_values = history_dict['acc']
    val_acc_values = history_dict['val_acc']
    plt.plot(epochs, acc_values, 'bo', label='Training acc')
    plt.plot(epochs, val_acc_values, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
    return(acc_values, val_acc_values)

In [105]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)

<IPython.core.display.Javascript object>

In [95]:
np.max(val_acc_values)

0.93689999999999996

In [106]:
np.argmax(val_acc_values)

296

## Tune Hyperparameters

## Only two layers

In [154]:
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10,)))
model.add(layers.Dense(1, activation='sigmoid'))

num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_19 (Dense)             (None, 16)                176       
_________________________________________________________________
dense_20 (Dense)             (None, 1)                 17        
Total params: 193
Trainable params: 193
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch

Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 

Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [155]:
loss_values,val_loss_values = plot_loss(history, num_epochs)

<IPython.core.display.Javascript object>

In [156]:
np.min(val_loss_values)

0.19167156834602356

In [157]:
np.argmin(val_loss_values)

288

In [158]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)

<IPython.core.display.Javascript object>

In [159]:
np.max(val_acc_values)

0.93645000009536739

In [160]:
np.argmax(val_acc_values)

247

## Smaller network performs almost as well as deeper network

### Try adding dimensions to hidden layer

In [147]:
model = models.Sequential()
model.add(layers.Dense(32, activation='relu', input_shape=(10,)))
model.add(layers.Dense(1, activation='sigmoid'))

num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 32)                352       
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 33        
Total params: 385
Trainable params: 385
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch

Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 

Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [148]:
loss_values,val_loss_values = plot_loss(history, num_epochs)

<IPython.core.display.Javascript object>

In [149]:
np.min(val_loss_values)

0.1907979407787323

In [150]:
np.argmin(val_loss_values)

286

In [151]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)

<IPython.core.display.Javascript object>

In [152]:
np.max(val_acc_values)

0.93725000009536741

In [153]:
np.argmax(val_acc_values)

298

## Finally, let's look at the deeper network with "wider" (higher dimension) hidden layers.  This is also called higher "capacity".

In [161]:
model = models.Sequential()
model.add(layers.Dense(32, activation='relu', input_shape=(10,)))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             (None, 32)                352       
_________________________________________________________________
dense_22 (Dense)             (None, 32)                1056      
_________________________________________________________________
dense_23 (Dense)             (None, 1)                 33        
Total params: 1,441
Trainable params: 1,441
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300

Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 

Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [162]:
loss_values,val_loss_values = plot_loss(history, num_epochs)

<IPython.core.display.Javascript object>

In [163]:
np.min(val_loss_values)

0.19068776938915252

In [164]:
np.argmin(val_loss_values)

148

In [165]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)

<IPython.core.display.Javascript object>

In [166]:
np.max(val_acc_values)

0.93665000076293947

In [167]:
np.argmax(val_acc_values)

103

## We have achieved a slightly lower validation accuracy, so let's use the previous model

## With more time and computing power, we can grid-search these hyper-parameters either using for loops or sklearn.model_selection.GridSearchCV

## In the beginning, it is useful to carefully do this by hand, to understand and control hyperparameters

## * ~BEST MODEL~ *

In [175]:
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10,)))
model.add(layers.Dense(1, activation='sigmoid'))

num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_26 (Dense)             (None, 64)                704       
_________________________________________________________________
dense_27 (Dense)             (None, 1)                 65        
Total params: 769
Trainable params: 769
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch

Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 

Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [176]:
loss_values,val_loss_values = plot_loss(history, num_epochs)

<IPython.core.display.Javascript object>

In [177]:
np.min(val_loss_values)

0.18983906185626984

In [178]:
np.argmin(val_loss_values)

285

In [179]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)

<IPython.core.display.Javascript object>

In [180]:
np.max(val_acc_values)

0.93774999999999997

In [181]:
np.argmax(val_acc_values)

160

In [182]:
val_loss_values[160]

0.19050341660976411

## We are seeing a plateau of validation accuracy at epoch 160, and validation accuracy is also   as low as the best model.

## One last double-check to see if a deeper model with this capacity does better.

In [183]:
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_28 (Dense)             (None, 64)                704       
_________________________________________________________________
dense_29 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_30 (Dense)             (None, 1)                 65        
Total params: 4,929
Trainable params: 4,929
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300

Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 

Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [184]:
loss_values,val_loss_values = plot_loss(history, num_epochs)

<IPython.core.display.Javascript object>

In [185]:
np.min(val_loss_values)

0.19042928180694579

In [186]:
np.argmin(val_loss_values)

176

In [187]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)

<IPython.core.display.Javascript object>

In [188]:
np.max(val_acc_values)

0.9373500000953674

In [189]:
np.argmax(val_acc_values)

62

In [190]:
val_loss_values[160]

0.19407168040275574

## So the best model has 1 hidden layer and a capacity of 64, and does best at 160 epochs

## However, we notice that the deeper model (more hidden layers) has a better overall accuracy and a larger gap between training accuracy and validation accuracy.  

## That means this model is overfitting, so let's keep this model and see if we can reduce the overfitting using various forms of regularization (below).
  
## Now, let's try some forms of regularization on the shallow and deeper models to see if we can increase the validation accuracy

## Regularization on the "shallow" model (1 hidden layer)

### First, let's try L2 regularization on the shallow model - adding to the loss function of the network a cost proportional to the square of the value of the weight coefficients (the L2 norm of the weights)

In [191]:
from keras import regularizers

model = models.Sequential()
model.add(layers.Dense(64, kernel_regularizer=regularizers.l2(0.001), activation='relu', input_shape=(10,)))

model.add(layers.Dense(1, activation='sigmoid'))

num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_31 (Dense)             (None, 64)                704       
_________________________________________________________________
dense_32 (Dense)             (None, 1)                 65        
Total params: 769
Trainable params: 769
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch

Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 

Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [192]:
loss_values,val_loss_values = plot_loss(history, num_epochs)

<IPython.core.display.Javascript object>

In [193]:
np.min(val_loss_values)

0.1930005410194397

In [194]:
np.argmin(val_loss_values)

261

In [195]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)

<IPython.core.display.Javascript object>

In [196]:
np.max(val_acc_values)

0.93709999933242794

In [197]:
np.argmax(val_acc_values)

131

### Even with a small cost, our validation scores (loss and accuracy) have gotten worse, so L2 regularization is not useful

### Let's try L1 regularization - adding to the loss function of the network a cost proportional to the square of the value of the weight coefficients (the L2 norm of the weights)

In [198]:
from keras import regularizers

model = models.Sequential()
model.add(layers.Dense(64, kernel_regularizer=regularizers.l1(0.001), activation='relu', input_shape=(10,)))

model.add(layers.Dense(1, activation='sigmoid'))

num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_33 (Dense)             (None, 64)                704       
_________________________________________________________________
dense_34 (Dense)             (None, 1)                 65        
Total params: 769
Trainable params: 769
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch

Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 

Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [199]:
loss_values,val_loss_values = plot_loss(history, num_epochs)

<IPython.core.display.Javascript object>

In [200]:
np.min(val_loss_values)

0.19757916455268859

In [201]:
np.argmin(val_loss_values)

290

In [202]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)

<IPython.core.display.Javascript object>

In [203]:
np.max(val_acc_values)

0.93690000066757206

In [204]:
np.argmax(val_acc_values)

18

### Even with a small cost, our validation scores (loss and accuracy) have gotten worse, so L1 regularization is not useful either

### Let's try dropout (randomly turning off output features during training) 

### First we dropout a small percentage to see if there is any improvement

In [205]:
dropout_rate = 0.1

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10,)))
model.add(layers.Dropout(dropout_rate))
model.add(layers.Dense(1, activation='sigmoid'))


num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_35 (Dense)             (None, 64)                704       
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_36 (Dense)             (None, 1)                 65        
Total params: 769
Trainable params: 769
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epo

Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 

Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [206]:
loss_values,val_loss_values = plot_loss(history, num_epochs)

<IPython.core.display.Javascript object>

In [207]:
np.min(val_loss_values)

0.18945955915451049

In [208]:
np.argmin(val_loss_values)

295

In [209]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)

<IPython.core.display.Javascript object>

In [210]:
np.max(val_acc_values)

0.93729999933242802

In [211]:
np.argmax(val_acc_values)

298

## Regularization on the 'deeper' model (1 hidden layer)

### L2 regularization

In [212]:
model = models.Sequential()
model.add(layers.Dense(64,  kernel_regularizer=regularizers.l2(0.001), activation='relu', input_shape=(10,)))
model.add(layers.Dense(64,  kernel_regularizer=regularizers.l2(0.001), activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_37 (Dense)             (None, 64)                704       
_________________________________________________________________
dense_38 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_39 (Dense)             (None, 1)                 65        
Total params: 4,929
Trainable params: 4,929
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300

Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 

Epoch 247/300
Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [218]:
loss_values,val_loss_values = plot_loss(history, num_epochs)



<IPython.core.display.Javascript object>

In [219]:
np.min(val_loss_values)

0.19412237677574157

In [220]:
np.argmin(val_loss_values)

298

In [221]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)



<IPython.core.display.Javascript object>

In [222]:
np.max(val_acc_values)

0.93700000009536744

In [223]:
np.argmax(val_acc_values)

155

### Even a small L2 regularization has decreased our validation accuracy

### Let's try L1 regularization

In [224]:
model = models.Sequential()
model.add(layers.Dense(64,  kernel_regularizer=regularizers.l1(0.001), activation='relu', input_shape=(10,)))
model.add(layers.Dense(64,  kernel_regularizer=regularizers.l1(0.001), activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_40 (Dense)             (None, 64)                704       
_________________________________________________________________
dense_41 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_42 (Dense)             (None, 1)                 65        
Total params: 4,929
Trainable params: 4,929
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300

Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 

Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [225]:
loss_values,val_loss_values = plot_loss(history, num_epochs)



<IPython.core.display.Javascript object>

In [226]:
np.min(val_loss_values)

0.20053317489624023

In [227]:
np.argmin(val_loss_values)

293

In [228]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)



<IPython.core.display.Javascript object>

In [229]:
np.max(val_acc_values)

0.9366499999046326

In [230]:
np.argmax(val_acc_values)

22

### Even a small L1 regularization has decreased our validation accuracy

### Finally, let's try dropout on this deeper model

In [231]:

dropout_rate = 0.1

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10,)))
model.add(layers.Dropout(dropout_rate))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(dropout_rate))
model.add(layers.Dense(1, activation='sigmoid'))


num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_43 (Dense)             (None, 64)                704       
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_44 (Dense)             (None, 64)                4160      
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_45 (Dense)             (None, 1)                 65        
Total params: 4,929
Trainable params: 4,929
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 

Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 

Epoch 246/300
Epoch 247/300
Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [232]:
loss_values,val_loss_values = plot_loss(history, num_epochs)



<IPython.core.display.Javascript object>

In [233]:
np.min(val_loss_values)

0.18476238024234773

In [234]:
np.argmin(val_loss_values)

293

In [235]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)



<IPython.core.display.Javascript object>

In [236]:
np.max(val_acc_values)

0.93725000009536741

In [237]:
np.argmax(val_acc_values)

52

### The model does not do better than the deep model above, but not significantly worse either

### Finally, let's try increasing the dropout rate to .5 on this deeper model

In [238]:
dropout_rate = 0.5

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10,)))
model.add(layers.Dropout(dropout_rate))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(dropout_rate))
model.add(layers.Dense(1, activation='sigmoid'))


num_epochs = 300

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X_train,
                    y_train,
                    epochs=num_epochs,
                    batch_size=1024,
                    validation_data=(X_val, y_val))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_46 (Dense)             (None, 64)                704       
_________________________________________________________________
dropout_4 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_47 (Dense)             (None, 64)                4160      
_________________________________________________________________
dropout_5 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_48 (Dense)             (None, 1)                 65        
Total params: 4,929
Trainable params: 4,929
Non-trainable params: 0
_________________________________________________________________
Train on 80000 samples, validate on 20000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 

Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 

Epoch 245/300
Epoch 246/300
Epoch 247/300
Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [239]:
loss_values,val_loss_values = plot_loss(history, num_epochs)



<IPython.core.display.Javascript object>

In [240]:
np.min(val_loss_values)

0.18628931789398193

In [241]:
np.argmin(val_loss_values)

293

In [242]:
acc_values, val_acc_values = plot_accuracy(history, num_epochs)



<IPython.core.display.Javascript object>

In [243]:
np.max(val_acc_values)

0.93644999980926513

In [244]:
np.argmax(val_acc_values)

128

## So, after checking the deeper and shallow models with regularization, the best model has 1 hidden layer and a capacity of 64, and does best at 160 epochs

## FInally, let's train this model on the complete training set, then use it to predict the test output

In [245]:
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10,)))
model.add(layers.Dense(1, activation='sigmoid'))

num_epochs = 160

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

history = model.fit(X,
                    y,
                    epochs=num_epochs,
                    batch_size=1024)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_49 (Dense)             (None, 64)                704       
_________________________________________________________________
dense_50 (Dense)             (None, 1)                 65        
Total params: 769
Trainable params: 769
Non-trainable params: 0
_________________________________________________________________
Epoch 1/160
Epoch 2/160
Epoch 3/160
Epoch 4/160
Epoch 5/160
Epoch 6/160
Epoch 7/160
Epoch 8/160
Epoch 9/160
Epoch 10/160
Epoch 11/160
Epoch 12/160
Epoch 13/160
Epoch 14/160
Epoch 15/160
Epoch 16/160
Epoch 17/160
Epoch 18/160
Epoch 19/160
Epoch 20/160
Epoch 21/160
Epoch 22/160
Epoch 23/160
Epoch 24/160
Epoch 25/160
Epoch 26/160
Epoch 27/160
Epoch 28/160
Epoch 29/160
Epoch 30/160
Epoch 31/160
Epoch 32/160
Epoch 33/160
Epoch 34/160
Epoch 35/160
Epoch 36/160
Epoch 37/160
Epoch 38/160
Epoch 39/160
Epoch 40/160
Epoch 41/160
Epoch 42/160
Epo

Epoch 159/160
Epoch 160/160


In [303]:
# the output probabilities

y_test = model.predict(X_test)

y_test

array([[ 0.02053768],
       [ 0.06507171],
       [ 0.0179081 ],
       ..., 
       [ 0.03147818],
       [ 0.00588191],
       [ 0.03958736]], dtype=float32)

In [304]:
y_test.astype(int)

array([[0],
       [0],
       [0],
       ..., 
       [0],
       [0],
       [0]])

In [305]:
eval_df = pd.DataFrame(y_test, columns = ['yhat'])
eval_df.head()
eval_df['row_id'] = eval_df.index

In [306]:
eval_df.to_csv('./eval_output_Galen_Wilkerson.csv', columns=['row_id' ,'yhat'], index = False)