# Part A - Deep Learning Model

In [1]:
%matplotlib inline 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
import warnings
import random
from datetime import datetime
random.seed(datetime.now())
warnings.filterwarnings('ignore')
 
# Make plots larger
plt.rcParams['figure.figsize'] = (10, 6)

In [2]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.optimizers import RMSprop
from keras.optimizers import Adagrad
from keras.optimizers import Adadelta

Using TensorFlow backend.


In [3]:
import pandas as pd

df_train= pd.read_csv('./train.csv',nrows=200000, parse_dates=['click_time']) #train data subset, original too large
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 8 columns):
ip                 200000 non-null int64
app                200000 non-null int64
device             200000 non-null int64
os                 200000 non-null int64
channel            200000 non-null int64
click_time         200000 non-null datetime64[ns]
attributed_time    348 non-null object
is_attributed      200000 non-null int64
dtypes: datetime64[ns](1), int64(6), object(1)
memory usage: 12.2+ MB


In [4]:
df_train.head(10)

Unnamed: 0,ip,app,device,os,channel,click_time,attributed_time,is_attributed
0,83230,3,1,13,379,2017-11-06 14:32:21,,0
1,17357,3,1,19,379,2017-11-06 14:33:34,,0
2,35810,3,1,13,379,2017-11-06 14:34:12,,0
3,45745,14,1,13,478,2017-11-06 14:34:52,,0
4,161007,3,1,13,379,2017-11-06 14:35:08,,0
5,18787,3,1,16,379,2017-11-06 14:36:26,,0
6,103022,3,1,23,379,2017-11-06 14:37:44,,0
7,114221,3,1,19,379,2017-11-06 14:37:59,,0
8,165970,3,1,13,379,2017-11-06 14:38:10,,0
9,74544,64,1,22,459,2017-11-06 14:38:23,,0


In [5]:
df_train["is_attributed"].value_counts()

0    199652
1       348
Name: is_attributed, dtype: int64

In [6]:
import pandas as pd
df_test= pd.read_csv('./train.csv', nrows=50000,skiprows=range(1, 400000), parse_dates=['click_time']) #train data subset, original too large
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 8 columns):
ip                 50000 non-null int64
app                50000 non-null int64
device             50000 non-null int64
os                 50000 non-null int64
channel            50000 non-null int64
click_time         50000 non-null datetime64[ns]
attributed_time    88 non-null object
is_attributed      50000 non-null int64
dtypes: datetime64[ns](1), int64(6), object(1)
memory usage: 3.1+ MB


In [7]:
df_test.head(10)

Unnamed: 0,ip,app,device,os,channel,click_time,attributed_time,is_attributed
0,115115,8,1,13,145,2017-11-06 16:07:47,,0
1,21633,1,1,19,178,2017-11-06 16:07:47,,0
2,144498,12,1,17,178,2017-11-06 16:07:47,,0
3,76919,2,1,6,237,2017-11-06 16:07:47,,0
4,1556,15,1,13,245,2017-11-06 16:07:47,,0
5,67467,15,1,37,245,2017-11-06 16:07:47,,0
6,20266,64,1,19,459,2017-11-06 16:07:47,,0
7,31564,15,1,19,265,2017-11-06 16:07:47,,0
8,1732,9,1,13,134,2017-11-06 16:07:47,,0
9,111114,15,1,13,245,2017-11-06 16:07:47,,0


In [8]:
df_test["is_attributed"].value_counts()

0    49912
1       88
Name: is_attributed, dtype: int64

In [9]:
X_train = df_train.loc[:,["ip","app","device","os","channel"]]
X_test = df_test.loc[:,["ip","app","device","os","channel"]]

In [10]:
X_train=X_train.values
X_test= X_test.values

In [11]:
X_train.shape

(200000, 5)

In [12]:
X_test.shape

(50000, 5)

In [13]:
X_train[0]

array([83230,     3,     1,    13,   379])

In [14]:
X_test[0]

array([115115,      8,      1,     13,    145])

In [15]:
y_train = df_train["is_attributed"]

In [16]:
y_train = y_train.values

In [17]:
y_test =df_test["is_attributed"]

In [18]:
y_test= y_test.values

In [19]:
n_classes = 2
y_train = keras.utils.to_categorical(y_train, n_classes)
y_test = keras.utils.to_categorical(y_test, n_classes)

In [20]:
y_train[0:5]

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

## ANN With One Dense Layer

In [21]:
def shallow_net_A(n=55,i=5,o=2):
    # create simple one dense layer net
    # default 55 neurons, input 5, output 2
    net = Sequential()
    net.add(Dense(n, activation='sigmoid', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [22]:
nn=shallow_net_A()

In [23]:
nn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 55)                330       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 112       
Total params: 442
Trainable params: 442
Non-trainable params: 0
_________________________________________________________________


In [252]:
X_train[0]

array([83230,     3,     1,    13,   379])

In [253]:
nn.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_train, y_train))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a37dd1710>

In [254]:
# 99.824% accuracy after 99 epochs
nn.evaluate(X_test,y_test)



[0.0017578391818658565, 0.99824]

In [29]:
nn.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x10ed0f438>

In [30]:
# 99.82% accuracy after 99 epochs
nn.evaluate(X_test,y_test)



[0.0017770527876763663, 0.99822]

In [31]:
# Predicting the Test set results
y_pred = nn.predict(X_test)
y_pred = (y_pred > 0.5)

In [32]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))

array([[199644,      0],
       [   356,      0]])

#### 199644/(199644+356)=0.99822, so we achieved 99.82% accuracy.

## MLP

In [66]:
def shallow_net_C(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    net = Sequential()
    net.add(Dense(n, activation='sigmoid', input_shape=(i,)))
    net.add(Dense(n, activation='sigmoid', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [67]:
nn3=shallow_net_C()
nn3.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 55)                330       
_________________________________________________________________
dense_4 (Dense)              (None, 55)                3080      
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 112       
Total params: 3,522
Trainable params: 3,522
Non-trainable params: 0
_________________________________________________________________


In [68]:
nn3.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a28a976a0>

In [69]:
# 99.82% accuracy after 99 epochs
nn3.evaluate(X_test, y_test)



[0.0017777226833951135, 0.99822]

In [70]:
nn3.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a28a5c5f8>

In [71]:
# Predicting the Test set results
y_pred3 = nn3.predict(X_test)
y_pred3 = (y_pred3 > 0.5)

In [72]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test.argmax(axis=1), y_pred3.argmax(axis=1))

array([[199644,      0],
       [   356,      0]])

#### 199644/(199644+356)=0.99822, so we achieved 99.82% accuracy.

In [73]:
# 99.82% accuracy after 99 epochs
nn3.evaluate(X_test, y_test)



[0.001777018947737597, 0.99822]

# Part B - Activation Function

#### Rectified linear unit (ReLU)

In [74]:
def shallow_net_B(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using relu
    net = Sequential()
    net.add(Dense(n, activation='relu', input_shape=(i,)))
    net.add(Dense(n, activation='relu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [75]:
nn2=shallow_net_B()
nn2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 55)                330       
_________________________________________________________________
dense_7 (Dense)              (None, 55)                3080      
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 112       
Total params: 3,522
Trainable params: 3,522
Non-trainable params: 0
_________________________________________________________________


In [76]:
nn2.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a289facf8>

In [77]:
# Only 0.25% accuracy after 99 epochs
nn2.evaluate(X_test, y_test)



[0.0018950000000930842, 0.998105]

In [78]:
nn2.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2a4c7c88>

In [79]:
# Predicting the Test set results
y_pred2 = nn2.predict(X_test)
y_pred2 = (y_pred2 > 0.5)

In [80]:
confusion_matrix(y_test.argmax(axis=1), y_pred2.argmax(axis=1))

array([[199621,     23],
       [   356,      0]])

#### 153/(199491+356+153)=0.000765, so we only achieved 0.0765% accuracy.

In [81]:
# Only 0.25% accuracy after 99 epochs
nn2.evaluate(X_test, y_test)



[0.0018949999988735755, 0.998105]

#### TanH

In [82]:
def shallow_net_D(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using TanH
    net = Sequential()
    net.add(Dense(n, activation='tanh', input_shape=(i,)))
    net.add(Dense(n, activation='tanh', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [83]:
nn4=shallow_net_D()
nn4.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 55)                330       
_________________________________________________________________
dense_10 (Dense)             (None, 55)                3080      
_________________________________________________________________
dense_11 (Dense)             (None, 2)                 112       
Total params: 3,522
Trainable params: 3,522
Non-trainable params: 0
_________________________________________________________________


In [84]:
nn4.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2a4cd358>

In [85]:
# 99.82% accuracy after 99 epochs
nn4.evaluate(X_test, y_test)



[0.0017800749149445255, 0.99822]

In [86]:
nn4.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2b3449e8>

In [87]:
# Predicting the Test set results
y_pred4 = nn4.predict(X_test)
y_pred4 = (y_pred4 > 0.5)

In [88]:
confusion_matrix(y_test.argmax(axis=1), y_pred4.argmax(axis=1))

array([[199644,      0],
       [   356,      0]])

#### 199644/(199644+356)=0.99822, so we achieved 99.82% accuracy.

In [89]:
# 99.82% accuracy after 99 epochs
nn4.evaluate(X_test, y_test)



[0.0017786161237332999, 0.99822]

#### Exponential linear unit (ELU)

In [90]:
def shallow_net_E(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using ELU
    net = Sequential()
    net.add(Dense(n, activation='elu', input_shape=(i,)))
    net.add(Dense(n, activation='elu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [91]:
nn5=shallow_net_E()
nn5.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 55)                330       
_________________________________________________________________
dense_13 (Dense)             (None, 55)                3080      
_________________________________________________________________
dense_14 (Dense)             (None, 2)                 112       
Total params: 3,522
Trainable params: 3,522
Non-trainable params: 0
_________________________________________________________________


In [92]:
nn5.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2b354c88>

In [93]:
# 99.82% accuracy after 99 epochs
nn5.evaluate(X_test, y_test)



[0.00178, 0.99822]

In [94]:
nn5.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0xb230a0ac8>

In [95]:
# Predicting the Test set results
y_pred5 = nn5.predict(X_test)
y_pred5 = (y_pred5 > 0.5)

In [96]:
confusion_matrix(y_test.argmax(axis=1), y_pred5.argmax(axis=1))

array([[199644,      0],
       [   356,      0]])

#### 199644/(199644+356)=0.99822, so we achieved 99.82% accuracy.

In [97]:
# 99.82% accuracy after 99 epochs
nn5.evaluate(X_test, y_test)



[0.00178, 0.99822]

#### Scaled exponential linear unit (SELU)

In [98]:
def shallow_net_F(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [99]:
nn6=shallow_net_F()
nn6.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 55)                330       
_________________________________________________________________
dense_16 (Dense)             (None, 55)                3080      
_________________________________________________________________
dense_17 (Dense)             (None, 2)                 112       
Total params: 3,522
Trainable params: 3,522
Non-trainable params: 0
_________________________________________________________________


In [100]:
nn6.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0xb24e49630>

In [101]:
# 99.816% accuracy after 99 epochs
nn6.evaluate(X_test, y_test)



[0.0018400000000054328, 0.99816]

In [102]:
nn6.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2eace5f8>

In [103]:
# Predicting the Test set results
y_pred6 = nn6.predict(X_test)
y_pred6 = (y_pred6 > 0.5)

In [104]:
confusion_matrix(y_test.argmax(axis=1), y_pred6.argmax(axis=1))

array([[199632,     12],
       [   356,      0]])

#### 199632/(199632+356+12)=0.99816, so we achieved 99.816% accuracy.

In [105]:
# 99.82% accuracy after 99 epochs
nn5.evaluate(X_test, y_test)



[0.00178, 0.99822]

### Change the activation function. How does it effect the accuracy?

With Rectified linear unit (ReLU) activation function, the accuracy is only <font color='red'>0.25%</font> after 99 epochs.
With TanH activation function, the accuracy is <font color='red'>99.8222%</font> after 99 epochs.
With Exponential linear unit (ELU) activation function, the accuracy is <font color='red'>99.8215%</font> after 99 epochs.
With Scaled exponential linear unit (SELU) activation function, the accuracy is <font color='red'>99.816%</font> after 99 epochs.

### How does it effect how quickly the network plateaus?

With Rectified linear unit (ReLU) activation function, the accuracy is only 0.25% after 99 epochs, it took <font color='red'>2s 9us</font>.
With TanH activation function, the accuracy is 99.8222% after 99 epochs, it took <font color='red'>2s 9us</font>.
With Exponential linear unit (ELU) activation function, the accuracy is 99.8215% after 99 epochs, it took <font color='red'>2s 10us</font>.
With Scaled exponential linear unit (SELU) activation function, the accuracy is 99.816% after 99 epochs, it took <font color='red'>2s 11us</font>.
So, the most accuracy activation function in this case is <font color='red'>TanH</font>, it takes the least time as well.

# Part C - Cost Function

#### Quadratic cost (mean-square error)

In [106]:
def shallow_net_mse(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [107]:
nnm=shallow_net_mse()
nnm.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2ead0f28>

In [108]:
nnm.evaluate(X_test, y_test)



[0.99822, 0.00178]

#### Hinge

In [109]:
def shallow_net_hinge(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='hinge', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [110]:
nnh=shallow_net_hinge()
nnh.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2f0ef160>

In [111]:
nnh.evaluate(X_test, y_test)



[0.500890000038147, 0.99822]

#### Cosine Proximity

In [112]:
def shallow_net_cosine(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='cosine_proximity', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [113]:
nnc=shallow_net_cosine()
nnc.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2f6ee358>

In [114]:
nnc.evaluate(X_test, y_test)



[-0.9982198807907104, 0.99822]

#### Kullback–Leibler divergence

In [115]:
def shallow_net_kullback(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='kullback_leibler_divergence', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [116]:
nnk=shallow_net_kullback()
nnk.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2fc63470>

In [117]:
nnk.evaluate(X_test, y_test)



[0.0286902064037323, 0.99822]

### QUESTIONS:
1. Change the cost function. How does it affect the accuracy?
   * Quadratic cost (mean-square error): Test accuracy: 0.00178
   * hinge_loss: Test accuracy: 0.99822
   * Hinge_loss has the higher accuracy than Quadratic cost
2. How does it affect how quickly the network plateaus?
   * They varys slightly. Only less than 3us can be detected, but Quadratic cost (mean-square error) is a little more quicky than hinge_loss.

# Part D - Epochs

#### epoch:20

In [118]:
def shallow_net_mse_20(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [119]:
nnm20=shallow_net_mse_20()
nnm20.fit(X_train, y_train, batch_size=128, epochs=20, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1a2fd39908>

In [120]:
nnm20.evaluate(X_test, y_test)



[0.99822, 0.00178]

#### epochs:40

In [121]:
def shallow_net_mse_40(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [122]:
nnm40=shallow_net_mse_40()
nnm40.fit(X_train, y_train, batch_size=128, epochs=40, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x1a2ffbf4e0>

In [123]:
nnm40.evaluate(X_test, y_test)



[0.00178, 0.99822]

#### epochs:80

In [124]:
def shallow_net_mse_80(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [125]:
nnm80=shallow_net_mse_80()
nnm80.fit(X_train, y_train, batch_size=128, epochs=80, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/

<keras.callbacks.History at 0x1a2fc582b0>

In [126]:
nnm80.evaluate(X_test, y_test)



[0.0018199999666246838, 0.99818]

#### epochs:99

In [127]:
def shallow_net_mse_99(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [128]:
nnm99=shallow_net_mse_99()
nnm99.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a30a73c50>

In [129]:
nnm99.evaluate(X_test, y_test)



[0.0017800061932637453, 0.99822]

### QUESTIONS:
1. Change the number of epochs initialization. How does it affect the accuracy?
   * epochs = 20: Test accuracy: 0.00178
   * epochs = 40: Test accuracy: 0.99822
   * epochs = 80: Test accuracy: 0.99818
   * epochs = 99: Test accuracy: 0.99822
   * the more number of epochs initialization, the higher accuracy of the test and it will reach the top and stay stable later.
2. How does it affect how quickly the network plateaus?
   * the more number of epochs initialization, the more slowly of network plateaus.

# Part E - Gradient Estimation

#### Stochastic Gradient Descent

In [130]:
def shallow_net_mse_sgd(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01), metrics=['accuracy'])
    return net

In [131]:
nnmsgd=shallow_net_mse_sgd()
nnmsgd.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2fd443c8>

In [132]:
nnmsgd.evaluate(X_test, y_test)



[0.0017800000000000003, 0.99822]

#### RMSProp

In [133]:
def shallow_net_mse_rms(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=RMSprop(lr=0.001), metrics=['accuracy'])
    return net

In [134]:
nnmrms=shallow_net_mse_rms()
nnmrms.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a30e06358>

In [135]:
nnmrms.evaluate(X_test, y_test)



[0.0018500000000000057, 0.99815]

#### Adagrad

In [136]:
def shallow_net_mse_adagrad(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=Adagrad(lr=0.01), metrics=['accuracy'])
    return net

In [137]:
nnmadagrad=shallow_net_mse_adagrad()
nnmadagrad.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a31155668>

In [138]:
nnmadagrad.evaluate(X_test, y_test)



[0.0017800000000000548, 0.99822]

#### Adadelta

In [139]:
def shallow_net_mse_adadelta(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=Adadelta(lr=1.0), metrics=['accuracy'])
    return net

In [140]:
nnmadadelta=shallow_net_mse_adadelta()
nnmadadelta.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a31514470>

In [141]:
nnmadadelta.evaluate(X_test, y_test)



[0.0017800000000008445, 0.99822]

### QUESTIONS:
1. Change the gradient estimation. How does it affect the accuracy?
   * RMSProp: Test accuracy: 0.99815
   * Stochastic Gradient Descent: Test accuracy: 0.99822
   * Adagrad: Test accuracy: 0.99822
   * Adadelta: Test accuracy: 0.99822
   * Stochastic, Adagrad, Adadelta are better than RMSProp in my project.
   * With 99 epochs, they doesn't effect accuracy significantly.
2. How does it affect how quickly the network plateaus?
   * Adadelta is more quickly than others.

# Part F - Network Architecture

#### Change the number of layers

In [33]:
def shallow_net_nl(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    net.add(Dense(2, activation='softmax'))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=Adadelta(lr=1.0), metrics=['accuracy'])
    return net

In [34]:
nnl=shallow_net_nl()
nnl.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x2198783c8>

In [35]:
nnl.evaluate(X_test, y_test)



[0.0017768355437127684, 0.99822]

#### Change Size of each layer

In [41]:
def shallow_net_size(n=22,i=5,o=2):
    # create two dense layers net
    # default 22 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='softmax'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=Adadelta(lr=1.0), metrics=['accuracy'])
    return net

In [42]:
nns=shallow_net_size()
nns.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2cc4c630>

In [43]:
nns.evaluate(X_test, y_test)



[0.0017800000000038018, 0.99822]

#### Change the connection type

In [44]:
def shallow_net_type(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,)))
    net.add(Dense(2, activation='sigmoid'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=Adadelta(lr=1.0), metrics=['accuracy'])
    return net

In [45]:
nnt=shallow_net_type()
nnt.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2a16f3c8>

In [89]:
nnt.evaluate(X_test, y_test)



[0.0017822998186825532, 0.99822]

### QUESTIONS:
1. Change the Network Architecture. How does it affect the accuracy?
   * Number of layers: num_layers
   * Size of each layer: layer_size
   * layer_size = 55 and num_layers = 4: Test accuracy: 0.99822
   * layer_size = 22 and num_layers = 2: Test accuracy: 0.99822
   * Both are similar in my project.
2. How does it affect how quickly the network plateaus?
   * the less num_layers * lstm_size, the more faster of network plateaus.
3. Pre-trained components?
   * With Pre-trained, the model will run better.

# Part G - Network Initialization

#### Uniform

In [120]:
def shallow_net_zero(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,), kernel_initializer='random_uniform'))
    net.add(Dense(2, kernel_initializer='random_uniform'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=Adadelta(lr=1.0), metrics=['accuracy'])
    return net

In [121]:
nnz=shallow_net_zero()
nnz.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

<keras.callbacks.History at 0x1a2e768ba8>

In [92]:
nnz.evaluate(X_test, y_test)



[0.008659059896469116, 0.99822]

#### 0

In [154]:
def shallow_net_uniform(n=55,i=5,o=2):
    # create two dense layers net
    # default 55 neurons, input 5, output 2
    # Using PReLU
    net = Sequential()
    net.add(Dense(n, activation='selu', input_shape=(i,), kernel_initializer='zeros'))
    net.add(Dense(2, kernel_initializer='zeros'))
    # Compile net
    net.compile(loss='mean_squared_error', optimizer=Adadelta(lr=1.0), metrics=['accuracy'])
    return net

In [167]:
nnf=shallow_net_uniform()
his = nnf.fit(X_train, y_train, batch_size=128, epochs=99, verbose=1, validation_data=(X_test, y_test))

Train on 200000 samples, validate on 200000 samples
Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/

In [166]:
his.history

{'acc': [0.99826, 0.99826, 0.99826],
 'loss': [0.08030173456932671, 0.0017379888619989052, 0.0017385286231042655],
 'val_acc': [0.99822, 0.99822, 0.99822],
 'val_loss': [0.0017792068115263806,
  0.0017769054921933276,
  0.0017785380588233556]}

In [168]:
nnf.evaluate(X_test, y_test)



[0.001785538724353537, 0.99822]

### QUESTIONS:
1. Change the network initialization. How does it affect the accuracy?
   * Uniform: Test accuracy: 0.99822
   * 0: Test accuracy: 0.99822
   * It is similar when Network initialization change in my project.
2. How does it affect how quickly the network plateaus?
   * It is more quickly when Network initialization is than set is as 0.

# Part H - Tensorboard

In [179]:
import tensorflow as tf
with tf.summary.FileWriter(logdir='/Users/PP/Documents/7390/Assignment6_YipengHong_001228971/logs', graph=tf.get_default_graph()) as writer:
    writer.flush()
# In current folder, enter in the shell: tensorboard --logdir=logs
# And check by entering ( http://localhost:6006 ) on Chrome