# Neural Networks with Keras

In [1]:
# Seed every random number generator the notebook relies on so the results are
# reproducible. Seeding NumPy alone is not enough: Keras draws its initial layer
# weights from TensorFlow's own global generator, which is seeded separately.
from numpy.random import seed
import tensorflow as tf

SEED = 42
seed(SEED)                # NumPy global RNG
tf.random.set_seed(SEED)  # TensorFlow global RNG (used by Keras weight initializers)

In [3]:
# Build a small synthetic two-class dataset in which all 3 features are informative

from sklearn.datasets import make_classification

X, y = make_classification(
    n_classes=2,
    n_clusters_per_class=1,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    random_state=42,
)

# Turn the flat label vector into a single-column 2-D array
y = y.reshape(-1, 1)

print(X.shape)
print(y.shape)
X

(100, 3)
(100, 1)


array([[-0.86958066, -0.87701464,  0.442002  ],
       [-2.1527845 , -0.37230103, -0.64729718],
       [-2.11161929, -0.48086296, -0.51786597],
       [ 1.12660111, -2.70423633,  2.8518044 ],
       [-1.56110675, -0.87568159,  0.30653508],
       [-2.18180724, -0.71670684,  0.60054178],
       [ 1.74807165, -2.29941587,  1.16791411],
       [-1.1519394 ,  1.8077843 ,  1.14552284],
       [ 1.03544232, -0.26934229, -1.19390553],
       [-2.8264353 , -0.74038267,  1.53726559],
       [ 0.04512781,  0.25748089,  0.93574304],
       [ 0.93404195, -1.12574522,  0.22329785],
       [-1.86135823, -0.86125991,  0.9781879 ],
       [ 2.57105677, -1.97018772,  0.23926237],
       [-0.32417081, -0.55605608, -1.24505785],
       [ 0.91885124,  0.18895024, -0.41128972],
       [-0.27804043, -1.19890971,  0.73100285],
       [-0.24428604, -1.15939109,  2.72813921],
       [-1.14475154, -1.04293995,  2.16686541],
       [-3.24479978, -0.599969  ,  0.92297922],
       [-1.166365  , -1.08462353,  1.626

Use train_test_split to create training and testing data

In [4]:
from sklearn.model_selection import train_test_split

# Hold out part of the samples for evaluation; random_state pins the split
train_test_parts = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = train_test_parts

## Data Preprocessing

It is really important to scale our data before using multilayer perceptron models. 

Without scaling, it is often difficult for the training cycle to converge.

In [5]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only
X_scaler = StandardScaler()
X_scaler.fit(X_train)

Remember to scale both the training and testing data, using the scaler that was fit on the training data only.

In [6]:
# Apply the scaler fitted on the training set to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

One-hot encode the labels

In [7]:
from tensorflow.keras.utils import to_categorical

In [8]:
# Convert the integer class labels of each split into one-hot rows
y_train_categorical, y_test_categorical = map(to_categorical, (y_train, y_test))
y_train_categorical

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.

## Creating our Model

We must first decide what kind of model to apply to our data. 

When the value we want to predict is numerical (continuous), we use a regressor model.

When the value we want to predict is categorical (a class label), we use a classifier model.

In this example, we will use a classifier to build the following network:

![nnet.png](../Images/nnet.png)

## Defining our Model Architecture (the layers)

We first need to create a sequential model

In [9]:
from tensorflow.keras.models import Sequential

# Start with an empty Sequential model; layers are attached afterwards with model.add()
model = Sequential()

Next, we add our first layer. This layer requires you to specify both the number of inputs and the number of nodes that you want in the hidden layer.

In [10]:
from tensorflow.keras.layers import Dense

# Hidden layer: 4 ReLU units fed by the 3 input features
number_inputs = 3
number_hidden_nodes = 4
hidden_layer = Dense(
    units=number_hidden_nodes,
    input_dim=number_inputs,
    activation='relu',
)
model.add(hidden_layer)

![first_layer](../Images/nnet_first_layer.png)

Our final layer is the output layer. Here, we need to specify the activation function (typically `softmax` for classification) and the number of classes (labels) that we are trying to predict (2 in this example).

In [11]:
# Output layer: one softmax unit per class being predicted
number_classes = 2
output_layer = Dense(units=number_classes, activation='softmax')
model.add(output_layer)

![output_layer](../Images/nnet_output_layer.png)

## Model Summary

In [12]:
# Print each layer's output shape and parameter count, plus the model totals
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 4)                 16        
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 10        
Total params: 26
Trainable params: 26
Non-trainable params: 0
_________________________________________________________________


## Compile the Model

Now that we have our model architecture defined, we must compile the model using a loss function and optimizer. We can also specify additional training metrics such as accuracy.

In [13]:
# Loss choice: categorical crossentropy for categorical targets, mean squared error for regression
# Hint: the output layer in this example uses softmax (categorical), so crossentropy applies
# With a `linear` output activation you may want `mse` as the loss instead
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## Training the Model
Finally, we train our model using our training data

Training consists of updating our weights using our optimizer and loss function. In this example, we train for 1000 epochs, where one epoch is a single full pass (loop) over the training data.

We also choose to shuffle our training data and increase the detail printed out during each training cycle.

In [14]:
# Train the model on the scaled training features and their one-hot labels
fit_options = dict(epochs=1000, shuffle=True, verbose=2)
model.fit(X_train_scaled, y_train_categorical, **fit_options)

Train on 75 samples
Epoch 1/1000
75/75 - 0s - loss: 0.7596 - accuracy: 0.5067
Epoch 2/1000
75/75 - 0s - loss: 0.7537 - accuracy: 0.5600
Epoch 3/1000
75/75 - 0s - loss: 0.7480 - accuracy: 0.5333
Epoch 4/1000
75/75 - 0s - loss: 0.7424 - accuracy: 0.5467
Epoch 5/1000
75/75 - 0s - loss: 0.7366 - accuracy: 0.5467
Epoch 6/1000
75/75 - 0s - loss: 0.7313 - accuracy: 0.5333
Epoch 7/1000
75/75 - 0s - loss: 0.7258 - accuracy: 0.5333
Epoch 8/1000
75/75 - 0s - loss: 0.7205 - accuracy: 0.5200
Epoch 9/1000
75/75 - 0s - loss: 0.7155 - accuracy: 0.5200
Epoch 10/1000
75/75 - 0s - loss: 0.7107 - accuracy: 0.5200
Epoch 11/1000
75/75 - 0s - loss: 0.7059 - accuracy: 0.5200
Epoch 12/1000
75/75 - 0s - loss: 0.7010 - accuracy: 0.5200
Epoch 13/1000
75/75 - 0s - loss: 0.6965 - accuracy: 0.5200
Epoch 14/1000
75/75 - 0s - loss: 0.6917 - accuracy: 0.5200
Epoch 15/1000
75/75 - 0s - loss: 0.6872 - accuracy: 0.5200
Epoch 16/1000
75/75 - 0s - loss: 0.6828 - accuracy: 0.5333
Epoch 17/1000
75/75 - 0s - loss: 0.6785 - acc

Epoch 140/1000
75/75 - 0s - loss: 0.3611 - accuracy: 0.9200
Epoch 141/1000
75/75 - 0s - loss: 0.3590 - accuracy: 0.9200
Epoch 142/1000
75/75 - 0s - loss: 0.3570 - accuracy: 0.9200
Epoch 143/1000
75/75 - 0s - loss: 0.3549 - accuracy: 0.9200
Epoch 144/1000
75/75 - 0s - loss: 0.3528 - accuracy: 0.9200
Epoch 145/1000
75/75 - 0s - loss: 0.3508 - accuracy: 0.9200
Epoch 146/1000
75/75 - 0s - loss: 0.3488 - accuracy: 0.9200
Epoch 147/1000
75/75 - 0s - loss: 0.3467 - accuracy: 0.9200
Epoch 148/1000
75/75 - 0s - loss: 0.3448 - accuracy: 0.9200
Epoch 149/1000
75/75 - 0s - loss: 0.3427 - accuracy: 0.9200
Epoch 150/1000
75/75 - 0s - loss: 0.3407 - accuracy: 0.9200
Epoch 151/1000
75/75 - 0s - loss: 0.3387 - accuracy: 0.9200
Epoch 152/1000
75/75 - 0s - loss: 0.3369 - accuracy: 0.9333
Epoch 153/1000
75/75 - 0s - loss: 0.3349 - accuracy: 0.9333
Epoch 154/1000
75/75 - 0s - loss: 0.3330 - accuracy: 0.9333
Epoch 155/1000
75/75 - 0s - loss: 0.3311 - accuracy: 0.9333
Epoch 156/1000
75/75 - 0s - loss: 0.3293

Epoch 277/1000
75/75 - 0s - loss: 0.1739 - accuracy: 0.9467
Epoch 278/1000
75/75 - 0s - loss: 0.1731 - accuracy: 0.9467
Epoch 279/1000
75/75 - 0s - loss: 0.1724 - accuracy: 0.9467
Epoch 280/1000
75/75 - 0s - loss: 0.1716 - accuracy: 0.9467
Epoch 281/1000
75/75 - 0s - loss: 0.1709 - accuracy: 0.9600
Epoch 282/1000
75/75 - 0s - loss: 0.1701 - accuracy: 0.9600
Epoch 283/1000
75/75 - 0s - loss: 0.1694 - accuracy: 0.9600
Epoch 284/1000
75/75 - 0s - loss: 0.1687 - accuracy: 0.9600
Epoch 285/1000
75/75 - 0s - loss: 0.1679 - accuracy: 0.9600
Epoch 286/1000
75/75 - 0s - loss: 0.1672 - accuracy: 0.9600
Epoch 287/1000
75/75 - 0s - loss: 0.1665 - accuracy: 0.9600
Epoch 288/1000
75/75 - 0s - loss: 0.1657 - accuracy: 0.9600
Epoch 289/1000
75/75 - 0s - loss: 0.1650 - accuracy: 0.9600
Epoch 290/1000
75/75 - 0s - loss: 0.1643 - accuracy: 0.9600
Epoch 291/1000
75/75 - 0s - loss: 0.1636 - accuracy: 0.9600
Epoch 292/1000
75/75 - 0s - loss: 0.1629 - accuracy: 0.9600
Epoch 293/1000
75/75 - 0s - loss: 0.1622

Epoch 414/1000
75/75 - 0s - loss: 0.0984 - accuracy: 0.9867
Epoch 415/1000
75/75 - 0s - loss: 0.0981 - accuracy: 0.9867
Epoch 416/1000
75/75 - 0s - loss: 0.0977 - accuracy: 0.9867
Epoch 417/1000
75/75 - 0s - loss: 0.0974 - accuracy: 0.9867
Epoch 418/1000
75/75 - 0s - loss: 0.0970 - accuracy: 0.9867
Epoch 419/1000
75/75 - 0s - loss: 0.0966 - accuracy: 0.9867
Epoch 420/1000
75/75 - 0s - loss: 0.0963 - accuracy: 0.9867
Epoch 421/1000
75/75 - 0s - loss: 0.0959 - accuracy: 0.9867
Epoch 422/1000
75/75 - 0s - loss: 0.0957 - accuracy: 0.9867
Epoch 423/1000
75/75 - 0s - loss: 0.0953 - accuracy: 0.9867
Epoch 424/1000
75/75 - 0s - loss: 0.0950 - accuracy: 0.9867
Epoch 425/1000
75/75 - 0s - loss: 0.0947 - accuracy: 0.9867
Epoch 426/1000
75/75 - 0s - loss: 0.0944 - accuracy: 0.9867
Epoch 427/1000
75/75 - 0s - loss: 0.0941 - accuracy: 0.9867
Epoch 428/1000
75/75 - 0s - loss: 0.0938 - accuracy: 0.9867
Epoch 429/1000
75/75 - 0s - loss: 0.0935 - accuracy: 0.9867
Epoch 430/1000
75/75 - 0s - loss: 0.0932

Epoch 551/1000
75/75 - 0s - loss: 0.0656 - accuracy: 0.9867
Epoch 552/1000
75/75 - 0s - loss: 0.0655 - accuracy: 0.9867
Epoch 553/1000
75/75 - 0s - loss: 0.0654 - accuracy: 0.9867
Epoch 554/1000
75/75 - 0s - loss: 0.0652 - accuracy: 0.9867
Epoch 555/1000
75/75 - 0s - loss: 0.0651 - accuracy: 0.9733
Epoch 556/1000
75/75 - 0s - loss: 0.0649 - accuracy: 0.9733
Epoch 557/1000
75/75 - 0s - loss: 0.0648 - accuracy: 0.9733
Epoch 558/1000
75/75 - 0s - loss: 0.0646 - accuracy: 0.9733
Epoch 559/1000
75/75 - 0s - loss: 0.0645 - accuracy: 0.9733
Epoch 560/1000
75/75 - 0s - loss: 0.0644 - accuracy: 0.9733
Epoch 561/1000
75/75 - 0s - loss: 0.0643 - accuracy: 0.9867
Epoch 562/1000
75/75 - 0s - loss: 0.0641 - accuracy: 0.9867
Epoch 563/1000
75/75 - 0s - loss: 0.0640 - accuracy: 0.9867
Epoch 564/1000
75/75 - 0s - loss: 0.0638 - accuracy: 0.9867
Epoch 565/1000
75/75 - 0s - loss: 0.0637 - accuracy: 0.9867
Epoch 566/1000
75/75 - 0s - loss: 0.0636 - accuracy: 0.9867
Epoch 567/1000
75/75 - 0s - loss: 0.0634

Epoch 688/1000
75/75 - 0s - loss: 0.0475 - accuracy: 0.9733
Epoch 689/1000
75/75 - 0s - loss: 0.0474 - accuracy: 0.9733
Epoch 690/1000
75/75 - 0s - loss: 0.0473 - accuracy: 0.9733
Epoch 691/1000
75/75 - 0s - loss: 0.0472 - accuracy: 0.9733
Epoch 692/1000
75/75 - 0s - loss: 0.0471 - accuracy: 0.9733
Epoch 693/1000
75/75 - 0s - loss: 0.0470 - accuracy: 0.9733
Epoch 694/1000
75/75 - 0s - loss: 0.0470 - accuracy: 0.9733
Epoch 695/1000
75/75 - 0s - loss: 0.0469 - accuracy: 0.9733
Epoch 696/1000
75/75 - 0s - loss: 0.0468 - accuracy: 0.9733
Epoch 697/1000
75/75 - 0s - loss: 0.0467 - accuracy: 0.9733
Epoch 698/1000
75/75 - 0s - loss: 0.0466 - accuracy: 0.9733
Epoch 699/1000
75/75 - 0s - loss: 0.0465 - accuracy: 0.9733
Epoch 700/1000
75/75 - 0s - loss: 0.0465 - accuracy: 0.9733
Epoch 701/1000
75/75 - 0s - loss: 0.0463 - accuracy: 0.9733
Epoch 702/1000
75/75 - 0s - loss: 0.0462 - accuracy: 0.9733
Epoch 703/1000
75/75 - 0s - loss: 0.0461 - accuracy: 0.9733
Epoch 704/1000
75/75 - 0s - loss: 0.0461

Epoch 825/1000
75/75 - 0s - loss: 0.0386 - accuracy: 0.9733
Epoch 826/1000
75/75 - 0s - loss: 0.0385 - accuracy: 0.9733
Epoch 827/1000
75/75 - 0s - loss: 0.0385 - accuracy: 0.9733
Epoch 828/1000
75/75 - 0s - loss: 0.0384 - accuracy: 0.9733
Epoch 829/1000
75/75 - 0s - loss: 0.0384 - accuracy: 0.9733
Epoch 830/1000
75/75 - 0s - loss: 0.0383 - accuracy: 0.9733
Epoch 831/1000
75/75 - 0s - loss: 0.0382 - accuracy: 0.9733
Epoch 832/1000
75/75 - 0s - loss: 0.0382 - accuracy: 0.9733
Epoch 833/1000
75/75 - 0s - loss: 0.0383 - accuracy: 0.9733
Epoch 834/1000
75/75 - 0s - loss: 0.0382 - accuracy: 0.9733
Epoch 835/1000
75/75 - 0s - loss: 0.0382 - accuracy: 0.9733
Epoch 836/1000
75/75 - 0s - loss: 0.0381 - accuracy: 0.9733
Epoch 837/1000
75/75 - 0s - loss: 0.0381 - accuracy: 0.9733
Epoch 838/1000
75/75 - 0s - loss: 0.0380 - accuracy: 0.9733
Epoch 839/1000
75/75 - 0s - loss: 0.0380 - accuracy: 0.9733
Epoch 840/1000
75/75 - 0s - loss: 0.0379 - accuracy: 0.9733
Epoch 841/1000
75/75 - 0s - loss: 0.0379

Epoch 962/1000
75/75 - 0s - loss: 0.0338 - accuracy: 0.9867
Epoch 963/1000
75/75 - 0s - loss: 0.0337 - accuracy: 0.9867
Epoch 964/1000
75/75 - 0s - loss: 0.0338 - accuracy: 0.9733
Epoch 965/1000
75/75 - 0s - loss: 0.0337 - accuracy: 0.9733
Epoch 966/1000
75/75 - 0s - loss: 0.0336 - accuracy: 0.9733
Epoch 967/1000
75/75 - 0s - loss: 0.0336 - accuracy: 0.9733
Epoch 968/1000
75/75 - 0s - loss: 0.0335 - accuracy: 0.9733
Epoch 969/1000
75/75 - 0s - loss: 0.0335 - accuracy: 0.9733
Epoch 970/1000
75/75 - 0s - loss: 0.0335 - accuracy: 0.9733
Epoch 971/1000
75/75 - 0s - loss: 0.0335 - accuracy: 0.9733
Epoch 972/1000
75/75 - 0s - loss: 0.0334 - accuracy: 0.9733
Epoch 973/1000
75/75 - 0s - loss: 0.0334 - accuracy: 0.9733
Epoch 974/1000
75/75 - 0s - loss: 0.0334 - accuracy: 0.9733
Epoch 975/1000
75/75 - 0s - loss: 0.0334 - accuracy: 0.9733
Epoch 976/1000
75/75 - 0s - loss: 0.0334 - accuracy: 0.9733
Epoch 977/1000
75/75 - 0s - loss: 0.0334 - accuracy: 0.9733
Epoch 978/1000
75/75 - 0s - loss: 0.0333

<tensorflow.python.keras.callbacks.History at 0x225e68c0f60>

## Quantifying the Model
We use our testing data to evaluate our model. This is how we determine how well the model generalizes (i.e. its ability to predict new and previously unseen data points).

In [15]:
# Score the trained model on the held-out test split
test_metrics = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

25/25 - 0s - loss: 0.5067 - accuracy: 0.8400
Loss: 0.5067323446273804, Accuracy: 0.8399999737739563


## Making Predictions with new data

We can use our trained model to make predictions using `model.predict`

In [16]:
import numpy as np

new_data = np.array([[0.2, 0.3, 0.4]])

# The network was trained on standardized features, so new samples must pass
# through the same fitted scaler before they are given to the model.
new_data_scaled = X_scaler.transform(new_data)

# `Sequential.predict_classes` was removed in TensorFlow 2.6; taking the argmax of
# the softmax probabilities yields the same class index and works on every version.
predicted_classes = np.argmax(model.predict(new_data_scaled), axis=-1)
print(f"Predicted class: {predicted_classes}")

Predicted class: [1]
