# Neural Networks with Keras

In [1]:
# Load libraries
import numpy as np
import pandas as pd
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import SGD

# The core data structure of Keras is a model, a way to organize layers
# Here we are using a Sequential model architecture
model = Sequential()

In [2]:
# Build layers with Dense(), followed by Activation()

# 2 hidden layers with 32 nodes
# Activation is set to relu
# 1 output layer with 10 categories
# Softmax function used to calculate 0 to 1 probabilities for each of 10 categories

model = Sequential([
    Dense(32, input_shape=(784,)),
    Activation('relu'),
    Dense(32),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [3]:
# Model with 3 hidden layers

model = Sequential([
    Dense(32, input_shape=(784,)),
    Activation('relu'),
    Dense(32),
    Activation('relu'),
    Dense(32),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

model.summary()

In [4]:
# Or build a model in steps using .add():
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=784))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=10, activation='softmax'))

model.summary()

In [5]:
# Once your model looks good, configure its learning process with .compile():

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

**Loss can be set to:**
- `binary_crossentropy` for binary categories
- `categorical_crossentropy` for multiple categories
- `mse` (mean squared error) for regression

**Optimizer can be set to:**
- `sgd` (stochastic gradient descent)
- `adam`
- A variety of [others](https://keras.io/api/optimizers/) are available.

## Training a keras model

Keras models are trained on Numpy arrays of input data and labels. For training a model, you will typically use the  fit function.

In [6]:
# Generate dummy data
data = np.random.random((1000, 100)) # X data with 100 features
labels = np.random.randint(2, size=(1000, 1)) # y data with binary classification

# Set up the model
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=100))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy']) # Change to 'AUC' for ROC area under the curve

# Train the model, iterating on the data in batches of 32 samples
model.fit(data, labels, validation_split=0.20, epochs=10, batch_size=32)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - accuracy: 0.5463 - loss: 0.6873 - val_accuracy: 0.4500 - val_loss: 0.7149
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5392 - loss: 0.6868 - val_accuracy: 0.4500 - val_loss: 0.7164
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5454 - loss: 0.6887 - val_accuracy: 0.4500 - val_loss: 0.7169
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5222 - loss: 0.6900 - val_accuracy: 0.4600 - val_loss: 0.7158
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5770 - loss: 0.6809 - val_accuracy: 0.4550 - val_loss: 0.7170
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5207 - loss: 0.6907 - val_accuracy: 0.4700 - val_loss: 0.7158
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x7ab0de423af0>

The `validation_split` parameter uses a piece of the training data to score each epoch.  You can also first use `train_test_split()` to generate `X_test` and `y_test` data. That can then be passed in with the argument `validation_data=(X_test,y_test)`. Best practice is still to hold out your `X_test` and `y_test` data to be evaluated separately at the very end.

In [7]:
# For multiple categories you need to one-hot encode

# Generate dummy data
x_train = np.random.random((1000, 20))
y_train = np.random.randint(10, size=(1000, 1))
x_test = np.random.random((100, 20))
y_test = np.random.randint(10, size=(100, 1))

# One-hot encode the y data using to_categorical()
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)

# Build the model
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=20))
model.add(Dense(10, activation='softmax'))

# Optimize using SGD with a learning rate
sgd = SGD(learning_rate=.01)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

model.fit(x_train, y_train,
          epochs=20,
          batch_size=128)
score = model.evaluate(x_test, y_test, batch_size=128) # extract loss and accuracy from test data evaluation
print(score)

Epoch 1/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0938 - loss: 2.3496  
Epoch 2/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0899 - loss: 2.3522  
Epoch 3/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0950 - loss: 2.3463 
Epoch 4/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1028 - loss: 2.3329  
Epoch 5/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1109 - loss: 2.3245  
Epoch 6/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0940 - loss: 2.3268 
Epoch 7/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0981 - loss: 2.3222 
Epoch 8/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0837 - loss: 2.3292 
Epoch 9/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [8]:
# Prediction from keras classification model

# model.predict() the prediction of each category as columns, largest is selected for predict_classes()
y_pred = model.predict(x_test)
print(y_pred[0])

# Find the index of the largest probability using argmax()
class_pred = []
for i in range(len(y_pred)):
  class_pred.append(y_pred[i].argmax())
print(class_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[0.10444587 0.09421442 0.11339059 0.11201114 0.10853785 0.10455655
 0.09447579 0.09915754 0.09063447 0.07857577]
[2, 2, 8, 6, 7, 8, 5, 3, 8, 7, 8, 8, 7, 4, 4, 4, 8, 4, 5, 4, 0, 5, 5, 7, 3, 9, 5, 7, 6, 7, 4, 9, 5, 5, 7, 3, 2, 0, 3, 6, 0, 5, 3, 5, 3, 0, 4, 4, 4, 8, 5, 5, 7, 0, 8, 4, 3, 2, 0, 4, 2, 7, 0, 5, 2, 6, 8, 4, 4, 4, 7, 0, 5, 7, 5, 2, 5, 6, 2, 9, 3, 3, 7, 8, 7, 2, 9, 8, 9, 8, 4, 0, 4, 4, 9, 0, 0, 7, 3, 3]


## **Now you try:** Can you fit a neural network model to the Iris dataset? Run models that change the structure of the network (e.g. hidden layers and activations). Try to improve your validation accuracy as much as possible.

Data can be imported via the following link:

http://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv

In [9]:
data = pd.read_csv("http://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv")

# Update data to set up for train test split
data = data.iloc[:,1:]
X = data.loc[:, data.columns != 'Species']
y = data['Species']

display(X)
display(y)
display(pd.get_dummies(y))

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


Unnamed: 0,Species
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,virginica
146,virginica
147,virginica
148,virginica


Unnamed: 0,setosa,versicolor,virginica
0,True,False,False
1,True,False,False
2,True,False,False
3,True,False,False
4,True,False,False
...,...,...,...
145,False,False,True
146,False,False,True
147,False,False,True
148,False,False,True
