Evaluating neural networks

Once a model is trained, model.evalute() applies and evaluates the model on testing data and mode.predict() calculates predicted values for specific instances. Like model.fit(), model.evaluate() uses batches to perform the computation. Details for each method can be found in the documentation.

In [1]:
import numpy as np
import pandas as pd
import os

os.environ['KERAS_BACKEND'] = 'tensorflow'

In [2]:
# The backend must be set before importing keras, not after
import keras as keras
import keras.datasets.fashion_mnist

In [3]:
# Load built-in Fashion MNIST
(Xtrain, ytrain), (Xtest, ytest) = keras.datasets.fashion_mnist.load_data()

# Scale images to the [0, 1] range and make sure all have same type
Xtrain = Xtrain.reshape(60000, 784).astype('float32') / 255
Xtest = Xtest.reshape(10000, 784).astype('float32') / 255
ytrain = ytrain.astype('float32')
ytest = ytest.astype('float32')

In [4]:
# Define the model structure using keras.Sequential
n_classes = 10
input_size = 784  # Example input size
model = keras.Sequential(
    [
        # Input layer
        keras.layers.Input(shape=(input_size, )),
        # Hidden layer 1 = 256 nodes, linear activation
        keras.layers.Dense(512, activation='relu'), # Input layer with ReLU activation
        # Hidden layer 2: 128 nodes, linear activation
        keras.layers.Dense(512, activation='relu'), # Hidden layer with ReLU activation
        # Output layer: 10 nodes, one per class
        keras.layers.Dense(n_classes), # Output layer (without activation, as in PyTorch)
    ]
)

In [5]:
model.summary()

In [15]:
from tensorflow.keras import layers, optimizers, losses, metrics
# Specify training choices (optimizer, loss function, metrics)
model.compile(
    optimizer='Adam',  # Optimizer
    # Loss function to minimize
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    # List of metrics to monitor
    metrics=['accuracy'],
)

In [16]:
# Train the model with validation
training = model.fit(Xtrain, ytrain, batch_size=64, epochs=50, validation_split=0.1)

Epoch 1/50
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7746 - loss: 0.6315 - val_accuracy: 0.8603 - val_loss: 0.3817
Epoch 2/50
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8648 - loss: 0.3642 - val_accuracy: 0.8743 - val_loss: 0.3470
Epoch 3/50
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8788 - loss: 0.3215 - val_accuracy: 0.8617 - val_loss: 0.3917
Epoch 4/50
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8863 - loss: 0.3010 - val_accuracy: 0.8795 - val_loss: 0.3313
Epoch 5/50
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8963 - loss: 0.2755 - val_accuracy: 0.8718 - val_loss: 0.3453
Epoch 6/50
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9029 - loss: 0.2589 - val_accuracy: 0.8883 - val_loss: 0.3113
Epoch 7/50
[1m844/844[0m 

In [17]:
# View the training history
training.history

{'accuracy': [0.82575923204422,
  0.8681296110153198,
  0.8809629678726196,
  0.889296293258667,
  0.8958703875541687,
  0.9026851654052734,
  0.9058333039283752,
  0.9110184907913208,
  0.9148518443107605,
  0.9184814691543579,
  0.9214074015617371,
  0.9254074096679688,
  0.9282962679862976,
  0.9314629435539246,
  0.9326296448707581,
  0.9369999766349792,
  0.9388889074325562,
  0.9422592520713806,
  0.9437037110328674,
  0.9449999928474426,
  0.947092592716217,
  0.9495740532875061,
  0.9519444704055786,
  0.953166663646698,
  0.954537034034729,
  0.9554073810577393,
  0.9582407474517822,
  0.9589999914169312,
  0.9614074230194092,
  0.9609444737434387,
  0.9637407660484314,
  0.9646852016448975,
  0.9637963175773621,
  0.9659629464149475,
  0.9669814705848694,
  0.9669814705848694,
  0.969870388507843,
  0.9722592830657959,
  0.9718518257141113,
  0.9718888998031616,
  0.9717592597007751,
  0.9735555648803711,
  0.9747222065925598,
  0.9759259223937988,
  0.9752777814865112,
  0.9

In [18]:
# Evaluate the model on the testing data
results = model.evaluate(Xtest, ytest, batch_size=64)
print('Test loss, accuracy', results)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8918 - loss: 0.6419
Test loss, accuracy [0.6332319974899292, 0.8938000202178955]


In [19]:
# Generate predictions (probabilities -- the output of the last layer)
predictions = model.predict(Xtest[:3])
print('Predictions:', predictions.round(3))
print('Actual values:', ytest[:3])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Predictions: [[ -22.344  -14.584  -31.566  -17.929  -26.69   -12.231  -20.221   -4.046
   -24.653   14.175]
 [  -6.769  -48.169   11.586  -38.219   -7.279  -26.792   -5.333  -41.417
   -31.667  -49.646]
 [ -91.243   87.555  -64.375  -34.411  -96.14   -72.436  -47.979 -132.824
   -72.784  -87.72 ]]
Actual values: [9. 2. 1.]
