In [40]:
# tensorflow stuff
import tensorflow as tf
from tensorflow.keras.models import Sequential

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.activations import relu, sigmoid, softmax
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPool2D

import matplotlib.pyplot as plt

# load the mnist dataset
mnist = tf.keras.datasets.mnist
# load the dataset and split it into train and test data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# imgs size (28 x 28 Pixel)
img_rows = 28
img_cols = 28

input_shape = (img_rows, img_cols, 1)

# was ist x_train.shape[0] ???
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)

# convert training and test data to floats 
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# 255 because of greyscaled images (black & white)
x_train = x_train / 255  
x_test = x_test / 255

In [41]:
# creates a linear stack of layers
model = Sequential()

# add a layer to the model
model.add(
    # create a 2D convolution layer
    Conv2D(
        input_shape = input_shape, # only needed in the input layer
        
        # required parameters
        filters = 1, # dimensionality of the output space -> output params = input classes * filters
        kernel_size = (3, 3), 
        
        # optional parameters
        activation = relu, # applies the rectified linear unit activation function, default is the linear activation function

        # strides = (1, 1), # for specifying the strides of the convolution along the height and width
        # padding = 'valid', # one of "valid" or "same", same results in an output with the same size as the input
        # dilation_rate = (1, 1), # specifying the dilation rate to use for dilated convolution
        # use_bias = True, # Boolean, whether the layer uses a bias vector
        # kernel_initializer = 'glorot_uniform', # Initializer for the kernel weights matrix
        # bias_initializer = 'zeros', # Initializer for the bias vector
        # kernel_regularizer = None, # Regularizer function applied to the kernel weights matrix
        # bias_regularizer = None, # Regularizer function applied to the bias vector
        # activity_regularizer = None, # Regularizer function applied to the output of the layer (its "activation")
        # kernel_constraint = None, # Constraint function applied to the kernel matrix
        # bias_constraint = None, # Constraint function applied to the bias vector
        ))

# add a second layer to the model
model.add(
    # flattens the Conv2D layer from a 2d-array to a 1d-array (because a dense layer needs a 1d-array)
    Flatten()
    )

# add a third layer to the model
model.add(
    # regular fully-connected NN layer
    Dense(
        units=10, # dimensionality of the output space -> how many classes we want to predict
        activation=sigmoid) # applies the sigmoid activation function
    )

model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 26, 26, 1)         10        
_________________________________________________________________
flatten_10 (Flatten)         (None, 676)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 10)                6770      
Total params: 6,780
Trainable params: 6,780
Non-trainable params: 0
_________________________________________________________________


In [30]:
# was macht die loss function genau? Gibt es da noch andere?
model.compile(
    optimizer = Adam(),  # adam optimization is a stochastic gradient descent method that is based on adaptive estimation of first-order and second-order moments
    # computes the crossentropy loss between the labels and predictions. Use this crossentropy loss function when there are two or more label classes
    loss = SparseCategoricalCrossentropy(),
    # metrics to be evaluated by the model during training and testing
    metrics = ["accuracy"]]
)

# batch size?
model.fit(
    x = x_train,
    y = y_train,
    batch_size = 32, # Number of samples per gradient update. If unspecified, batch_size will default to 32
    epochs = 1, # Number of epochs to train the model. An epoch is an iteration over the entire x and y data provided
    verbose = 1, # Show progress
    validation_data = (x_test, y_test) # Data on which to evaluate the loss and any model metrics at the end of each epoch
)

score = model.evaluate(x_test, y_test)

print("\n")
print("Test loss: {:.2f}".format(score[0]))
print("Test accuracy: {:.2f} %".format(score[1]))
print("Test mse: {}".format(score[2]))

Train on 60000 samples, validate on 10000 samples


Test loss: 0.25
Test accuracy: 0.92 %
Test mse: 28.10675048828125


In [42]:
model = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation=relu, input_shape=input_shape),
    Conv2D(filters=64, kernel_size=(3, 3), activation=relu),
    MaxPool2D(), # Max pooling operation for spatial data
    Dropout(0.25), # Dropout consists in randomly setting a fraction rate of input units to 0 at each update during training time, which helps prevent overfitting
    Flatten(),
    Dense(512, activation=relu),
    Dropout(0.5),
    Dense(10, activation=softmax)
])

model.compile(
    optimizer = Adam(), loss = SparseCategoricalCrossentropy(), metrics = [["accuracy", "mse"]]
)

model.fit(
    x = x_train,
    y = y_train,
    batch_size = 32,
    epochs = 1,
    verbose = 1,
    validation_data = (x_test, y_test)
)

score = model.evaluate(x_test, y_test)

print("\n")
print("Test loss: {:.2f}".format(score[0]))
print("Test accuracy: {:.2f} %".format(score[1]))
print("Test mse: {}".format(score[2]))

Train on 60000 samples, validate on 10000 samples


Test loss: 0.04
Test accuracy: 0.99 %
Test mse: 27.337718963623047
