# Create and Train our model

In [None]:
# Import the Data

import gzip
import numpy as np

def open_images(filename):
    """Load a gzip-compressed IDX3 image file as a float32 array.

    The 16-byte IDX header consists of four big-endian 32-bit integers:
    the magic number (2051 for unsigned-byte image data), the image count,
    the row count and the column count. The image size is read from that
    header instead of being fixed to 28x28, so files with other image
    dimensions load correctly as well.

    Args:
        filename: Path to the gzip-compressed IDX3 image file.

    Returns:
        A float32 array of shape (n_images, rows, cols) holding the raw
        pixel values (no scaling is applied).

    Raises:
        ValueError: If the file is too short to hold a header, does not
            start with the IDX3 image magic number, or its pixel data
            does not divide evenly into rows x cols images.
    """
    with gzip.open(filename, "rb") as file:
        data = file.read()

    # Header layout: magic, image count, rows, cols (big-endian int32 each).
    magic, _, rows, cols = (
        int(value) for value in np.frombuffer(data, dtype=">i4", count=4)
    )
    if magic != 2051:
        raise ValueError(
            f"{filename!r} is not an IDX3 image file (magic number {magic})"
        )

    # Everything after the header is one unsigned byte per pixel.
    pixels = np.frombuffer(data, dtype=np.uint8, offset=16)
    return pixels.reshape(-1, rows, cols).astype(np.float32)

def open_labels(filename):
    """Read the labels from a gzip-compressed IDX1 label file.

    Args:
        filename: Path to the gzip-compressed label file.

    Returns:
        A 1-D uint8 array with one entry per byte that follows the
        8-byte file header.
    """
    with gzip.open(filename, "rb") as handle:
        raw = handle.read()
    # Skip the 8-byte header; the remaining bytes are the label values.
    labels = np.frombuffer(raw, dtype=np.uint8, offset=8)
    return labels

X_train = open_images("./data/train-images-idx3-ubyte.gz")
y_train = open_labels("./data/train-labels-idx1-ubyte.gz")

# Label 0 is the t-shirt class.
# As with our first network, we only train for "t-shirt" vs. "not t-shirt",
# therefore the labels become True for 0 and False for 1-9.
# `y_train == 0` compares every element of the NumPy array with 0 and
# returns another NumPy array of the same length holding True/False values.
y_train = y_train == 0

# Flatten every image matrix (28x28 pixels) into a single row of values
# (784 per image). The number of rows is taken from the data itself rather
# than hard-coded, so a dataset of a different size reshapes correctly.
training_data = X_train.reshape(len(X_train), -1)

In [None]:
# Use the sequential representation of the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input

In [None]:
# https://keras.io/guides/sequential_model/
# Describe the whole network as one ordered list of layers.
model = Sequential(
    [
        # Input: 784 values per sample (one flattened 28x28 pixel image).
        # `shape` is a tuple with a single element.
        Input(shape=(784,)),
        # Hidden layer: Dense (every input is connected to every neuron)
        # with 100 neurons and the sigmoid activation function
        # ("relu" is an alternative activation with a different curve).
        Dense(100, activation="sigmoid"),
        # Output layer: Dense with a single sigmoid neuron.
        Dense(1, activation="sigmoid"),
    ]
)

# Configure the model for training:
# - optimizer "sgd": stochastic gradient descent adjusts the weights
#   during backpropagation by following the gradient towards a minimum
# - loss "binary_crossentropy": the value that training tries to minimise
# - metrics ["accuracy"]: additionally report the accuracy after each epoch
model.compile(
    loss="binary_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"],
)

In [None]:
# Train the model on the training images and their labels.
# epochs     <- how many times the full training data is passed through
# batch_size <- number of items per batch
model.fit(
    x=training_data,
    y=y_train,
    batch_size=1000,
    epochs=10,
)

# Test our model

In [None]:
# Plot images inline with matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Show the first training image using the "gray_r" (reversed grayscale) colormap
plt.imshow(X_train[0], cmap="gray_r")
plt.show()

In [None]:
# The image needs to be reshaped from a 28 x 28 matrix into a batch of one
# row with 784 values, matching the model's input shape
item0 = X_train[0].reshape(1, 784)
# Display the resulting shape as the cell output
item0.shape

In [None]:
# Run the model on the first image
model.predict(item0)
# The result should be below 0.5, i.e. less than 50% probability of being a t-shirt

In [None]:
# Show the second training image using the "gray_r" (reversed grayscale) colormap
plt.imshow(X_train[1], cmap="gray_r")
plt.show()

In [None]:
# The image needs to be reshaped from a 28 x 28 matrix into a batch of one
# row with 784 values, matching the model's input shape
item1 = X_train[1].reshape(1, 784)
# Display the resulting shape as the cell output
item1.shape

In [None]:
# Run the model on the second image
model.predict(item1)
# The result should be above 0.5, i.e. more than 50% probability of being a t-shirt

# Manually validate against full Data

In [None]:
# Predict against the whole list of items
y_trained_predictions = model.predict(training_data)
# The result holds one value between 0 and 1 per item (0% to 100% probability)

# Round the values (below 0.5 -> 0, above 0.5 -> 1) and flatten them into a
# single 1-D array so they line up with y_train. reshape(-1) infers the
# length from the predictions instead of hard-coding the dataset size.
prediction_result = np.round(y_trained_predictions).reshape(-1)

# Compare the prediction result to the original labels.
# The result is a boolean array: True for matches and False for differences.
success_rate = prediction_result == y_train

# Return the fraction of correct matches
np.mean(success_rate)

# Validate with Keras built-in functionality

In [None]:
# Let Keras compute the loss and the compiled metrics on the training data
model.evaluate(training_data, y_train)
# The return value is a list: the loss first, followed by the accuracy

In [None]:
# Read the documentation of a function
model.evaluate?

In [None]:
# Print the names that belong to the values returned by model.evaluate
print(model.metrics_names)

# Validate the model with test data

In [None]:
# If you only have one dataset available for your training,
# this dataset can be split up into training and test data

# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X, y)

# We are not using it in our example, as we have already a split up dataset

In [None]:
# Prepare the test dataset
X_test = open_images("./data/t10k-images-idx3-ubyte.gz")
y_test = open_labels("./data/t10k-labels-idx1-ubyte.gz")

# Same binary target as for training: True for label 0 (t-shirt),
# False for every other label
y_test = y_test == 0

# Flatten every image into a single row of values. The number of rows is
# taken from the data itself rather than hard-coded, so a test set of a
# different size reshapes correctly.
X_test = X_test.reshape(len(X_test), -1)

In [None]:
# Validate the accuracy against the test dataset, which was not used for training
model.evaluate(X_test, y_test)