In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers

### __Deep Learning workflow:__
<font size=3>
    
1. Import and data pre-processing;   
2. Neural network modeling;
3. Model compilation;
4. Train and validation;
5. Final training;
6. Test evaluation;
7. Saving the model.

### __1. Import and data pre-processing:__
<font size=3>
    
1.1 Import data;\
1.2 Data visualization;\
1.3 Feature engineering;\
1.4 Data shuffling;\
1.5 Train, validation, and test tensor division.

Our next problem is a supervised classification task using the classical [MNIST](https://en.wikipedia.org/wiki/MNIST_database) handwritten digits. The data is available in the [Keras dataset](https://keras.io/api/datasets/mnist/).  

In [None]:
# Load MNIST through the Keras dataset helper; it returns a (train, test) pair.
train_split, test_split = keras.datasets.mnist.load_data(path="mnist.npz")

# Each split unpacks into (images, labels).
x_train, y_train = train_split
x_test, y_test = test_split

# Report the array shapes of both splits.
print(f"x-train:{x_train.shape}, y-train:{y_train.shape}")
print(f"x-test:{x_test.shape}, y-test:{y_test.shape}")

In [None]:
# Show a single training image with its label as the title.
i = 0  # index of the training sample to display

fig, ax = plt.subplots(figsize=(5, 3))
ax.set_title("Number " + str(y_train[i]))
ax.imshow(x_train[i], cmap="gray")

# Remove the tick marks on both axes.
ax.set_xticks([])
ax.set_yticks([])

plt.show()

In [None]:
# Shuffling train and test data.
# A seeded generator is used so that the shuffled order -- and therefore the
# train/validation split taken from x_train afterwards -- is the same on every
# Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# One permutation per split, applied to images and labels alike so that each
# image stays paired with its label.
train_order = rng.permutation(x_train.shape[0])
x_train = x_train[train_order]
y_train = y_train[train_order]

test_order = rng.permutation(x_test.shape[0])
x_test = x_test[test_order]
y_test = y_test[test_order]

In [None]:
# Normalization: rescale the pixel values by the largest value in the training images.
Max = x_train.max()

# Value ranges before scaling.
print(f"train: min = {x_train.min()}, max = {x_train.max()}")
print(f"test: min = {x_test.min()}, max = {x_test.max()}\n")

# The test images are divided by the training maximum too, so both sets share one scale.
x_train, x_test = x_train/Max, x_test/Max

# Value ranges after scaling.
print(f"train: min = {x_train.min()}, max = {x_train.max()}")
print(f"test: min = {x_test.min()}, max = {x_test.max()}")

In [None]:
# one-hot encoding for label data:

def one_hot(labels, num_classes=10):
    """Convert integer class labels into one-hot encoded rows.

    Parameters
    ----------
    labels : array-like of int
        Class indices. The input is flattened, so a column vector of shape
        (N, 1) is treated the same as a vector of shape (N,).
    num_classes : int, optional
        Number of columns of the encoded array (default 10).

    Returns
    -------
    numpy.ndarray
        float32 array of shape (N, num_classes) holding 1 at
        ``[k, labels[k]]`` and 0 everywhere else.

    Raises
    ------
    IndexError
        If a label is not a valid column index for ``num_classes``.
    """
    labels = np.asarray(labels).ravel()
    N = labels.size

    y_hot = np.zeros((N, num_classes), dtype="float32")

    # Guard the empty case: an empty array is not necessarily of integer
    # dtype and would then be rejected as an index.
    if N:
        # Row k receives its 1 in column labels[k]; all rows are set at once
        # instead of looping over the samples in Python.
        y_hot[np.arange(N), labels] = 1

    return y_hot

# First four labels before encoding.
print(y_train[:4], "\n")

# Encode the labels of both splits.
y_train, y_test = one_hot(y_train), one_hot(y_test)

# The same four labels after encoding, followed by the new label shapes.
print(y_train[:4], "\n")
print(f"y-train:{y_train.shape}, y-test:{y_test.shape}")

In [None]:
# flatten x data (the number arrays) as dense layers' input vectors: (N, 28, 28) -> (N, 28*28)
def flatten(x):
    """Collapse every axis after the first into a single feature axis.

    Parameters
    ----------
    x : numpy.ndarray
        Array of shape (N, d1, d2, ...). Any number of trailing axes is
        accepted; an array that is already (N, D) keeps its shape, so
        flattening twice is harmless.

    Returns
    -------
    numpy.ndarray
        ``x`` reshaped to (N, d1*d2*...).
    """
    N = x.shape[0]

    # The feature count is computed explicitly rather than passed as -1,
    # because reshape cannot infer an axis when the array holds zero samples.
    n_features = int(np.prod(x.shape[1:]))

    return x.reshape(N, n_features)

# Shape of the training images before flattening.
print(x_train.shape)

# Flatten the images of both splits.
x_train, x_test = flatten(x_train), flatten(x_test)

# Shape of the training images after flattening.
print(x_train.shape)

In [None]:
# Size of the validation subset: 20% of the samples currently held in x_train.
n_samples = x_train.shape[0]
N_val = int(0.2*n_samples)

print(f"N-train = {x_train.shape[0]-N_val}, N-val = {N_val}, N-test = {x_test.shape[0]}")

In [None]:
# The first N_val samples become the validation set; the rest remain for training.
# Inputs and labels are cut at the same index so that the pairs stay aligned.
x_val, x_train = x_train[:N_val], x_train[N_val:]
y_val, y_train = y_train[:N_val], y_train[N_val:]

# Shapes of all three subsets.
print(f"x-train:{x_train.shape}, x-val:{x_val.shape}, x-test:{x_test.shape}")
print(f"y-train:{y_train.shape}, y-val:{y_val.shape}, y-test:{y_test.shape}")

### __2. Neural network modeling:__
<font size=3>
    
2.1 Define initial layer's shape;\
2.2 Define output layer's shape and its [activation function](https://keras.io/api/layers/activations/);\
2.3 Define hidden layers.


In [None]:
# Layer sizes are read from the data: one input per feature column of x_train
# and one output unit per label column of y_train.
n_features = x_train.shape[1]
n_classes = y_train.shape[1]

# Functional-API model: input -> Dense(200, relu) -> Dense(n_classes, softmax).
In = keras.Input(shape=(n_features,))
x = layers.Dense(200, activation="relu")(In)
Out = layers.Dense(n_classes, activation="softmax")(x)

model = keras.Model(inputs=In, outputs=Out)
model.summary()

### __3. Model compilation:__
<font size=3>

3.1 Define [optimizer](https://keras.io/api/optimizers/);\
3.2 Define [loss function](https://keras.io/api/losses/);\
3.3 Define [validation metric](https://keras.io/api/metrics/).


In [None]:
# Adam optimizer, categorical cross-entropy loss; the metric is registered as
# 'acc', which is the key the training history is read with later on.
model.compile(
    optimizer="Adam",
    loss="categorical_crossentropy",
    metrics=['acc'],
)

### __4. Train and validation__
<font size=3>
    
Here, using the training data, the optimizer updates the values of the model's inner parameters (_i.e._, weights, biases, etc.) over the epochs while minimizing/maximizing the loss function. Meanwhile, the model's performance is measured for each epoch using the validation data. At this workflow stage, we model the neural network architecture to avoid [overfitting and underfitting](https://www.geeksforgeeks.org/underfitting-and-overfitting-in-machine-learning/).
   

In [None]:
%%time

# Train on the training subset and evaluate on the validation subset after
# every epoch. validation_data is passed as a tuple (x_val, y_val), which is
# the form documented by Keras, instead of a list.
report = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=200,
    epochs=50,
)

In [None]:
# Per-epoch curves recorded by model.fit: loss and accuracy for the training
# data, plus their "val_"-prefixed counterparts for the validation data.
loss = report.history['loss']
val_loss = report.history['val_loss']

acc = report.history['acc']
val_acc = report.history['val_acc']

# Epochs are numbered from 1 on the x-axis.
epochs = np.arange(1, len(loss) + 1)

fig, ax = plt.subplots(1, 2, figsize=(14, 4))

# Left panel: loss curves.
ax[0].plot(epochs, loss, label="loss")
ax[0].plot(epochs, val_loss, label="val-loss")
ax[0].set_ylabel("Loss function")

# Right panel: accuracy curves.
ax[1].plot(epochs, acc, label="acc")
ax[1].plot(epochs, val_acc, label="val-acc")
ax[1].set_ylabel("Metric function")

# Decoration shared by both panels.
for axis in ax:
    axis.set_xlabel("epochs")
    axis.legend()
    axis.grid()

plt.show()

### __5. Final training__
<font size=3>

Once the modeling is completed, we concatenate the train and validation data and fit the model again.

__Note:__ use the same number of __epochs__ and __batch-size__ from the previous step.
    

### __6. Test evaluation__:

    6.1 Make the evaluation using the test data;
    6.2 Make some predictions to visualize the results;
   

### __7. Saving the model__:

<font size=3>
    
For model __loading__, see [2.2-notebook](2.2-notebook.ipynb).