# Neural Networks for Handwritten Digit Recognition, Binary

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [None]:
# Training data: X has shape (1000, 400) and y has shape (1000, 1).

# Binary digit classifier: two sigmoid hidden layers and a single sigmoid
# output unit. tf.keras.Input declares the expected input size (400 features)
# so TensorFlow can size the weight and bias parameters right away. The Input
# entry can be omitted in practice; TensorFlow then sizes the parameters once
# the input data is supplied to model.fit.
binary_layers = [
    tf.keras.Input(shape=(400,)),
    Dense(25, activation="sigmoid", name="dense"),
    Dense(15, activation="sigmoid", name="dense_1"),
    Dense(1, activation="sigmoid", name="dense_2"),
]
model = Sequential(binary_layers, name="my_model")

In [None]:
# Print Keras's summary of the binary model to check its layers.
model.summary()

In [None]:
from tensorflow.keras.losses import BinaryCrossentropy

# Binary cross-entropy pairs with the single sigmoid output unit; Adam is
# configured with a learning rate of 0.001.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=BinaryCrossentropy(),
)

# Train on the full data set for 20 epochs.
model.fit(X, y, epochs=20)

In [None]:
# Run one example of each class through the trained model. Each 400-feature
# row is reshaped to (1, 400) so it is passed to predict() as a batch of one.
zero_example = X[0].reshape(1, 400)  # a zero
prediction = model.predict(zero_example)
print(f" predicting a zero: {prediction}")

one_example = X[500].reshape(1, 400)  # a one
prediction = model.predict(one_example)
print(f" predicting a one:  {prediction}")

In [1]:
# The output of the model is interpreted as a probability.
# In the first example above, the input is a zero.
# The model predicts that the probability of the input being a one is nearly zero.
# In the second example, the input is a one.
# The model predicts that the probability of the input being a one is nearly one.
# As in the case of logistic regression, the probability is compared to a threshold to make a final prediction.

In [None]:
# Turn the most recent predicted probability into a hard 0/1 label using a
# 0.5 decision threshold.
yhat = 1 if prediction >= 0.5 else 0
print(f"prediction after threshold: {yhat}")

# Neural Networks for Handwritten Digit Recognition, Multiclass

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import linear, relu, sigmoid

In [None]:
# The shape of X is: (5000, 400)
# The shape of y is: (5000, 1)

In [None]:
tf.random.set_seed(1234)  # fixed seed for consistent results between runs

# Multiclass digit classifier: two ReLU hidden layers and a 10-unit output
# layer. The output activation is linear, so the model emits raw scores rather
# than probabilities.
multiclass_layers = [
    tf.keras.Input(shape=(400,)),
    Dense(25, activation="relu", name="L1"),
    Dense(15, activation="relu", name="L2"),
    Dense(10, activation="linear", name="L3"),
]
model = Sequential(multiclass_layers, name="my_model")

In [None]:
# Print Keras's summary of the multiclass model to check its layers.
model.summary()

In [None]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy

# from_logits=True tells the loss that the model's outputs are raw scores
# (the final layer uses a linear activation), not probabilities.
multiclass_loss = SparseCategoricalCrossentropy(from_logits=True)
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss=multiclass_loss, optimizer=adam_optimizer)

# Train for 40 epochs and keep the object returned by fit() in `history`.
history = model.fit(X, y, epochs=40)

#### Epochs and batches
In the `fit` statement above, the number of `epochs` was set to 40. This specifies that the entire data set should be applied during training 40 times.  During training, you see output describing the progress of training that looks like this:
```
Epoch 1/40
157/157 [==============================] - 0s 1ms/step - loss: 2.2770
```
The first line, `Epoch 1/40`, describes which epoch the model is currently running. For efficiency, the training data set is broken into 'batches'. The default size of a batch in Tensorflow is 32. There are 5000 examples in our data set, or roughly 157 batches (5000 / 32, rounded up). The notation on the 2nd line `157/157 [====` is describing which batch has been executed.

In [None]:
# Take the example at index 1015 (a handwritten two) and display it.
image_of_two = X[1015]
display_digit(image_of_two)

# Reshape the single 400-feature example to (1, 400) so it is passed to
# predict() as a batch of one.
two_batch = image_of_two.reshape(1, 400)
prediction = model.predict(two_batch)

# Show the raw model outputs and the index of the largest one.
print(f" predicting a Two: \n{prediction}")
print(f" Largest Prediction index: {np.argmax(prediction)}")

The largest output is at index 2 (`prediction[0, 2]`), indicating the predicted digit is a '2'. If the problem only requires a selection, that is sufficient. Use NumPy [argmax](https://numpy.org/doc/stable/reference/generated/numpy.argmax.html) to select it. If the problem requires a probability, a softmax is required:

In [None]:
# The model's final layer is linear, so `prediction` holds raw scores. Apply a
# softmax to convert them into probabilities; `prediction_p` was previously
# used here without ever being defined, which raised a NameError.
prediction_p = tf.nn.softmax(prediction).numpy()

# Softmax preserves ordering, so the most probable class is the index of the
# largest entry.
yhat = np.argmax(prediction_p)

print(f"np.argmax(prediction_p): {yhat}")