# ***HANDWRITTEN DIGIT RECOGNITION WITH THE MNIST DATASET***

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

2025-12-18 09:58:25.905794: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-18 09:58:25.970245: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-18 09:58:28.298971: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [None]:
# --- Neural-network configuration ---

# Reproducibility and optimisation.
NN_SEED = 111
NN_LEARNING_RATE = 1e-2

# Early-stopping thresholds.
# NOTE(review): presumably meant for an early-stopping callback; no cell
# visible here consumes them -- confirm where they are used.
NN_EARLY_STOPPING_MIN_DELTA = 1e-3
NN_EARLY_STOPPING_PATIENCE = 5

# Output locations (all relative to the notebook's working directory).
NN_MODEL_SAVE_PATH = "./saved_models/"
NN_MODEL_SAVE_FIG = "./saved_models/figures/"
NN_TENSORBOARD_LOG_DIR = "./logs/"  # TensorBoard log directory

# Components used to build the saved model's name.
# NOTE(review): the exact name format is not shown here -- document it once
# the code that assembles the name is known.
NN_MODEL_PROJECT_NAME = "MNIST"
NN_MODEL_VERSION = "v1"
NN_MODEL_ARCHITECTURE = "nn"  # or "cnn" | "lstm" | ...

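## Avoid numerical round-off errors
We use `activation='linear'` in the output layer instead of softmax, so the model outputs raw ***logits***.<br>
During training, the loss (with `from_logits=True`) computes the softmax and the logarithm together in a numerically stable way, which avoids the round-off errors of computing them as separate steps.<br>
After getting the output ***logits***, we apply softmax manually when making predictions.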

$$ \text{logits} = [2.3, -1.1, 0.5] $$

Logits are unbounded real numbers: they can be large positive or large negative values. So, we apply softmax to convert them into probabilities:

$$ \text{probs}_i = \frac{e^{\text{logits}_i}}{\sum_j e^{\text{logits}_j}} $$



Putting it all together, our flow is:
``` markdown
Input
 ↓
Dense (ReLU)
 ↓
Dense (ReLU)
 ↓
Dense (Linear) → logits
 ↓
SparseCategoricalCrossentropy(from_logits=True)    [training]
    (softmax + log + loss, computed together)
 ↓
softmax(logits) → probs                            [prediction]
```

In [None]:
# Fully connected classifier: two ReLU hidden layers followed by a
# 10-unit linear output layer that emits raw logits (no softmax here).
model = Sequential()
model.add(Dense(units=25, activation='relu'))
model.add(Dense(units=15, activation='relu'))
model.add(Dense(units=10, activation='linear'))

In [None]:
# from_logits=True tells the loss that the model outputs raw logits, so the
# softmax is applied inside the loss computation rather than in the model.
model.compile(
    optimizer=Adam(learning_rate=NN_LEARNING_RATE),
    loss=SparseCategoricalCrossentropy(from_logits=True),
)

In [None]:
# Train on X / Y for a fixed 100 epochs with per-epoch progress output;
# the object returned by fit() is kept in `history`.
# NOTE(review): X and Y are defined outside this section -- confirm their
# shapes and that Y holds integer class labels, as the sparse loss expects.
history = model.fit(x=X, y=Y, epochs=100, verbose=1)

In [None]:
# Forward pass: the model returns raw logits because its output layer is linear.
logits = model(X)
# Convert logits to probabilities along the last axis (f_x, a.k.a. y_hat).
f_x = tf.nn.softmax(logits, axis=-1)