In [4]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

from keras.datasets import mnist
from keras import layers

## Improving Model Fit 
There are three kinds of problems you can face when trying to fit a model to a problem:

1. Training doesn't get started: your training loss does not go down 
2. Training starts, but the model does not make meaningful predictions and cannot beat the baseline metric you set (for example, the score of a random classifier).
3. Training and validation loss both keep going down, and we cannot seem to get the model to overfit, so we never reach a point where we need to stop training.

### Problem 1 - Tuning the gradient descent
For the first problem, you should tinker with the learning rate or the batch size used for gradient descent until the training loss starts to go down.

In [5]:
# Load only the MNIST training split; the test split is discarded because this
# demo looks at training/validation behaviour only.
(train_images, train_labels), _ = mnist.load_data()

# Flatten every 28x28 image into a 784-element vector. The number of samples is
# read from the array instead of being hard-coded as 60000, so the cell keeps
# working if the data is subsampled or replaced.
train_images = train_images.reshape((len(train_images), 28 * 28))
# Cast to float32 and divide by 255 so pixel values are scaled down from the
# 8-bit range.
train_images = train_images.astype("float32") / 255

# Small dense classifier: one hidden ReLU layer, softmax over the 10 digits.
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(10, activation="softmax")
])
# Deliberately use a far-too-large learning rate (1.0) to demonstrate
# "training doesn't get started".
model.compile(optimizer=keras.optimizers.RMSprop(1.),
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
# Hold out the last 20% of the training data for validation.
model.fit(train_images, train_labels,
          epochs=10,
          batch_size=128,
          validation_split=0.2)

# With a learning rate as large as 1.0, the loss and the accuracy both stall
# instead of improving from epoch to epoch.



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x10608b2d0>

In [7]:
# Rebuild the same two-layer classifier from scratch so that no weights carry
# over from the previous (failed) training run.
model = keras.Sequential()
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dense(10, activation="softmax"))

# Same setup as before, but with a more reasonable learning rate of 1e-2.
model.compile(
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
    optimizer=keras.optimizers.RMSprop(1e-2),
)

# Train with the same schedule: 10 epochs, batches of 128, 20% held out for
# validation.
model.fit(
    x=train_images,
    y=train_labels,
    validation_split=0.2,
    batch_size=128,
    epochs=10,
)

# With the lower learning rate, the loss now starts to go down.



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x28cf29fd0>