# Deep Learning on CIFAR10

In [16]:
import math
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
plt.rc('font', size=14)
plt.rc('axes', labelsize=14, titlesize=14)
plt.rc('legend', fontsize=14)
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)

In [49]:
cifar10 = tf.keras.datasets.cifar10.load_data()
(X_train_full, y_train_full), (X_test, y_test) = cifar10

X_train = X_train_full[5000:]
y_train = y_train_full[5000:]
X_valid = X_train_full[:5000]
y_valid = y_train_full[:5000]

In [4]:
np.random.seed(42)
tf.random.set_seed(42)

tf.keras.backend.clear_session()

In [7]:
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=[32, 32, 3]))
model.add(tf.keras.layers.Flatten())
for _ in range(20):
    model.add(tf.keras.layers.Dense(100, activation="swish", kernel_initializer="he_normal"))
model.add(tf.keras.layers.Dense(10, activation="softmax"))

model.summary()

In [8]:
optimizer = tf.keras.optimizers.Nadam(learning_rate=5e-5)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

In [11]:
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20)

model.fit(X_train, y_train, epochs=100,
          validation_data=(X_valid, y_valid),
          callbacks=[early_stopping_cb])

Epoch 1/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.1250 - loss: 11.3249 - val_accuracy: 0.2110 - val_loss: 2.1456
Epoch 2/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.2249 - loss: 2.1096 - val_accuracy: 0.2694 - val_loss: 1.9761
Epoch 3/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.2797 - loss: 1.9663 - val_accuracy: 0.3178 - val_loss: 1.8777
Epoch 4/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.3189 - loss: 1.8763 - val_accuracy: 0.3482 - val_loss: 1.8114
Epoch 5/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.3390 - loss: 1.8123 - val_accuracy: 0.3684 - val_loss: 1.7671
Epoch 6/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.3638 - loss: 1.7600 - val_accuracy: 0.3710 - val_loss: 1.7351
Epoch 7/1

<keras.src.callbacks.history.History at 0x1086a6840>

In [12]:
model.evaluate(X_valid, y_valid)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 654us/step - accuracy: 0.4416 - loss: 1.6586


[1.6560536623001099, 0.4440000057220459]

In [13]:
tf.keras.backend.clear_session()

model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=[32, 32, 3]))
model.add(tf.keras.layers.Flatten())
for _ in range(20):
    model.add(tf.keras.layers.Dense(100, kernel_initializer="he_normal"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation("swish"))

model.add(tf.keras.layers.Dense(10, activation="softmax"))

optimizer = tf.keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

model.fit(X_train, y_train, epochs=100,
          validation_data=(X_valid, y_valid),
          callbacks=[tf.keras.callbacks.EarlyStopping(patience=20)])

model.evaluate(X_valid, y_valid)

Epoch 1/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.1842 - loss: 2.2266 - val_accuracy: 0.3216 - val_loss: 1.9025
Epoch 2/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.3471 - loss: 1.8133 - val_accuracy: 0.3744 - val_loss: 1.7493
Epoch 3/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.3993 - loss: 1.6870 - val_accuracy: 0.3856 - val_loss: 1.7048
Epoch 4/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.4288 - loss: 1.6047 - val_accuracy: 0.4252 - val_loss: 1.6071
Epoch 5/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.4571 - loss: 1.5271 - val_accuracy: 0.4050 - val_loss: 1.6399
Epoch 6/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.4805 - loss: 1.4576 - val_accuracy: 0.4006 - val_loss: 1.6817
Epoch 7/1

[2.1647257804870605, 0.41940000653266907]

In [14]:
tf.keras.backend.clear_session()

model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=[32, 32, 3]))
model.add(tf.keras.layers.Flatten())
for _ in range(20):
    model.add(tf.keras.layers.Dense(100,
                                    kernel_initializer="lecun_normal",
                                    activation="selu"))
model.add(tf.keras.layers.Dense(10, activation="softmax"))

optimizer = tf.keras.optimizers.Nadam(learning_rate=7e-4)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

model.fit(X_train_scaled, y_train, epochs=100,
          validation_data=(X_valid_scaled, y_valid),
          callbacks=[tf.keras.callbacks.EarlyStopping(patience=20)])

model.evaluate(X_valid_scaled, y_valid)

Epoch 1/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.2656 - loss: 2.0567 - val_accuracy: 0.3442 - val_loss: 1.8483
Epoch 2/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.3834 - loss: 1.7301 - val_accuracy: 0.4018 - val_loss: 1.7076
Epoch 3/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.4287 - loss: 1.6200 - val_accuracy: 0.4152 - val_loss: 1.6629
Epoch 4/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.4531 - loss: 1.5557 - val_accuracy: 0.4564 - val_loss: 1.6098
Epoch 5/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.4786 - loss: 1.4947 - val_accuracy: 0.4614 - val_loss: 1.6147
Epoch 6/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.4947 - loss: 1.4575 - val_accuracy: 0.4804 - val_loss: 1.5457
Epoch 7/10

[1.6219482421875, 0.46799999475479126]