# AlexNet Architecture

In [2]:
import matplotlib.pyplot as plt
import tensorflow as tf

from functools import partial
from sklearn.model_selection import train_test_split

In [3]:
cifar100 = tf.keras.datasets.cifar100.load_data()

In [4]:
(X_train_full, y_train_full), (X_test, y_test) = cifar100

In [5]:
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)

In [6]:
X_train[0].shape

(32, 32, 3)

In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(224, 224, 3)),
    tf.keras.layers.Conv2D(filters=96, kernel_size=11, strides=4, padding='valid', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Conv2D(filters=256, kernel_size=5, strides=1, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Conv2D(filters=384, kernel_size=3, strides=1, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(filters=384, kernel_size=3, strides=1, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(filters=256, kernel_size=3, strides=1, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=4096, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(units=4096, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(units=100, activation='softmax', kernel_initializer='he_normal'),
])

In [12]:
model.summary()

In [24]:
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

# Architecture similar to that of Alex-Net, but trained on fashion MNIST

In [26]:
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()

In [27]:
(X_train_full, y_train_full),(X_test, y_test) = fashion_mnist

In [29]:
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full)

In [31]:
X_train.shape

(45000, 28, 28)

In [32]:
model_mnist = tf.keras.Sequential([
    tf.keras.layers.Input(shape = (28, 28, 1)),
    Conv2D(filters = 32, kernel_size = (5, 5)),
    Maxpool2D(),
    Conv2D(filters = 64),
    Conv2D(filters = 64),
    Maxpool2D(),
    Conv2D(filters = 128),
    Conv2D(filters = 128),
    Maxpool2D(),
    Conv2D(filters = 264),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units = 1000, activation = 'relu', kernel_initializer = 'he_normal'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units= 500, activation='relu', kernel_initializer = 'he_normal'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units = 100, activation='softmax')
])

In [33]:
model_mnist.compile(loss='sparse_categorical_crossentropy', optimizer = 'adam', metrics =['accuracy'])

In [34]:
history_mnist = model_mnist.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

Epoch 1/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 98ms/step - accuracy: 0.6280 - loss: 11.2110 - val_accuracy: 0.8417 - val_loss: 0.4539
Epoch 2/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 97ms/step - accuracy: 0.8244 - loss: 0.4987 - val_accuracy: 0.8485 - val_loss: 0.4466
Epoch 3/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 98ms/step - accuracy: 0.8457 - loss: 0.4400 - val_accuracy: 0.8603 - val_loss: 0.4062
Epoch 4/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 97ms/step - accuracy: 0.8539 - loss: 0.4103 - val_accuracy: 0.8618 - val_loss: 0.3786
Epoch 5/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 104ms/step - accuracy: 0.8585 - loss: 0.4039 - val_accuracy: 0.8571 - val_loss: 0.3947
Epoch 6/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 107ms/step - accuracy: 0.8638 - loss: 0.3896 - val_accuracy: 0.8688 - val_loss: 0.