In [1]:
import keras
from keras import layers
from keras import ops
# Load the CIFAR-10 train/test splits with Keras' built-in dataset loader.
# The arrays are passed to the models below without any preprocessing here;
# each model handles pixel scaling itself (the hand-built ones declare a
# (32, 32, 3) input followed by a 1/255 Rescaling layer).
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()



# CIFAR 10

Initially I wanted to do CIFAR 100, but I realized that with that many labels I would need a bigger model and would end up just waiting in front of the computer for it to train. To reach moderate performance faster, I settled for CIFAR 10. This is still a much harder classification problem than Fashion MNIST, so hopefully I will learn something while optimizing this. These are the three models I'm comparing.

1. Just an MLP
2. 4 convolutional layers (+ batch normalization + pooling) + an MLP
3. A pretrained model (ResNet)

### MLP implementation

Reaches around 28 percent accuracy in under a minute of training

In [8]:
# Baseline: a plain fully connected network on flattened pixels.
mlp_model = keras.Sequential()
mlp_model.add(keras.Input(shape=(32, 32, 3)))
mlp_model.add(layers.Flatten())
mlp_model.add(layers.Rescaling(1 / 255.0))  # scale pixel values by 1/255

# Three ReLU hidden layers of decreasing width.
for hidden_units in (1024, 256, 128):
    mlp_model.add(layers.Dense(hidden_units, activation='relu'))

mlp_model.add(layers.Dropout(0.5))
# The head emits raw logits, which is why the loss is built with from_logits=True.
mlp_model.add(layers.Dense(10))

mlp_model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

mlp_model.summary()

# 15% of the training data is held out for validation during fitting.
history = mlp_model.fit(
    x_train,
    y_train,
    epochs=6,
    batch_size=64,
    validation_split=0.15,
)

# evaluate() returns [loss, accuracy] for the single metric configured above.
test_scores = mlp_model.evaluate(x_test, y_test, verbose=1)
print(f'Test Loss: {test_scores[0]}')
print(f'Test Accuracy: {test_scores[1]}')


Epoch 1/6
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - accuracy: 0.1849 - loss: 2.3581 - val_accuracy: 0.2549 - val_loss: 2.0392
Epoch 2/6
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - accuracy: 0.3261 - loss: 1.8813 - val_accuracy: 0.2835 - val_loss: 2.1977
Epoch 3/6
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.3691 - loss: 1.7824 - val_accuracy: 0.3519 - val_loss: 1.7947
Epoch 4/6
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.3918 - loss: 1.7143 - val_accuracy: 0.3009 - val_loss: 2.0199
Epoch 5/6
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.4039 - loss: 1.6863 - val_accuracy: 0.3351 - val_loss: 1.9499
Epoch 6/6
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.4160 - loss: 1.6510 - val_accuracy: 0.3304 - val_loss: 2.1488
[1m313/313[0m [32m━━━━━━━

### Simple CNN

Interestingly, this model has fewer than half the parameters (in memory) of the previous one. It gets 47 percent accuracy in around 4 minutes of training.

In [16]:
# Small CNN: four Conv -> BatchNorm -> ReLU stages with widening filter counts,
# followed by a compact dense classifier that outputs logits.
simple_cnn_model = keras.Sequential()
simple_cnn_model.add(keras.Input(shape=(32, 32, 3)))
simple_cnn_model.add(layers.Rescaling(1 / 255.0))

conv_filters = (16, 32, 64, 128)
for stage_index, n_filters in enumerate(conv_filters):
    simple_cnn_model.add(
        layers.Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same')
    )
    simple_cnn_model.add(layers.BatchNormalization())
    simple_cnn_model.add(layers.Activation(keras.activations.relu))
    # Every stage except the last is followed by 2x2 max pooling.
    if stage_index < len(conv_filters) - 1:
        simple_cnn_model.add(layers.MaxPooling2D(pool_size=(2, 2)))

simple_cnn_model.add(layers.Flatten())
# simple_cnn_model.add(layers.Dense(128, activation='relu'))
simple_cnn_model.add(layers.Dense(64, activation='relu'))
simple_cnn_model.add(layers.Dropout(.5))
simple_cnn_model.add(layers.Dense(10))

simple_cnn_model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
simple_cnn_model.summary()

# NOTE(review): with patience=5 and only 5 epochs below, the stop condition can
# never be reached; any practical effect comes from restore_best_weights, and
# whether that applies when training ends normally depends on the Keras
# version — confirm.
early_stopping_callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history = simple_cnn_model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.15,
    callbacks=[early_stopping_callback],
)

test_scores = simple_cnn_model.evaluate(x_test, y_test, verbose=1)
print(f'Test Loss: {test_scores[0]}')
print(f'Test Accuracy: {test_scores[1]}')

Epoch 1/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - accuracy: 0.2146 - loss: 2.1570 - val_accuracy: 0.3937 - val_loss: 1.6855
Epoch 2/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - accuracy: 0.3506 - loss: 1.7121 - val_accuracy: 0.3672 - val_loss: 1.7767
Epoch 3/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - accuracy: 0.4225 - loss: 1.5501 - val_accuracy: 0.5253 - val_loss: 1.3099
Epoch 4/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - accuracy: 0.4637 - loss: 1.4543 - val_accuracy: 0.5160 - val_loss: 1.3941
Epoch 5/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - accuracy: 0.5025 - loss: 1.3652 - val_accuracy: 0.5393 - val_loss: 1.2643
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5424 - loss: 1.2700
Test Loss: 1.2753117084503174
Test Accuracy: 0.5379999876022339


### _Sparkly_ CNN ✨
This one has:
1. Resnet trained from scratch (no transfer learning yet)
2. Data Augmentation Layer
3. Stride for downsampling instead of pooling (which I heard is better)

In [19]:
def residual_block(block_input, filters):
    """Build one downsampling residual block and return its output tensor.

    Main path: 3x3 conv (stride 2) -> BatchNorm -> ReLU -> 3x3 conv -> BatchNorm.
    Shortcut path: 1x1 conv (stride 2), so the skip connection matches the main
    path in both spatial size and channel count.
    The two paths are summed and passed through a final ReLU.

    Args:
        block_input: symbolic Keras tensor feeding the block.
        filters: number of output channels for every convolution in the block.

    Returns:
        The symbolic output tensor of the block (spatial size halved by the
        stride-2 convolutions).
    """
    y = layers.Conv2D(filters=filters, kernel_size=(3, 3), strides=(2, 2), padding='same')(block_input)
    shortcut = layers.Conv2D(filters=filters, kernel_size=(1, 1), strides=(2, 2), padding='same')(block_input)
    y = layers.BatchNormalization()(y)
    y = layers.Activation(keras.activations.relu)(y)
    y = layers.Conv2D(filters=filters, kernel_size=(3, 3), padding='same')(y)
    y = layers.BatchNormalization()(y)
    # Explicit merge layer instead of the `+` operator, so the skip connection
    # appears as a named Add layer in model.summary().
    merged = layers.Add()([shortcut, y])
    return layers.Activation(keras.activations.relu)(merged)


# Random augmentation applied to the raw images before rescaling.
data_augmentation = keras.Sequential([
  layers.RandomFlip("horizontal"),
  layers.RandomRotation(0.1),
  layers.RandomZoom(0.1),
])

inputs = keras.Input(shape=(32,32,3))
x = data_augmentation(inputs)
x = layers.Rescaling(1/255.0)(x)

# Three residual blocks with 16, 32 and 64 filters; each one downsamples with
# stride-2 convolutions rather than pooling.
for block_filters in (16, 32, 64):
    x = residual_block(x, block_filters)

# Dense classifier head; the final layer outputs logits for the 10 classes.
x = layers.Flatten()(x)
# x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dropout(.5)(x)
x = layers.Dense(32, activation='relu')(x)
x = layers.Dropout(.5)(x)

outputs = layers.Dense(10)(x)

sparkly_cnn = keras.Model(inputs = inputs, outputs = outputs)

sparkly_cnn.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer = keras.optimizers.RMSprop(),
    metrics = ['accuracy']
)
sparkly_cnn.summary()

history = sparkly_cnn.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs = 5,
    validation_split=0.15,
)

test_scores = sparkly_cnn.evaluate(x_test, y_test, verbose=1)
print(f'Test Loss: {test_scores[0]}')
print(f'Test Accuracy: {test_scores[1]}')

Epoch 1/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 19ms/step - accuracy: 0.1688 - loss: 2.2160 - val_accuracy: 0.3021 - val_loss: 1.8759
Epoch 2/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 19ms/step - accuracy: 0.2908 - loss: 1.8937 - val_accuracy: 0.2852 - val_loss: 1.9278
Epoch 3/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 20ms/step - accuracy: 0.3538 - loss: 1.7407 - val_accuracy: 0.3657 - val_loss: 1.6990
Epoch 4/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 19ms/step - accuracy: 0.4061 - loss: 1.6464 - val_accuracy: 0.3584 - val_loss: 1.7202
Epoch 5/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 20ms/step - accuracy: 0.4256 - loss: 1.5884 - val_accuracy: 0.4079 - val_loss: 1.8711
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4155 - loss: 1.8665
Test Loss: 1.8746784925460815
Test Accuracy: 0.4163999855518341


### CNN with transfer learning

In [5]:
# Transfer learning in two stages:
#   1. run every image once through a frozen ImageNet ResNet50V2 to get features
#   2. train a small dense classifier on those cached features
inputs = keras.Input(shape=(32,32,3))
# 32 * 7 = 224, the input size the backbone is instantiated with below.
x = keras.layers.UpSampling2D(size=(7,7))(inputs)
# NOTE: random augmentation layers are not inserted here because they are
# inactive in predict() (inference mode), which is how features are extracted.
x = keras.applications.resnet_v2.preprocess_input(x)
base_model = keras.applications.ResNet50V2(
    include_top = False,
    weights = "imagenet",
    input_shape = (224,224,3),
    # Global-average-pool the final feature maps so each image yields a single
    # 2048-d vector. Without pooling the backbone returns a 7x7x2048 map per
    # image, i.e. roughly 20 GB of float32 for the training set alone.
    pooling = "avg",
)
base_model.trainable = False
outputs = base_model(x)

# This model turns images into feature vectors
feature_extractor = keras.Model(inputs, outputs)

print("Extracting features from the training set...")
x_train_features = feature_extractor.predict(x_train, batch_size=64, verbose=1)

print("Extracting features from the testing set...")
x_test_features = feature_extractor.predict(x_test, batch_size=64, verbose=1)

classifier_head = keras.Sequential([
    # The Input shape is taken from the extracted features, so the head adapts
    # to whatever the feature extractor outputs.
    keras.Input(shape=x_train_features.shape[1:]),
    # Flatten is a no-op on already-pooled vectors; kept so the head still works
    # if the extractor is switched back to unpooled feature maps.
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10) # Same output layer (logits) as the other models
])

classifier_head.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer = keras.optimizers.RMSprop(),
    metrics = ['accuracy']
)

# Stop once val_loss has not improved for 5 consecutive epochs and roll back to
# the best weights seen.
early_stopping_callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

history = classifier_head.fit(
    x_train_features,
    y_train,
    batch_size=64,
    epochs = 40,
    validation_split=0.15,
    callbacks=[early_stopping_callback]
)

# Fix: the evaluate() result was previously discarded (the assignment was split
# across two lines), so the prints below reported stale scores from an earlier
# cell, or raised NameError on a fresh kernel.
test_scores = classifier_head.evaluate(x_test_features, y_test, verbose=1)
print(f'Test Loss: {test_scores[0]}')
print(f'Test Accuracy: {test_scores[1]}')

[1m 52/782[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m16:12[0m 1s/step

KeyboardInterrupt: 

## Takeaways

Ok, so after having gone through transformers, I feel like these image classification models are architecturally a lot simpler. So ... this CNN module is def my least favorite so far. My models suck, and guessing and checking the number of layers and hyperparameters is not making them any better in a time I'm willing to tolerate. For later, I will add another lesson to this module tackling this same problem but detouring into the boring optimization stuff that I didn't want to get to (checks for overfitting, more data augmentation, visualizing data, confusion matrix, etc.).