In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import regularizers
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras import models, layers, optimizers

from keras import datasets

# Load CIFAR-100 with the fine-grained labels; Keras returns it already
# separated into a training pair and a test pair of (images, labels).
train_split, test_split = tf.keras.datasets.cifar100.load_data(label_mode='fine')
X_train, y_train = train_split
X_test, y_test = test_split

# Report array shapes in the order: train images, test images, train labels, test labels.
for split_array in (X_train, X_test, y_train, y_test):
    print(split_array.shape)

(50000, 32, 32, 3)
(10000, 32, 32, 3)
(50000, 1)
(10000, 1)


Dataset Division: Divide the training dataset into two parts: a
sub-training set and a validation set. Randomly allocate 1/5 of the
training dataset to serve as the validation set.

In [None]:
# Hold out a random 20% (1/5) of the training data as the validation set; the
# remaining 80% becomes the sub-training set. The fixed random_state makes the
# split identical on every run, so models are compared on the same data after
# a kernel restart.
X_sub_train, X_val, y_sub_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=True, random_state=42
)

Label Prediction Requirement: Ensure that your model is designed to predict the
“fine” label (class) rather than the “coarse” label (superclass). Important: Your
model must predict the “fine” label to be considered for grading; predicting the
“coarse” label will result in a score of zero

Model Selection: Using the sub-training and validation sets, identify
the three most effective models



In [None]:

def AlexNet():
    """Build an AlexNet-inspired CNN for 32x32x3 images with a 100-way softmax head.

    Two conv/conv/max-pool/dropout stages are followed by two 4096-unit dense
    layers. Every convolution uses a 3x3 kernel, 'same' padding and ReLU
    (author's note: ReLU outperformed tanh and sigmoid). All convolutions
    except the first carry an L2 kernel penalty of 1e-4.

    Returns:
        An uncompiled Sequential model.
    """
    net = models.Sequential()

    # Stage 1. Input shape 32x32x3 matches the dataset; 64 filters pick up
    # basic features and the 1x1 stride visits every pixel.
    net.add(layers.Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu', input_shape=(32, 32, 3)))
    # Widening to 192 filters enables more complex features; L2 regularization
    # was added to reduce overfitting.
    net.add(layers.Conv2D(192, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-4)))
    net.add(layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    net.add(layers.Dropout(0.2))

    # Stage 2. Two 384-filter convolutions, then pooling and dropout (the
    # dropout is there to help with overfitting).
    for _ in range(2):
        net.add(layers.Conv2D(384, (3, 3), padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-4)))
    net.add(layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    net.add(layers.Dropout(0.2))

    # Classifier head. Flatten turns the 2D feature maps into a 1D vector.
    net.add(layers.Flatten())
    for _ in range(2):
        net.add(layers.Dense(4096, activation='relu'))
    # 100 output units with softmax for the 100-class problem.
    net.add(layers.Dense(100, activation='softmax'))

    return net

In [None]:
# Three optimizers were tried at learning_rate=1e-4 (RMSprop, Adam, SGD);
# RMSprop performed best, so it is the one kept here.
rmsprop_optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-4)

cnn_model1 = AlexNet()

# sparse_categorical_crossentropy is used for the classification loss; the
# author chose it after running into memory issues.
cnn_model1.compile(
    optimizer=rmsprop_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# This fit uses the full training set. The model-selection variant instead
# fitted on X_sub_train / y_sub_train with validation_data=(X_val, y_val),
# using the same epochs and batch size.
history = cnn_model1.fit(X_train, y_train, batch_size=64, epochs=10)

# Plot the per-epoch curves recorded by the most recent `fit` call.
# Validation metrics exist only when `fit` was given validation data; when it
# was not, indexing 'val_accuracy' / 'val_loss' raises KeyError. They are read
# with .get() and plotted only if present.
history_metrics = history.history
acc = history_metrics['accuracy']
loss = history_metrics['loss']
val_acc = history_metrics.get('val_accuracy')
val_loss = history_metrics.get('val_loss')

epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training acc')
if val_acc is not None:
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
if val_loss is not None:
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


KeyError: ignored

In [None]:
# Score the first model on the test split, then print its layer-by-layer summary.
test_model1 = cnn_model1.evaluate(x=X_test, y=y_test)
cnn_model1.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_65 (Conv2D)          (None, 32, 32, 64)        1792      
                                                                 
 conv2d_66 (Conv2D)          (None, 32, 32, 192)       110784    
                                                                 
 max_pooling2d_29 (MaxPooli  (None, 16, 16, 192)       0         
 ng2D)                                                           
                                                                 
 dropout_31 (Dropout)        (None, 16, 16, 192)       0         
                                                                 
 conv2d_67 (Conv2D)          (None, 16, 16, 384)       663936    
                                                                 
 conv2d_68 (Conv2D)          (None, 16, 16, 384)       1327488   
                                                      

In [None]:


def vgg():
    """Build a VGG-style CNN for 32x32x3 images with a 100-way softmax head.

    VGG was used as the base structure for this second model and as a starting
    point for choosing filter counts. Five blocks of 3x3 'same'-padded
    convolutions are each followed by 2x2 max pooling and Dropout(0.2). Filter
    counts grow 64 -> 128 -> 256 -> 512 to capture increasingly complex
    features. Every convolution has an L2 kernel penalty of 1e-4, added because
    testing kept running into overfitting.

    Author's notes: a dropout rate of 0.2 was kept because other rates
    performed worse, and the extra fifth block uses tanh because it gained
    about 2% in performance.

    Returns:
        An uncompiled Sequential model.
    """
    # (filters, number of conv layers, activation) for each block, in order.
    block_specs = [
        (64, 2, 'relu'),
        (128, 2, 'relu'),
        (256, 3, 'relu'),
        (512, 3, 'relu'),
        (512, 3, 'tanh'),
    ]

    stack = []
    for filters, conv_count, activation in block_specs:
        for _ in range(conv_count):
            conv_kwargs = {
                'padding': 'same',
                'activation': activation,
                'kernel_regularizer': regularizers.l2(1e-4),
            }
            if not stack:
                # Only the very first layer declares the input shape.
                conv_kwargs['input_shape'] = (32, 32, 3)
            stack.append(layers.Conv2D(filters, (3, 3), **conv_kwargs))
        stack.append(layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
        stack.append(layers.Dropout(0.2))

    # Classifier head: flatten, two wide dense layers, 100-class softmax.
    stack.append(layers.Flatten())
    stack.append(layers.Dense(4096, activation='relu'))
    stack.append(layers.Dense(4096, activation='relu'))
    stack.append(layers.Dense(100, activation='softmax'))

    return models.Sequential(stack)
# New RMSprop instance (learning_rate=1e-4) for the second model. Adam and SGD
# at the same learning rate were the alternatives tried.
rmsprop_optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-4)

cnn_model2 = vgg()
cnn_model2.compile(
    optimizer=rmsprop_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# This fit uses the full training set. The model-selection variant instead
# fitted on X_sub_train / y_sub_train with validation_data=(X_val, y_val),
# using the same epochs and batch size.
history = cnn_model2.fit(X_train, y_train, batch_size=128, epochs=10)

# Score on the test split, then print the layer-by-layer summary.
test_model2 = cnn_model2.evaluate(x=X_test, y=y_test)
cnn_model2.summary()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_69 (Conv2D)          (None, 32, 32, 64)        1792      
                                                                 
 conv2d_70 (Conv2D)          (None, 32, 32, 64)        36928     
                                                                 
 max_pooling2d_31 (MaxPooli  (None, 16, 16, 64)        0         
 ng2D)                                                           
                                                                 
 dropout_33 (Dropout)        (None, 16, 16, 64)        0         
                                                                 
 conv2d_71 (Conv2D)          (None, 16, 16, 128)       73856     
                                                                 
 conv2d_7

In [None]:

# Plot the per-epoch curves recorded by the most recent `fit` call.
# Validation metrics exist only when `fit` was given validation data; when it
# was not, indexing 'val_accuracy' / 'val_loss' raises KeyError. They are read
# with .get() and plotted only if present.
history_metrics = history.history
acc = history_metrics['accuracy']
loss = history_metrics['loss']
val_acc = history_metrics.get('val_accuracy')
val_loss = history_metrics.get('val_loss')

epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training acc')
if val_acc is not None:
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
if val_loss is not None:
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:

def create_model3():
    """Build the third CNN, designed from lessons learned on the first two models.

    Author's notes: L2 regularization decreased overfitting,
    BatchNormalization helped with training plateaus, and ReLU is kept as the
    activation because it performed best.

    Four stages of 3x3 'same'-padded ReLU convolutions (each followed by
    BatchNormalization) end in 2x2 max pooling and Dropout(0.2). The head is a
    512-unit L2-regularized dense layer, another Dropout(0.2) and a 100-way
    softmax.

    Returns:
        An uncompiled Sequential model.
    """
    def conv_bn(filters, **extra):
        # One regularized 3x3 convolution followed by its batch normalization.
        return [
            layers.Conv2D(
                filters,
                (3, 3),
                padding='same',
                kernel_regularizer=regularizers.l2(1e-4),
                activation='relu',
                **extra,
            ),
            layers.BatchNormalization(),
        ]

    # (filters, number of conv+batch-norm pairs) for each stage, in order.
    stage_plan = [(32, 2), (128, 2), (256, 2), (512, 1)]

    stack = []
    for filters, pair_count in stage_plan:
        for _ in range(pair_count):
            # Only the very first layer declares the input shape.
            extra = {} if stack else {'input_shape': (32, 32, 3)}
            stack.extend(conv_bn(filters, **extra))
        stack.append(layers.MaxPooling2D(pool_size=(2, 2)))
        stack.append(layers.Dropout(0.2))

    # Classifier head.
    stack.append(layers.Flatten())
    stack.append(layers.Dense(512, kernel_regularizer=regularizers.l2(1e-4), activation='relu'))
    stack.append(layers.Dropout(0.2))
    stack.append(layers.Dense(100, activation='softmax'))

    return models.Sequential(stack)
# New RMSprop instance (learning_rate=1e-4) for the third model. Adam and SGD
# at the same learning rate were the alternatives tried.
rmsprop_optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-4)

cnn_model3 = create_model3()
cnn_model3.compile(
    optimizer=rmsprop_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# This fit uses the full training set. The model-selection variant instead
# fitted on X_sub_train / y_sub_train with validation_data=(X_val, y_val),
# using the same epochs and batch size.
history = cnn_model3.fit(X_train, y_train, batch_size=128, epochs=10)

# Score on the test split, then print the layer-by-layer summary.
test_model3 = cnn_model3.evaluate(x=X_test, y=y_test)
cnn_model3.summary()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_82 (Conv2D)          (None, 32, 32, 32)        896       
                                                                 
 batch_normalization_14 (Ba  (None, 32, 32, 32)        128       
 tchNormalization)                                               
                                                                 
 conv2d_83 (Conv2D)          (None, 32, 32, 32)        9248      
                                                                 
 batch_normalization_15 (Ba  (None, 32, 32, 32)        128       
 tchNormalization)                                               
                                                                 
 max_pooling2d_36 (MaxPooli  (None, 16, 16, 32)        0         
 ng2D)  

In [None]:
# Plot the per-epoch curves recorded by the most recent `fit` call.
# Validation metrics exist only when `fit` was given validation data; when it
# was not, indexing 'val_accuracy' / 'val_loss' raises KeyError. They are read
# with .get() and plotted only if present.
history_metrics = history.history
acc = history_metrics['accuracy']
loss = history_metrics['loss']
val_acc = history_metrics.get('val_accuracy')
val_loss = history_metrics.get('val_loss')

epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training acc')
if val_acc is not None:
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
if val_loss is not None:
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()