# Daniel Adegoke - Lab Evaluation #4

### Load the dataset from Keras

In [1]:

from keras.datasets import cifar10
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Load CIFAR-10 through Keras; the call returns a (images, labels) pair for
# the training split and another for the test split.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


### Data Exploration

In [3]:
# Report the array shapes of the image tensors for both splits.
print('Training data shape:', x_train.shape)
print('Testing data shape:', x_test.shape)

Training data shape: (50000, 32, 32, 3)
Testing data shape: (10000, 32, 32, 3)


In [4]:
# Shapes of the label arrays for the training and test splits.
y_train.shape,y_test.shape

((50000, 1), (10000, 1))

In [5]:
# Collect the distinct label values that occur in the training targets
# and count how many there are.
classes = np.unique(y_train)
nClasses = classes.size
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes)

Total number of outputs :  10
Output classes :  [0 1 2 3 4 5 6 7 8 9]


In [6]:
# Human-readable name for each integer class id (the position in the tuple
# is the class id).
label_dict = dict(enumerate((
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)))

In [7]:
# Scale the training pixels by 1/255.
# NOTE(review): this rebinds x_train, so running the cell a second time
# without reloading the data divides by 255 again.
x_train = x_train/255.0

In [8]:
# Flatten every training image into a single row of 3072 values
# (32 * 32 * 3, matching the image shape printed earlier).
x_train_flat = x_train.reshape(-1,3072)

In [9]:
# One column name per flattened pixel value: pixel0, pixel1, ...
feat_cols = ['pixel{}'.format(pixel_idx) for pixel_idx in range(x_train_flat.shape[1])]

In [10]:
# Wrap the flattened training pixels in a DataFrame with one named column per pixel.
df_cifar = pd.DataFrame(x_train_flat,columns=feat_cols)

In [11]:
# Append the training labels as an extra 'label' column and report the
# resulting frame size.
df_cifar['label'] = y_train
print('Size of the dataframe: {}'.format(df_cifar.shape))

Size of the dataframe: (50000, 3073)


In [12]:
# Preview the first rows of the pixel/label table.
df_cifar.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel3063,pixel3064,pixel3065,pixel3066,pixel3067,pixel3068,pixel3069,pixel3070,pixel3071,label
0,0.231373,0.243137,0.247059,0.168627,0.180392,0.176471,0.196078,0.188235,0.168627,0.266667,...,0.847059,0.721569,0.54902,0.592157,0.462745,0.329412,0.482353,0.360784,0.282353,6
1,0.603922,0.694118,0.733333,0.494118,0.537255,0.533333,0.411765,0.407843,0.372549,0.4,...,0.560784,0.521569,0.545098,0.560784,0.52549,0.556863,0.560784,0.521569,0.564706,9
2,1.0,1.0,1.0,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,...,0.305882,0.333333,0.32549,0.309804,0.333333,0.32549,0.313725,0.337255,0.329412,9
3,0.109804,0.098039,0.039216,0.145098,0.133333,0.07451,0.14902,0.137255,0.078431,0.164706,...,0.211765,0.184314,0.109804,0.247059,0.219608,0.145098,0.282353,0.254902,0.180392,4
4,0.666667,0.705882,0.776471,0.658824,0.698039,0.768627,0.694118,0.72549,0.796078,0.717647,...,0.294118,0.309804,0.321569,0.278431,0.294118,0.305882,0.286275,0.301961,0.313725,1


### Principal components

In [13]:
from sklearn.decomposition import PCA

# Project the pixel columns (everything except the trailing 'label' column)
# onto their first two principal components.
# random_state is pinned so that, if scikit-learn selects a randomized SVD
# solver for this input, the projection is repeatable from run to run.
pca_cifar = PCA(n_components=2, random_state=42)
principalComponents_cifar = pca_cifar.fit_transform(df_cifar.iloc[:, :-1])

In [14]:
# Tabulate the 2-D PCA projection and attach the training label of each image.
principal_cifar_Df = pd.DataFrame(
    principalComponents_cifar,
    columns=['principal component 1', 'principal component 2'],
)
principal_cifar_Df['y'] = y_train

In [15]:
# Preview the first rows of the 2-component projection.
principal_cifar_Df.head()

Unnamed: 0,principal component 1,principal component 2,y
0,-6.401018,2.729039,6
1,0.829783,-0.949943,9
2,7.7302,-11.522102,9
3,-10.347817,0.010738,4
4,-2.625651,-4.96924,1


In [16]:
# Fraction of the total variance captured by each of the two components.
print('Explained variation per principal component: {}'.format(pca_cifar.explained_variance_ratio_))

Explained variation per principal component: [0.2907663  0.11253144]


In [17]:
# Apply the same 1/255 scaling to the test pixels as was applied to x_train.
# NOTE(review): this rebinds x_test, so running the cell a second time
# without reloading the data divides by 255 again.
x_test = x_test/255.0

In [18]:
# NOTE(review): x_test was printed earlier with shape (10000, 32, 32, 3), so
# this reshape appears to leave the array layout unchanged.
x_test = x_test.reshape(-1,32,32,3)

In [19]:
# Flatten every test image into a single row of 3072 values, mirroring x_train_flat.
x_test_flat = x_test.reshape(-1,3072)

In [20]:
# A fractional n_components asks PCA to keep as many components as are
# needed to explain that share (here 90%) of the variance.
pca = PCA(0.9)

In [21]:
# Fit the projection on the flattened training images only.
pca.fit(x_train_flat)

PCA(copy=True, iterated_power='auto', n_components=0.9, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [22]:
# Number of components the fitted PCA retained.
pca.n_components_

99

In [23]:
# Project both splits with the PCA that was fitted on the training images.
train_img_pca = pca.transform(x_train_flat)
test_img_pca = pca.transform(x_test_flat)

# Exercise

In [24]:
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils
from keras.optimizers import RMSprop
from keras.models import Model
from keras.layers import *
from keras import optimizers

In [25]:
# One-hot encode the integer labels for use with categorical cross-entropy.
# num_classes is passed explicitly so both splits always get the same number
# of columns; without it the width is inferred from the largest label present
# in each array separately.
# NOTE(review): this rebinds y_train / y_test, so re-running the cell without
# reloading the data would encode the already-encoded labels.
y_train = np_utils.to_categorical(y_train, num_classes=nClasses)
y_test = np_utils.to_categorical(y_test, num_classes=nClasses)

# Part-1 
## Instruction 
Fit a fully connected NN with 4 hidden layers on the PCA-transformed dataset, with the following numbers of neurons: 1st hidden layer = 1024, 2nd hidden layer = 1024, 3rd hidden layer = 512, 4th hidden layer = 256.

Use RMSprop as the optimizer, with batch_size=128, epochs=20, learning_rate=0.001.

In [26]:
# Input Parameters
# Number of input features = number of components kept by the fitted PCA
# (99 in the recorded run). Reading it from the estimator keeps the network
# in step with the data if the retained component count ever changes.
n_input = int(pca.n_components_)
n_hidden_1 = 1024
n_hidden_2 = 1024
n_hidden_3 = 512
n_hidden_4 = 256
num_digits = 10  # number of output classes

In [27]:
# Part-1 network: four ReLU hidden layers followed by a softmax output.
# The input width comes from n_input (set in the parameter cell above)
# instead of repeating the literal 99 here.
Inp = Input(shape=(n_input,))
x1 = Dense(n_hidden_1, activation='relu', name = "Hidden_Layer_1")(Inp)
x2 = Dense(n_hidden_2, activation='relu', name = "Hidden_Layer_2")(x1)
x3 = Dense(n_hidden_3, activation='relu', name = "Hidden_Layer_3")(x2)
x4 = Dense(n_hidden_4, activation='relu', name = "Hidden_Layer_4")(x3)
output = Dense(num_digits, activation='softmax', name = "Output_Layer")(x4)

In [28]:
# Bind the input and output tensors into a model and list its layers.
model = Model(inputs=Inp, outputs=output)
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 99)                0         
_________________________________________________________________
Hidden_Layer_1 (Dense)       (None, 1024)              102400    
_________________________________________________________________
Hidden_Layer_2 (Dense)       (None, 1024)              1049600   
_________________________________________________________________
Hidden_Layer_3 (Dense)       (None, 512)               524800    
_________________________________________________________________
Hidden_Layer_4 (Dense)       (None, 256)               131328    
_________________________________________________________________
Output_Layer (Dense)         (None, 10)                2570      
Total params: 1,810,698
Trainable params: 1,810,698
Non-trainable params: 0
_________________________________________________

In [29]:
# Training hyper-parameters requested by the exercise.
learning_rate, training_epochs, batch_size = 0.001, 20, 128


In [30]:
# RMSprop optimizer built from the configured learning rate, with rho=0.9.
RMS = optimizers.RMSprop(learning_rate, rho=0.9)

In [31]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss,
# and accuracy reported as the metric.
model.compile(
    optimizer=RMS,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [32]:
# Train on the PCA-transformed features. The test split is passed as
# validation data, so the val_* figures in the log are test-set figures.
history1 = model.fit(
    train_img_pca,
    y_train,
    validation_data=(test_img_pca, y_test),
    epochs=training_epochs,
    batch_size=batch_size,
    verbose=2,
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
 - 10s - loss: 1.6334 - accuracy: 0.4210 - val_loss: 1.4435 - val_accuracy: 0.4843
Epoch 2/20
 - 10s - loss: 1.3242 - accuracy: 0.5306 - val_loss: 1.3532 - val_accuracy: 0.5232
Epoch 3/20
 - 9s - loss: 1.1483 - accuracy: 0.5904 - val_loss: 1.2809 - val_accuracy: 0.5472
Epoch 4/20
 - 10s - loss: 0.9862 - accuracy: 0.6482 - val_loss: 1.3715 - val_accuracy: 0.5411
Epoch 5/20
 - 10s - loss: 0.8379 - accuracy: 0.7008 - val_loss: 1.3434 - val_accuracy: 0.5510
Epoch 6/20
 - 9s - loss: 0.6963 - accuracy: 0.7491 - val_loss: 1.6857 - val_accuracy: 0.5595
Epoch 7/20
 - 10s - loss: 0.5677 - accuracy: 0.7944 - val_loss: 1.5873 - val_accuracy: 0.5625
Epoch 8/20
 - 9s - loss: 0.4677 - accuracy: 0.8346 - val_loss: 1.8941 - val_accuracy: 0.5462
Epoch 9/20
 - 10s - loss: 0.3929 - accuracy: 0.8621 - val_loss: 2.0101 - val_accuracy: 0.5568
Epoch 10/20
 - 10s - loss: 0.3398 - accuracy: 0.8841 - val_loss: 2.0706 - val_accuracy: 0.5558
Epoch 11/20


# Part-2
## Instruction 
Fit a fully connected NN with 4 hidden layers on the original dataset, with the following numbers of neurons: 1st hidden layer = 1024, 2nd hidden layer = 1024, 3rd hidden layer = 512, 4th hidden layer = 256.

Use RMSprop as the optimizer, with batch_size=128, epochs=20, learning_rate=0.001.

In [60]:
# Input Parameters
# Number of features for the original (non-PCA) data: each image is
# flattened to 32 * 32 * 3 = 3072 values. The previous value of 99 was left
# over from the PCA model.
n_input = 32 * 32 * 3
n_hidden_1 = 1024
n_hidden_2 = 1024
n_hidden_3 = 512
n_hidden_4 = 256
num_digits = 10  # number of output classes

In [61]:
# Confirm the width of the flattened training data before sizing the input layer.
x_train_flat.shape

(50000, 3072)

In [62]:
# Part-2 network: same four ReLU hidden layers and softmax output as Part 1,
# but fed with the flattened original images. The input width is read from
# the training array so it cannot drift from the data.
Inp = Input(shape=(x_train_flat.shape[1],))
x1 = Dense(n_hidden_1, activation='relu', name = "Hidden_Layer_1")(Inp)
x2 = Dense(n_hidden_2, activation='relu', name = "Hidden_Layer_2")(x1)
x3 = Dense(n_hidden_3, activation='relu', name = "Hidden_Layer_3")(x2)
x4 = Dense(n_hidden_4, activation='relu', name = "Hidden_Layer_4")(x3)
output = Dense(num_digits, activation='softmax', name = "Output_Layer")(x4)

In [63]:
# Build the Part-2 model from its input/output tensors and list its layers.
# NOTE(review): this rebinds `model`, replacing the Part-1 model object.
model = Model(inputs=Inp, outputs=output)
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 3072)              0         
_________________________________________________________________
Hidden_Layer_1 (Dense)       (None, 1024)              3146752   
_________________________________________________________________
Hidden_Layer_2 (Dense)       (None, 1024)              1049600   
_________________________________________________________________
Hidden_Layer_3 (Dense)       (None, 512)               524800    
_________________________________________________________________
Hidden_Layer_4 (Dense)       (None, 256)               131328    
_________________________________________________________________
Output_Layer (Dense)         (None, 10)                2570      
Total params: 4,855,050
Trainable params: 4,855,050
Non-trainable params: 0
_________________________________________________

In [64]:
# Same training hyper-parameters as Part 1, restated for this section.
batch_size = 128
training_epochs = 20
learning_rate = 0.001

In [65]:
# Fresh RMSprop optimizer for the Part-2 model (configured learning rate, rho=0.9).
RMS = optimizers.RMSprop(learning_rate, rho=0.9)

In [66]:
# Configure training for the Part-2 model: RMSprop, categorical
# cross-entropy loss, accuracy as the reported metric.
model.compile(
    optimizer=RMS,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [69]:
# Train on the flattened original images, validating on the test split.
# NOTE(review): the result is stored in `history1`, the same name used for the
# PCA model's training run, so the earlier History object is no longer
# reachable after this cell.
history1 = model.fit(
    x_train_flat,
    y_train,
    validation_data=(x_test_flat, y_test),
    epochs=training_epochs,
    batch_size=batch_size,
    verbose=2,
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
 - 22s - loss: 2.1844 - accuracy: 0.2372 - val_loss: 1.8840 - val_accuracy: 0.3113
Epoch 2/20
 - 22s - loss: 1.8377 - accuracy: 0.3388 - val_loss: 1.7727 - val_accuracy: 0.3531
Epoch 3/20
 - 22s - loss: 1.7369 - accuracy: 0.3763 - val_loss: 1.9131 - val_accuracy: 0.3316
Epoch 4/20
 - 22s - loss: 1.6686 - accuracy: 0.4029 - val_loss: 1.8241 - val_accuracy: 0.3728
Epoch 5/20
 - 23s - loss: 1.6120 - accuracy: 0.4218 - val_loss: 1.7142 - val_accuracy: 0.3938
Epoch 6/20
 - 22s - loss: 1.5721 - accuracy: 0.4384 - val_loss: 1.5603 - val_accuracy: 0.4523
Epoch 7/20
 - 23s - loss: 1.5273 - accuracy: 0.4535 - val_loss: 1.5892 - val_accuracy: 0.4321
Epoch 8/20
 - 23s - loss: 1.5041 - accuracy: 0.4639 - val_loss: 1.5198 - val_accuracy: 0.4648
Epoch 9/20
 - 22s - loss: 1.4765 - accuracy: 0.4743 - val_loss: 1.6349 - val_accuracy: 0.4210
Epoch 10/20
 - 23s - loss: 1.4447 - accuracy: 0.4848 - val_loss: 1.9170 - val_accuracy: 0.3744
Epoch 11/

# Part-3
## Write a conclusion on the above two models
For example is any of the model overfitting, if so why? Which model is better?

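The first model (trained on the PCA-transformed dataset) is overfitted: it reaches an accuracy of 94%, whereas the 2nd model has an accuracy of 54.8%; therefore the PCA dataset is overfitted by the model. It performs very well on the training dataset but inadequately on the validation dataset. The training log shows this directly: the training loss keeps falling while the validation loss rises from about 1.28 (epoch 3) to above 2.0 (epoch 10), and the validation accuracy stays at roughly 55%.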

However, using the original dataset, the model performs well on both the training dataset and the validation dataset, i.e. the gap between its training and validation accuracy is much smaller than for the first model. Overall, this model is better.