In [1]:
import numpy as np
import pandas as pd
from scipy.io import loadmat
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import LearningRateScheduler
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt

from skimage.feature import hog
from skimage.color import rgb2grey
import cv2 as cv

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import roc_curve, auc

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Read the train and test .mat files (the "extra" split stays disabled).
matTr, matTe = loadmat('train_32x32.mat'), loadmat('test_32x32.mat')
# matExt = loadmat('extra_32x32.mat')

# Labels: shift the stored values from [1,10] down to [0,9] and drop the
# singleton axis so they become flat vectors.
# NOTE(review): if this is the SVHN dataset, stored label 10 stands for digit 0,
# so class index k would correspond to digit k+1 (and 9 to digit 0) - confirm.
Ytr = np.squeeze(matTr['y'] - 1)
Yte = np.squeeze(matTe['y'] - 1)
# Yext = np.squeeze(matExt['y'] - 1)

# Images: move the last axis (the image index) to the front, then rescale the
# pixel values by 1/255.
Xtr = np.transpose(matTr['X'], (3, 0, 1, 2)) / 255.0
Xte = np.transpose(matTe['X'], (3, 0, 1, 2)) / 255.0
# Xext = np.transpose(matExt['X'], (3, 0, 1, 2))
# Xtr_ext = np.concatenate((Xtr, Xext)) / 255.0
# Ytr_ext = np.concatenate((Ytr, Yext))

In [4]:
# Default training hyper-parameters.
batch_size, epochs = 128, 15
# Input geometry: image height, width and number of colour channels.
IMG_HEIGHT, IMG_WIDTH, NUM_CHANNEL = 32, 32, 3

In [None]:
# Earlier per-image variant, kept for reference:
# Xtr_gray = np.zeros((len(Xtr),32,32,1))
# for i in range(len(Xtr)):
#     Xtr_gray[i] = tf.image.rgb_to_grayscale(Xtr[i])
# Convert the whole Xtr batch to grayscale in one call; the following cell
# passes the result through K.eval before it is indexed like an array.
Xtr_gray = tf.image.rgb_to_grayscale(Xtr)


In [None]:
# Evaluate the grayscale result through the Keras backend; later cells index
# Xtr_gray as an array (e.g. Xtr_gray[i,:,:,0]).
# NOTE(review): the name is rebound in place, so re-running this cell alone
# feeds its own output back into K.eval - confirm that is safe on your TF version.
Xtr_gray = K.eval(Xtr_gray)

In [None]:
# Binarise every grayscale training image with Gaussian adaptive thresholding
# (block size 11, constant 2); output pixels are 0 or 255.
Xtr_bin = np.zeros((len(Xtr_gray),32,32,1))
for i in range(len(Xtr_gray)):
    # cv.adaptiveThreshold only accepts 8-bit single-channel input, while
    # Xtr_gray holds floats in [0, 1] (Xtr was divided by 255.0 on load), so
    # rescale back to 0..255 and cast before thresholding.
    gray_u8 = np.clip(np.rint(Xtr_gray[i,:,:,0] * 255.0), 0, 255).astype(np.uint8)
    Xtr_bin[i,:,:,0] = cv.adaptiveThreshold(gray_u8,255,cv.ADAPTIVE_THRESH_GAUSSIAN_C,cv.THRESH_BINARY,11,2)
#     Alternative (disabled): Gaussian blur followed by Otsu thresholding.
#     blur = cv.GaussianBlur(gray_u8,(5,5),0)
#     _, Xtr_bin[i,:,:,0] = cv.threshold(blur,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)

```python
# Show one training sample in its three representations, side by side.
ind = 700
titles = ['RGB Image', 'GRAY_SCALE', 'BINARY']
images = [Xtr[ind], Xtr_gray[ind,:,:,0], Xtr_bin[ind,:,:,0]]
for position, (title, image) in enumerate(zip(titles, images), start=1):
    plt.subplot(1, 3, position)
    # Only the two single-channel panels get the grey colormap.
    if position == 1:
        plt.imshow(image)
    else:
        plt.imshow(image, 'gray')
    plt.title(title)
    # Hide the axis ticks on both axes.
    plt.xticks([])
    plt.yticks([])
plt.savefig('images.png', bbox_inches='tight')
plt.show()
```

```python
# Distribution of the training labels, one bar per bin with black outlines.
plt.hist(Ytr, bins=10, edgecolor='k')
plt.title('Histogram of Train Data')
plt.savefig('histogram.png', bbox_inches='tight')
```

# Experiment 1

In [6]:
# Experiment 1: vary the number of (conv -> max-pool) stages.
# Network i stacks i+1 stages (16, then 32, then 64 feature maps).
nets = 3
# Keep `epochs` in sync with the fit() call below: the summary line (and the
# plotting cell, which reads `epochs`) would otherwise report the 15 set in
# the configuration cell instead of the 20 epochs actually trained.
epochs = 20
model = [0]*nets
history = [0]*nets
for i in range(nets):
    model[i] = Sequential()
    model[i].add(Conv2D(16, kernel_size=5, activation='relu', padding="same", input_shape=(32,32,3)))
    model[i].add(MaxPooling2D())
    if i>0:
        model[i].add(Conv2D(32, kernel_size=5, activation='relu', padding="same"))
        model[i].add(MaxPooling2D())
    if i>1:
        model[i].add(Conv2D(64, kernel_size=5, activation='relu', padding="same"))
        model[i].add(MaxPooling2D(padding='same'))
    model[i].add(Flatten())
    model[i].add(Dense(256, activation='relu'))
    model[i].add(Dense(10, activation='softmax'))
    model[i].compile(optimizer='adam',
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

# One shared 90/10 split so every network is scored on the same validation set.
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(Xtr, Ytr, test_size = 0.1)
for j in range(nets):
    history[j] = model[j].fit(X_train2, Y_train2, epochs=epochs, batch_size=64,
                              validation_data = (X_val2, Y_val2), verbose=2)
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version;
    # accept either so this cell and the plotting cell agree.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

Train on 65931 samples, validate on 7326 samples
Epoch 1/20
 - 4s - loss: 1.0894 - acc: 0.6613 - val_loss: 0.7957 - val_acc: 0.7696
Epoch 2/20
 - 2s - loss: 0.6454 - acc: 0.8130 - val_loss: 0.6697 - val_acc: 0.8079
Epoch 3/20
 - 2s - loss: 0.5414 - acc: 0.8423 - val_loss: 0.5696 - val_acc: 0.8407
Epoch 4/20
 - 2s - loss: 0.4770 - acc: 0.8622 - val_loss: 0.5427 - val_acc: 0.8430
Epoch 5/20
 - 2s - loss: 0.4316 - acc: 0.8742 - val_loss: 0.5231 - val_acc: 0.8537
Epoch 6/20
 - 2s - loss: 0.4035 - acc: 0.8814 - val_loss: 0.5243 - val_acc: 0.8542
Epoch 7/20
 - 2s - loss: 0.3722 - acc: 0.8889 - val_loss: 0.5201 - val_acc: 0.8557
Epoch 8/20
 - 2s - loss: 0.3507 - acc: 0.8958 - val_loss: 0.5108 - val_acc: 0.8557
Epoch 9/20
 - 2s - loss: 0.3270 - acc: 0.9029 - val_loss: 0.5066 - val_acc: 0.8662
Epoch 10/20
 - 2s - loss: 0.3052 - acc: 0.9089 - val_loss: 0.5110 - val_acc: 0.8609
Epoch 11/20
 - 2s - loss: 0.2905 - acc: 0.9121 - val_loss: 0.5409 - val_acc: 0.8544
Epoch 12/20
 - 2s - loss: 0.2768 - a

In [2]:
# Re-print the best train/validation accuracy of every trained network.
# Requires the training cell to have run first (it defines `history`).
epochs=20
for j in range(len(history)):
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

NameError: name 'history' is not defined

In [None]:
# Overlay the validation-accuracy curves of all networks on ONE figure.
# The figure is created once, outside the loop; creating it per iteration put
# every curve on its own figure and left the legend/labels on the last one only.
plt.figure(figsize=(8, 8))
for i in range(nets):
    logs = history[i].history
    # Keras logs the metric as 'val_acc' or 'val_accuracy' depending on its version.
    val_acc = logs['val_accuracy'] if 'val_accuracy' in logs else logs['val_acc']
    # Take the x axis from the curve itself so it cannot disagree with `epochs`.
    epochs_range = range(len(val_acc))
    plt.plot(epochs_range, val_acc, label='(C-P)x{}'.format(i+1))

plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Model Accuracy')
plt.legend(loc='upper left')
plt.show()

# Experiment 2

In [None]:
# Experiment 2: vary the number of feature maps.
# Network i uses 8*(i+1) maps in the first conv layer and 16*(i+1) in the second.
nets = 6
# Single source of truth for the epoch count used by fit() and the summary line.
epochs = 20
model = [0]*nets
history = [0]*nets
for i in range(nets):
    model[i] = Sequential()
    model[i].add(Conv2D(i*8+8, kernel_size=5, activation='relu', padding="same", input_shape=(32,32,3)))
    model[i].add(MaxPooling2D())
    model[i].add(Conv2D(i*16+16, kernel_size=5, activation='relu', padding="same"))
    model[i].add(MaxPooling2D())
    model[i].add(Flatten())
    model[i].add(Dense(256, activation='relu'))
    model[i].add(Dense(10, activation='softmax'))
    model[i].compile(optimizer='adam',
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

# One shared 90/10 split so every network is scored on the same validation set.
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(Xtr, Ytr, test_size = 0.1)
for j in range(nets):
    history[j] = model[j].fit(X_train2, Y_train2, epochs=epochs, batch_size=64,
                              validation_data = (X_val2, Y_val2), verbose=2)
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

In [None]:
# Re-print the best train/validation accuracy of every network in this experiment.
epochs=20
for j in range(nets):
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

In [None]:
# Overlay the validation-accuracy curves of all networks on ONE figure.
# The figure is created once, outside the loop; creating it per iteration put
# every curve on its own figure and left the legend/labels on the last one only.
plt.figure(figsize=(8, 8))
for i in range(nets):
    logs = history[i].history
    # Keras logs the metric as 'val_acc' or 'val_accuracy' depending on its version.
    val_acc = logs['val_accuracy'] if 'val_accuracy' in logs else logs['val_acc']
    # Take the x axis from the curve itself so it cannot disagree with `epochs`.
    epochs_range = range(len(val_acc))
    # Label by the number of feature maps in the first conv layer.
    plt.plot(epochs_range, val_acc, label='{} maps'.format(i*8+8))

plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Model Accuracy')
plt.legend(loc='upper left')
plt.show()

# Experiment 3

In [None]:
# Experiment 3: vary the width of the hidden Dense layer.
# Network 0 has no hidden Dense layer; network i>0 has one with 2**(i+4) units.
nets = 8
# Single source of truth for the epoch count used by fit() and the summary line.
epochs = 20
model = [0]*nets
history = [0]*nets
for i in range(nets):
    model[i] = Sequential()
    model[i].add(Conv2D(32, kernel_size=5, activation='relu', padding="same", input_shape=(32,32,3)))
    model[i].add(MaxPooling2D())
    model[i].add(Conv2D(64, kernel_size=5, activation='relu', padding="same"))
    model[i].add(MaxPooling2D())
    model[i].add(Flatten())
    if i>0:
        model[i].add(Dense(2**(i+4), activation='relu'))
    model[i].add(Dense(10, activation='softmax'))
    model[i].compile(optimizer='adam',
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

# One shared 90/10 split so every network is scored on the same validation set.
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(Xtr, Ytr, test_size = 0.1)
for j in range(nets):
    history[j] = model[j].fit(X_train2, Y_train2, epochs=epochs, batch_size=64,
                              validation_data = (X_val2, Y_val2), verbose=2)
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

In [None]:
# Re-print the best train/validation accuracy of every network in this experiment.
epochs=20
for j in range(nets):
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

In [None]:
# Overlay the validation-accuracy curves of all networks on ONE figure.
# The figure is created once, outside the loop; creating it per iteration put
# every curve on its own figure and left the legend/labels on the last one only.
plt.figure(figsize=(8, 8))
for i in range(nets):
    logs = history[i].history
    # Keras logs the metric as 'val_acc' or 'val_accuracy' depending on its version.
    val_acc = logs['val_accuracy'] if 'val_accuracy' in logs else logs['val_acc']
    # Take the x axis from the curve itself so it cannot disagree with `epochs`.
    epochs_range = range(len(val_acc))
    # Network 0 is built without a hidden Dense layer, so label it 0N rather
    # than the 16N the 2**(i+4) formula would give.
    hidden_units = 2**(i+4) if i > 0 else 0
    plt.plot(epochs_range, val_acc, label='{}N'.format(hidden_units))

plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Model Accuracy')
plt.legend(loc='upper left')
plt.show()

# Experiment 4

In [None]:
# Experiment 4: vary the dropout rate. Network i applies Dropout(i*0.1) after
# each pooling stage and after the hidden Dense layer.
# NOTE(review): the hidden Dense width also grows with i (2**(i+4)), so dropout
# rate and layer width change together - confirm this is intended.
nets = 8
# Single source of truth for the epoch count used by fit() and the summary line.
epochs = 20
model = [0]*nets
history = [0]*nets
for i in range(nets):
    model[i] = Sequential()
    model[i].add(Conv2D(32, kernel_size=5, activation='relu', padding="same", input_shape=(32,32,3)))
    model[i].add(MaxPooling2D())
    model[i].add(Dropout(i*0.1))
    model[i].add(Conv2D(64, kernel_size=5, activation='relu', padding="same"))
    model[i].add(MaxPooling2D())
    model[i].add(Dropout(i*0.1))
    model[i].add(Flatten())
    model[i].add(Dense(2**(i+4), activation='relu'))
    model[i].add(Dropout(i*0.1))
    model[i].add(Dense(10, activation='softmax'))
    model[i].compile(optimizer='adam',
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

# One shared 90/10 split so every network is scored on the same validation set.
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(Xtr, Ytr, test_size = 0.1)
for j in range(nets):
    history[j] = model[j].fit(X_train2, Y_train2, epochs=epochs, batch_size=64,
                              validation_data = (X_val2, Y_val2), verbose=2)
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

In [None]:
# Re-print the best train/validation accuracy of every network in this experiment.
epochs=20
for j in range(nets):
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

In [None]:
# Overlay the validation-accuracy curves of all networks on ONE figure.
# The figure is created once, outside the loop; creating it per iteration put
# every curve on its own figure and left the legend/labels on the last one only.
plt.figure(figsize=(8, 8))
for i in range(nets):
    logs = history[i].history
    # Keras logs the metric as 'val_acc' or 'val_accuracy' depending on its version.
    val_acc = logs['val_accuracy'] if 'val_accuracy' in logs else logs['val_acc']
    # Take the x axis from the curve itself so it cannot disagree with `epochs`.
    epochs_range = range(len(val_acc))
    # Format to one decimal: i*0.1 is inexact in floating point
    # (3*0.1 prints as 0.30000000000000004).
    plt.plot(epochs_range, val_acc, label='D={:.1f}'.format(i*0.1))

plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Model Accuracy')
plt.legend(loc='upper left')
plt.show()

# Experiment 5

In [None]:
# Experiment 5: a single deeper network. Two blocks of
# (3x3 conv, 3x3 conv, strided 5x5 conv), each layer followed by batch
# normalisation and each block by dropout, then a 128-unit Dense head.
model = Sequential()
model.add(Conv2D(32, kernel_size=3, activation='relu', input_shape=(32,32,3)))
model.add(BatchNormalization())
model.add(Conv2D(32, 3, activation='relu'))
model.add(BatchNormalization())
# The stride-2 convolution takes the place of a pooling layer.
model.add(Conv2D(32, kernel_size = 5, strides=2, padding='same', activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Conv2D(64, kernel_size = 3, activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size = 3, activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size = 5, strides=2, padding='same', activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Flatten())
# `model` is a single Sequential here, not a list: the former `model[i].add`
# would raise a TypeError.
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))
model.add(Dense(10, activation='softmax'))

model.compile(optimizer='adam',
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=['accuracy'])
epochs = 20
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(Xtr, Ytr, test_size = 0.1)
history = model.fit(X_train2, Y_train2, epochs=epochs, batch_size=64,
                              validation_data = (X_val2, Y_val2), verbose=2)
logs = history.history
# Keras logs the metric as 'acc' or 'accuracy' depending on its version.
acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
# Placeholders are numbered from 0: with three arguments the former {1}/{2}/{3}
# indices could not be formatted.
print("CNN: Epochs={0:d}, Train accuracy={1:.5f}, Validation accuracy={2:.5f}".format(
            epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

In [None]:
# # CREATE MORE IMAGES VIA DATA AUGMENTATION
# datagen = ImageDataGenerator(
#         rotation_range=10,  
#         zoom_range = 0.10,  
#         width_shift_range=0.1, 
#         height_shift_range=0.1)

In [None]:
# Re-run the 1/2/3-stage (conv -> max-pool) networks with a custom Adam
# optimizer using a learning rate of 1e-5.
# Only three architectures are defined below, so size the lists accordingly
# (the former nets = 5 left two placeholder entries in `model` and `history`).
nets = 3
# Single source of truth for the epoch count used by fit() and the summary line.
epochs = 20
model = [0]*nets
history = [0]*nets
for i in range(nets):
    model[i] = Sequential()
    model[i].add(Conv2D(16, kernel_size=5, activation='relu', padding="same", input_shape=(32,32,3)))
    model[i].add(MaxPooling2D(pool_size=(2,2)))
    if i>0:
        model[i].add(Conv2D(32, kernel_size=5, activation='relu', padding="same"))
        model[i].add(MaxPooling2D(pool_size=(2,2)))
    if i>1:
        model[i].add(Conv2D(64, kernel_size=5, activation='relu', padding="same"))
        model[i].add(MaxPooling2D(pool_size=(2,2), padding='same'))
    model[i].add(Flatten())
    model[i].add(Dense(256, activation='relu'))
    model[i].add(Dense(10, activation='softmax'))
    # A fresh optimizer instance per model. It was previously created but never
    # used because compile() received the string 'adam' (default settings).
    # NOTE(review): newer Keras releases spell this argument `learning_rate` - confirm for your version.
    myAdam = tf.keras.optimizers.Adam(lr=0.00001)
    model[i].compile(optimizer=myAdam,
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])

# One shared 90/10 split so every network is scored on the same validation set.
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(Xtr, Ytr, test_size = 0.1)
for j in range(nets):
    history[j] = model[j].fit(X_train2, Y_train2, epochs=epochs, batch_size=64,
                              validation_data = (X_val2, Y_val2), verbose=2)
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

In [None]:
# Print the layer-by-layer summary of the first network in the `model` list.
model[0].summary()

In [None]:
# for j in range(nets):
#     X_train2, X_val2, Y_train2, Y_val2 = train_test_split(Xtr, Ytr, test_size = 0.1)
#     history[j] = model[j].fit_generator(datagen.flow(X_train2,Y_train2, batch_size=64),
#         epochs = epochs, steps_per_epoch = X_train2.shape[0]//64,  
#         validation_data = (X_val2,Y_val2), callbacks=[annealer], verbose=0)
#     print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
#         j+1,epochs,max(history[j].history['acc']),max(history[j].history['val_acc']) ))
# Fit the first three networks in `model` for `epochs` epochs and report the
# best train/validation accuracy of each run.
epochs = 20
# DECREASE LEARNING RATE EACH EPOCH
# annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)
# NOTE(review): this draws a fresh random split. If model[j] was already fitted
# by an earlier cell, samples it trained on can land in this validation set -
# confirm that is acceptable.
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(Xtr, Ytr, test_size = 0.1)
for j in range(3):
    history[j] = model[j].fit(X_train2, Y_train2, epochs=epochs, batch_size=64,
                              validation_data = (X_val2, Y_val2), verbose=2)
    logs = history[j].history
    # Keras logs the metric as 'acc' or 'accuracy' depending on its version.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,epochs,max(logs[acc_key]),max(logs['val_' + acc_key]) ))

In [None]:
# # DECREASE LEARNING RATE EACH EPOCH
# annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)


# epochs=50
# X_train2, X_val2, Y_train2, Y_val2 = train_test_split(Xtr, Ytr, test_size = 0.1)
# history = model.fit_generator(datagen.flow(X_train2,Y_train2, batch_size=64),
#                               epochs = epochs, steps_per_epoch = X_train2.shape[0]//64,  
#                               validation_data = (X_val2,Y_val2), callbacks=[annealer], verbose=0)
# print("CNN: Epochs={0:d}, Train accuracy={1:.5f}, Validation accuracy={2:.5f}".format(
#     epochs,history['acc'],history['val_acc']))

# ML model

In [None]:
def create_features(img):
    """Return the HOG descriptor of one colour image.

    Args:
        img: image array handed straight to skimage's ``hog`` with
            ``multichannel=True`` (callers pass a single ``Xtr[i]``).

    Returns:
        The HOG feature vector computed with 8 orientations, 16x16-pixel
        cells and 1x1-cell blocks.
    """
    # Only the HOG descriptor was ever returned. The flattened pixels, the
    # greyscale copy and the stacked colour+HOG vector that used to be built
    # here were discarded, so they are no longer computed. visualize=False
    # likewise skips rendering a HOG image that was thrown away.
    hog_features = hog(img, orientations=8, pixels_per_cell=(16, 16),
                       cells_per_block=(1, 1), visualize=False, multichannel=True)
    return hog_features

In [None]:
# Compute one feature vector per training image ...
features_list = [create_features(image) for image in Xtr]
# ... and stack the per-image vectors into a single matrix.
feature_matrix = np.array(features_list)


In [None]:
# Show the dimensions of the stacked feature matrix (one row per entry of features_list).
feature_matrix.shape

In [None]:
# train test split (done BEFORE scaling so the held-out rows do not influence
# the scaling statistics); only 20% / 30% of the rows are kept for train / test
X_train, X_test, y_train, y_test = train_test_split(feature_matrix, Ytr, test_size = 0.3, train_size = 0.2 ,random_state = 10)

# scaling the features: fit mean/variance on the training rows only, then apply
# the same transform to both subsets (StandardScaler is imported at the top)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# kept for backward compatibility with code that reads X_scaled; it now uses
# the train-only statistics as well
X_scaled = scaler.transform(feature_matrix)

In [None]:

# Fit a linear-kernel SVM on the training features (fit returns the estimator).
model_linear = SVC(kernel='linear').fit(X_train, y_train)

# Class predictions for the held-out features.
y_pred = model_linear.predict(X_test)

In [None]:
# Evaluate the SVM predictions: overall accuracy, then the confusion matrix.

from sklearn import metrics
from sklearn.metrics import confusion_matrix

# Compute both scores first ...
test_accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
test_confusion = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)

# ... then report them in the same order as before.
print("accuracy:", test_accuracy, "\n")
print(test_confusion)