In [1]:
# Load packages
import numpy as np
import cv2
import os
import random
import h5py
import scipy.io as sio
from tqdm import tqdm

from keras.preprocessing.image import ImageDataGenerator, img_to_array, array_to_img, load_img
from keras.applications import ResNet50, VGG16
from keras.applications import imagenet_utils
from keras.models import Sequential, Model, load_model
from keras.layers.convolutional import Conv2D
from keras.layers.core import Activation, Dense, Flatten, Dropout
from keras.layers import Input
from keras.layers.pooling import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras import backend as K
from keras.datasets import cifar10
from keras.optimizers import SGD
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from matplotlib.pyplot import imshow

%matplotlib inline

Using TensorFlow backend.


# 1. Load human dataset

In [0]:
# Define the data directory
#path_data = 'C:/Users/h17026889/THANGHOANG/Practical DL4CV/Week2/lfw_funneled'

In [0]:
# Get the filenames from directory
#link_names = []
#for (rootDir, dirNames, filenames) in os.walk(path_data):
#    for filename in filenames:
#        if filename.endswith('.jpg'):
#            link_names.append(os.path.join(rootDir, filename))

In [0]:
# Check length
#print("There are {} images in the directory".format(len(link_names)))

There are 13233 images in the directory


In [0]:
# Shuffle with a fixed seed so that the split is reproducible
#random.seed(167)
#random.shuffle(link_names)

In [0]:
# Randomly pick 7000 images
#train_human_paths = link_names[:5000]
#test_human_paths = link_names[5000:7000]

In [0]:
def resize_keep_aspect_ratio(image, width, height):
    """Resize `image` to (`width`, `height`) without stretching it.

    The image is first scaled, keeping its aspect ratio, so that it covers
    the whole target box. The overflow along the other dimension is then
    centre-cropped, and a last resize absorbs any rounding leftovers so the
    output has exactly the requested size.

    Args:
        image: array whose first two axes are (rows, columns), as read via
            `image.shape[:2]`; it is handed to `cv2.resize`.
        width: target width in pixels.
        height: target height in pixels.

    Returns:
        The result of the final `cv2.resize(..., (width, height))` call.
    """
    (h, w) = image.shape[:2]
    dH = 0
    dW = 0
    # Decide which side to fit by comparing the image aspect ratio with the
    # target aspect ratio (h/w >= height/width, cross-multiplied to stay in
    # integers). Testing only `w < h` is correct for square targets but, for
    # non-square ones, can produce a scaled image smaller than the target,
    # which then gets stretched by the final resize.
    if h * width >= w * height:
        # Fit the width; the height overflows and is cropped.
        image = cv2.resize(image, (width, int(h*width/w)), interpolation = cv2.INTER_AREA)
        dH = int((image.shape[0] - height) / 2.0)
    else:
        # Fit the height; the width overflows and is cropped.
        image = cv2.resize(image, (int(w*height/h), height), interpolation = cv2.INTER_AREA)
        dW = int((image.shape[1] - width) / 2.0)
    (h, w) = image.shape[:2]
    # Centre crop: drop the same margin from both ends of the overflowing axis.
    image = image[dH:h-dH, dW:w-dW]
    # The crop can be off by one pixel because of integer truncation, so
    # resize once more to guarantee the exact output size.
    return cv2.resize(image, (width, height), interpolation = cv2.INTER_AREA)

In [0]:
def load_and_resize(filePaths):
    """Load every image in `filePaths` and return them as one stacked array.

    Each file is read with `load_img`, converted with `img_to_array`,
    resized to 32x32 by `resize_keep_aspect_ratio`, and divided by 255.

    Args:
        filePaths: iterable of image file paths; it is wrapped in `tqdm`
            so a progress bar is shown while loading.

    Returns:
        The `np.vstack` of all processed images, one entry per file along
        axis 0.
    """
    batch = []
    for path in tqdm(filePaths):
        pixels = resize_keep_aspect_ratio(img_to_array(load_img(path)), 32, 32)
        # Add a leading axis so that np.vstack stacks the images along axis 0.
        pixels = pixels[np.newaxis, ...]
        # Scale the pixel values by 1/255.
        pixels /= 255.
        batch.append(pixels)
    return np.vstack(batch)

In [0]:
#train_human_x = load_and_resize(train_human_paths)
#test_human_x = load_and_resize(test_human_paths)

In [0]:
# Create labels for human dataset
#train_human_y = 10*np.ones((5000, 1), dtype = int)
#test_human_y = 10*np.ones((2000, 1), dtype = int)

In [0]:
# Load human dataset
# Open read-only: the cells in view only read "features" and "labels" from this
# file, and an explicit mode avoids depending on h5py's version-specific default
# (older releases open in append mode and create the file if the path is wrong).
human = h5py.File('drive/My Drive/Practical DL4CV/Week2/human.hdf5', 'r')

In [0]:
# Split the stored human arrays: rows 0-4999 for training, rows 5000-6999 for testing
train_human_x, test_human_x = human["features"][:5000], human["features"][5000:7000]
# Labels are reshaped into a single column before use
train_human_y = human["labels"][:5000].reshape((-1, 1))
test_human_y = human["labels"][5000:7000].reshape((-1, 1))

In [0]:
# Sanity check: report the shapes of the human train/test arrays
print('\n'.join([
    'Shape of train_human_x = ' + str(train_human_x.shape),
    'Shape of train_human_y = ' + str(train_human_y.shape),
    'Shape of test_human_x = ' + str(test_human_x.shape),
    'Shape of test_human_y = ' + str(test_human_y.shape),
]))

# 2. Load cars dataset

In [0]:
# Define the data directory
#path_data2 = 'C:/Users/h17026889/THANGHOANG/Practical DL4CV/Week2/cars_train'

In [0]:
# Get the filenames from directory
#link_names2 = [os.path.join(path_data2, filename) for filename in os.listdir(path_data2) if filename.endswith('jpg')]

In [0]:
# Check length
# Disabled like the directory listing it depends on: `link_names2` is only
# created by the commented-out cell above, so running this on a fresh kernel
# would raise NameError.
#print("There are {} images in the directory".format(len(link_names2)))

There are 8144 images in the directory


In [0]:
# Shuffle with a fixed seed so that the split is reproducible
#random.seed(167)
#random.shuffle(link_names2)

In [0]:
# Randomly pick 7000 images
#train_cars_paths = link_names2[:5000]
#test_cars_paths = link_names2[5000:7000]

In [0]:
#train_cars_x = load_and_resize(train_cars_paths)

In [0]:
#test_cars_x = load_and_resize(test_cars_paths)

In [0]:
# Create labels for cars dataset
#train_cars_y = 11*np.ones((train_cars_x.shape[0], 1), dtype = int)
#test_cars_y = 11*np.ones((test_cars_x.shape[0], 1), dtype = int)

In [0]:
# Load cars dataset
# Open read-only: the cells in view only read "features" and "labels" from this
# file, and an explicit mode avoids depending on h5py's version-specific default
# (older releases open in append mode and create the file if the path is wrong).
cars = h5py.File('drive/My Drive/Practical DL4CV/Week2/cars.hdf5', 'r')

In [0]:
# Split the stored cars arrays: rows 0-4999 for training, rows 5000-6999 for testing
train_cars_x, test_cars_x = cars["features"][:5000], cars["features"][5000:7000]
# Labels are reshaped into a single column before use
train_cars_y = cars["labels"][:5000].reshape((-1, 1))
test_cars_y = cars["labels"][5000:7000].reshape((-1, 1))

In [0]:
# Sanity check: report the shapes of the cars train/test arrays
print('\n'.join([
    'Shape of train_cars_x = ' + str(train_cars_x.shape),
    'Shape of train_cars_y = ' + str(train_cars_y.shape),
    'Shape of test_cars_x = ' + str(test_cars_x.shape),
    'Shape of test_cars_y = ' + str(test_cars_y.shape),
]))

# 3. Load license dataset

In [0]:
# Define the data directory
#path_data3 = 'C:/Users/h17026889/THANGHOANG/Practical DL4CV/Week2/2017-IWT4S-CarsReId_LP-dataset'

In [0]:
# Get the filenames from directory
#link_names3 = []
#for (rootDir, dirNames, filenames) in os.walk(path_data3):
#    for filename in filenames:
#        if filename.endswith('.png'):
#            link_names3.append(os.path.join(rootDir, filename))

In [0]:
# Check length
#print("There are {} images in the directory".format(len(link_names3)))

There are 185903 images in the directory


In [0]:
# Shuffle with a fixed seed so that the split is reproducible
#random.seed(167)
#random.shuffle(link_names3)

In [0]:
# Randomly pick 7000 images
#train_license_paths = link_names3[:5000]
#test_license_paths = link_names3[5000:7000]

In [0]:
#train_license_x = load_and_resize(train_license_paths)

In [0]:
#test_license_x = load_and_resize(test_license_paths)

In [0]:
# Create labels for license dataset
#train_license_y = 12*np.ones((train_license_x.shape[0], 1), dtype = int)
#test_license_y = 12*np.ones((test_license_x.shape[0], 1), dtype = int)

In [0]:
# Load license dataset
# Open read-only: the cells in view only read "features" and "labels" from this
# file, and an explicit mode avoids depending on h5py's version-specific default
# (older releases open in append mode and create the file if the path is wrong).
license = h5py.File('drive/My Drive/Practical DL4CV/Week2/license.hdf5', 'r')

In [0]:
# Split the stored license arrays: rows 0-4999 for training, rows 5000-6999 for testing
train_license_x, test_license_x = license["features"][:5000], license["features"][5000:7000]
# Labels are reshaped into a single column before use
train_license_y = license["labels"][:5000].reshape((-1, 1))
test_license_y = license["labels"][5000:7000].reshape((-1, 1))

In [0]:
# Sanity check: report the shapes of the license train/test arrays
print('\n'.join([
    'Shape of train_license_x = ' + str(train_license_x.shape),
    'Shape of train_license_y = ' + str(train_license_y.shape),
    'Shape of test_license_x = ' + str(test_license_x.shape),
    'Shape of test_license_y = ' + str(test_license_y.shape),
]))

# 4. Load Cifar10 dataset from keras.datasets

In [9]:
# Load the CIFAR10 training and testing splits via keras.datasets
cifar10_splits = None  # placeholder removed below; keeps the unpacking on its own line
del cifar10_splits
(train_cifar10_x, train_cifar10_y), (test_cifar10_x, test_cifar10_y) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [0]:
# Scale the CIFAR10 pixel values by 1/255.
# Note: this overwrites both arrays, so running the cell a second time without
# reloading the data divides them again.
train_cifar10_x, test_cifar10_x = train_cifar10_x / 255., test_cifar10_x / 255.

In [0]:
# Sanity check: report the shapes of the CIFAR10 train/test arrays
print('\n'.join([
    'Shape of train_cifar10_x = ' + str(train_cifar10_x.shape),
    'Shape of train_cifar10_y = ' + str(train_cifar10_y.shape),
    'Shape of test_cifar10_x = ' + str(test_cifar10_x.shape),
    'Shape of test_cifar10_y = ' + str(test_cifar10_y.shape),
]))

# 5. Concatenate all datasets

In [0]:
# Training images: CIFAR10 first, then human, cars and license, joined along axis 0
X_train = np.concatenate(
    [train_cifar10_x, train_human_x, train_cars_x, train_license_x],
    axis=0,
)

In [0]:
# Test images: CIFAR10 first, then human, cars and license, joined along axis 0
X_test = np.concatenate(
    [test_cifar10_x, test_human_x, test_cars_x, test_license_x],
    axis=0,
)

In [0]:
# Training labels, concatenated in the same dataset order as the training images
Y_train = np.concatenate(
    [train_cifar10_y, train_human_y, train_cars_y, train_license_y],
    axis=0,
)

In [0]:
# Test labels, concatenated in the same dataset order as the test images
Y_test = np.concatenate(
    [test_cifar10_y, test_human_y, test_cars_y, test_license_y],
    axis=0,
)

In [0]:
# Binarize the integer labels: the encoder is fitted on the training labels
# and then reused, unchanged, for the test labels.
# Note: Y_train and Y_test are overwritten here, so rebuild them with the
# concatenation cells before running this cell a second time.
lb = LabelBinarizer()
Y_train, Y_test = lb.fit_transform(Y_train), lb.transform(Y_test)

In [0]:
# Human-readable names for the classes of the combined dataset
label_names = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
    "human", "cars", "license",
]

In [0]:
# Sanity check: report the shapes of the combined train/test arrays
print('\n'.join([
    'Shape of X_train = ' + str(X_train.shape),
    'Shape of Y_train = ' + str(Y_train.shape),
    'Shape of X_test = ' + str(X_test.shape),
    'Shape of Y_test = ' + str(Y_test.shape),
]))

# 6. Train VGG Network

In [0]:
#baseModel = VGG16(weights = 'imagenet', include_top = False, input_tensor = Input(shape = (32, 32, 3)))

In [0]:
#headModel = baseModel.output
#headModel = Flatten(name = 'flatten')(headModel)
#headModel = Dense(256, activation = 'relu')(headModel)
#headModel = Dropout(0.7)(headModel)
#headModel = Dense(len(label_names), activation = 'softmax')(headModel)

In [0]:
#model = Model(inputs = baseModel.input, outputs = headModel)

In [0]:
#model.summary()

In [0]:
# Data augmentation
#aug = ImageDataGenerator(rotation_range = 30, width_shift_range = 0.1, height_shift_range = 0.1,
#                         shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True, fill_mode = 'nearest' )

In [0]:
#opt = SGD(lr = 0.01)
#model.compile(loss = 'categorical_crossentropy', optimizer = opt, metrics = ['accuracy'])

In [0]:
#H = model.fit_generator(aug.flow(X_train, Y_train, batch_size = 32), validation_data = (X_test, Y_test),
#                        epochs = 5, steps_per_epoch = len(X_train) // 32, verbose = 1)

In [0]:
# Restore the previously trained network from its saved .h5 file instead of
# re-running the (commented-out) training cells above
model = load_model(
    'drive/My Drive/Practical DL4CV/Week2/30epochs_1.h5'
)

In [18]:
# Score the loaded model on the combined test set in batches of 32.
# The bare expression is the cell's displayed result (the recorded output
# lists two values, presumably [loss, accuracy]).
model.evaluate(
    X_test,
    Y_test,
    batch_size=32,
)



[0.28241994134788084, 0.91425]

# 7. Conclusion

My model (a VGG network with an input shape of (32, 32, 3)) reaches about 91% test accuracy and 94% train accuracy after 30 epochs (trained on Google Colab) with a mini-batch size of 32 and the SGD optimizer. I think that with a larger input size, a tuned dropout rate, and more training time, the accuracy should improve further.