## Read image data

In [75]:
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import tarfile
from IPython.display import display, Image
from scipy import ndimage
from sklearn.linear_model import LogisticRegression
from six.moves.urllib.request import urlretrieve
from six.moves import cPickle as pickle
import cv2
import imutils


class Dataset(object):
    """Load labelled images from a folder tree and split them for training.

    Images are collected recursively from a directory. The label is derived
    from the absolute path of each file: paths containing ``unauthorized``
    get label 0, other paths containing ``authorized`` get label 1.

    After :meth:`read` the train/validation/test splits are available as
    ``X_train``/``Y_train``, ``X_valid``/``Y_valid`` and ``X_test``/``Y_test``.
    """

    def __init__(self):
        # Splits are populated by read(); None until then.
        self.X_train = None
        self.X_valid = None
        self.X_test = None
        self.Y_train = None
        self.Y_valid = None
        self.Y_test = None
        # Accumulators filled by traverse_image_dir(); they are never reset,
        # so repeated traversals on the same instance keep appending.
        self.images_list = []
        self.labels_list = []

    def traverse_image_dir(self, file_path):
        """Recursively collect non-empty ``.jpg`` images below *file_path*.

        Each accepted image is appended to ``self.images_list`` together with
        its label in ``self.labels_list``. Files whose path contains neither
        ``unauthorized`` nor ``authorized``, and files that cannot be decoded,
        are reported and skipped so that both lists always stay aligned.

        Returns:
            The ``(images_list, labels_list)`` accumulators of this instance.
        """
        for file_or_dir in os.listdir(file_path):
            abs_path = os.path.abspath(os.path.join(file_path, file_or_dir))
            if os.path.isdir(abs_path):
                self.traverse_image_dir(abs_path)
                continue

            # Only non-empty .jpg files are candidates.
            if not file_or_dir.lower().endswith('.jpg'):
                continue
            if os.path.getsize(abs_path) <= 0:
                continue

            # 'unauthorized' contains 'authorized', so it must be tested first.
            if 'unauthorized' in abs_path:
                label = 0
            elif 'authorized' in abs_path:
                label = 1
            else:
                # No label can be derived: skip the image instead of storing
                # it without a matching label.
                print("unvalid folder exists")
                continue

            image = self.read_resize_image(abs_path)
            if image is None:
                print("unreadable image skipped: {}".format(abs_path))
                continue

            self.images_list.append(image)
            self.labels_list.append(label)

        return self.images_list, self.labels_list

    def read_resize_image(self, full_name, resized_height=50):
        """Read an image with OpenCV and resize it to *resized_height*.

        Only the height is passed to ``imutils.resize``.

        Returns:
            The resized image, or ``None`` when ``cv2.imread`` could not
            read the file.
        """
        image = cv2.imread(full_name)
        if image is None:
            return None
        return imutils.resize(image, height=resized_height)

    def get_image_array(self, file_path):
        """Traverse *file_path* and return ``(images, labels)`` as numpy arrays."""
        images, labels = self.traverse_image_dir(file_path)
        return np.array(images), np.array(labels)

    def read(self, img_rows=50, img_cols=50, img_channels=3, nb_classes=2,
             data_dir=r'C:\Users\dbsnail\ImageFolder\data_large'):
        """Load the images, split them and store the prepared arrays on self.

        The data is split 70/30 into train+valid/test and the first part again
        70/30 into train/valid, with fixed random seeds. Inputs are reshaped to
        ``(n, img_rows, img_cols, img_channels)`` and scaled to ``[0, 1]``;
        labels are one-hot encoded with *nb_classes* columns.

        Note: this method uses ``train_test_split`` and ``np_utils``, which
        are imported in the "Modeling" cell of this notebook, not in the cell
        that defines this class.

        Args:
            img_rows: Height of every image after resizing.
            img_cols: Width of every image after resizing.
            img_channels: Number of colour channels per image.
            nb_classes: Number of label classes.
            data_dir: Root directory of the image folder tree.

        Raises:
            ValueError: If no labelled image was found below *data_dir*.
        """
        images, labels = self.get_image_array(data_dir)
        if images.shape[0] == 0:
            raise ValueError("no labelled .jpg images found in {}".format(data_dir))
        print("Images shape {}, label shape {}, ratio of authorized data {}".format(images.shape, labels.shape, labels.mean()))

        # Hold out 30% for testing, then 30% of the remainder for validation.
        X_train_valid, X_test, y_train_valid, y_test = train_test_split(images, labels, test_size=0.3, random_state=123)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train_valid, y_train_valid, train_size=0.7, random_state=200)

        # Enforce the (samples, rows, cols, channels) layout.
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, img_channels)
        X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, img_channels)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, img_channels)

        print('X_train shape:', X_train.shape)
        print(X_train.shape[0], 'train samples')
        print(X_valid.shape[0], 'valid samples')
        print(X_test.shape[0], 'test samples')

        # Convert class vectors to binary class matrices.
        self.Y_train = np_utils.to_categorical(y_train, nb_classes)
        self.Y_valid = np_utils.to_categorical(y_valid, nb_classes)
        self.Y_test = np_utils.to_categorical(y_test, nb_classes)

        # Scale the input data to the range [0, 1].
        self.X_train = X_train.astype('float32') / 255
        self.X_valid = X_valid.astype('float32') / 255
        self.X_test = X_test.astype('float32') / 255

## Modeling

In [232]:
from __future__ import print_function
import random
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.models import load_model
from keras import backend as K

class Model(object):
    """Small convolutional classifier wrapped around a Keras Sequential model.

    ``self.model`` is ``None`` until :meth:`build_model` or :meth:`load`
    has been called.
    """

    # Default location used by save() and load().
    FILE_PATH = r'C:\Users\dbsnail\ImageFolder\model_with_aug.h5'

    def __init__(self):
        self.model = None

    def build_model(self, dataset, nb_classes=2):
        """Create the network; the input shape is taken from ``dataset.X_train``.

        Three blocks of two 3x3 convolutions plus max pooling (32, 48 and 64
        filters) are followed by a 960-unit dense layer and a softmax layer
        with *nb_classes* outputs. The layer summary is printed.
        """
        self.model = Sequential()

        self.model.add(Convolution2D(32, 3, 3, border_mode='same', activation = 'relu', input_shape=dataset.X_train.shape[1:]))
        self.model.add(Convolution2D(32, 3, 3, activation = 'relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        self.model.add(Dropout(0.25))

        # No dropout after this block (unlike the first and third blocks).
        self.model.add(Convolution2D(48, 3, 3, border_mode='same', activation = 'relu'))
        self.model.add(Convolution2D(48, 3, 3, border_mode='same', activation = 'relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Convolution2D(64, 3, 3, border_mode='same', activation = 'relu'))
        self.model.add(Convolution2D(64, 3, 3, border_mode='same', activation = 'relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        self.model.add(Dropout(0.25))

        self.model.add(Flatten())
        self.model.add(Dense(960))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(nb_classes))
        self.model.add(Activation('softmax'))

        self.model.summary()

    def train(self, dataset, batch_size=20, nb_epoch=40, data_augmentation=True):
        """Compile the model and fit it on the training split of *dataset*.

        Args:
            dataset: Object providing ``X_train``/``Y_train`` and
                ``X_valid``/``Y_valid`` arrays.
            batch_size: Mini-batch size.
            nb_epoch: Number of training epochs.
            data_augmentation: When true, train on batches produced by an
                ``ImageDataGenerator`` (channel shifts and vertical flips);
                otherwise fit directly on the training arrays.
        """
        # SGD with Nesterov momentum and categorical cross-entropy loss.
        sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=sgd,
                           metrics=['accuracy'])
        if not data_augmentation:
            print('Not using data augmentation.')
            self.model.fit(dataset.X_train, dataset.Y_train,
                           batch_size=batch_size,
                           nb_epoch=nb_epoch,
                           validation_data=(dataset.X_valid, dataset.Y_valid),
                           shuffle=True)
        else:
            print('Using real-time data augmentation.')

            # This will do preprocessing and realtime data augmentation
            datagen = ImageDataGenerator(
                featurewise_center=False,             # set input mean to 0 over the dataset
                samplewise_center=False,              # set each sample mean to 0
                featurewise_std_normalization=False,  # divide inputs by std of the dataset
                samplewise_std_normalization=False,   # divide each input by its std
                zca_whitening=False,                  # apply ZCA whitening
                rotation_range=0.,                    # randomly rotate images in the range (degrees, 0 to 180)
                width_shift_range=0.,                 # randomly shift images horizontally (fraction of total width)
                height_shift_range=0.,                # randomly shift images vertically (fraction of total height)
                channel_shift_range=0.2,              # range for random channel shifts
                fill_mode = 'nearest',                # points outside the input boundaries are filled according to this mode
                horizontal_flip=False,                # no random horizontal flips
                vertical_flip=True)                   # randomly flip images vertically

            # compute quantities required for featurewise normalization
            # (std, mean, and principal components if ZCA whitening is applied)
            datagen.fit(dataset.X_train)

            # fit the model on the batches generated by datagen.flow()
            self.model.fit_generator(datagen.flow(dataset.X_train, dataset.Y_train,
                                                  batch_size=batch_size),
                                     samples_per_epoch=dataset.X_train.shape[0],
                                     nb_epoch=nb_epoch,
                                     validation_data=(dataset.X_valid, dataset.Y_valid))

    def save(self, file_path=FILE_PATH):
        """Write the model to *file_path* and report success afterwards."""
        self.model.save(file_path)
        print('Model Saved.')

    def load(self, file_path=FILE_PATH):
        """Replace ``self.model`` with the model stored at *file_path*."""
        self.model = load_model(file_path)
        print('Model Loaded.')

    def predict(self, image):
        """Classify a single image and return the predicted class index.

        The image is reshaped to a batch of one using the model's own input
        shape, converted to float32 and scaled by 1/255. The class
        probabilities are printed. The class index is derived from that
        single forward pass: the argmax for multi-output models, or a 0.5
        threshold for a single-output model.
        """
        input_shape = tuple(self.model.input_shape[1:])
        batch = image.reshape((1,) + input_shape).astype('float32') / 255

        proba = self.model.predict_proba(batch)
        print(proba)

        if proba.shape[-1] > 1:
            classes = proba.argmax(axis=-1)
        else:
            classes = (proba > 0.5).astype('int32')
        return classes[0]

    def evaluate(self, dataset):
        """Print the second reported metric on the test split as a percentage."""
        score = self.model.evaluate(dataset.X_test, dataset.Y_test, verbose=0)
        print("%s: %.2f%%" % (self.model.metrics_names[1], score[1] * 100))

In [77]:
# Load all images from the default data directory and build the
# train/validation/test splits (stored as attributes on `dataset`).
dataset = Dataset()
dataset.read()

Images shape (9733, 50, 50, 3), label shape (9733,), ratio of authorized data 0.3915545052912771
X_train shape: (4769, 50, 50, 3)
4769 train samples
2044 valid samples
2920 test samples


## Train model

In [233]:
# Build the CNN for the dataset's input shape, train it with the default
# settings (data augmentation enabled) and save it to Model.FILE_PATH.
model = Model()
model.build_model(dataset)
model.train(dataset)
model.save()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_84 (Convolution2D) (None, 50, 50, 32)    896         convolution2d_input_31[0][0]     
____________________________________________________________________________________________________
convolution2d_85 (Convolution2D) (None, 48, 48, 32)    9248        convolution2d_84[0][0]           
____________________________________________________________________________________________________
maxpooling2d_46 (MaxPooling2D)   (None, 24, 24, 32)    0           convolution2d_85[0][0]           
____________________________________________________________________________________________________
dropout_53 (Dropout)             (None, 24, 24, 32)    0           maxpooling2d_46[0][0]            
___________________________________________________________________________________________

In [234]:
# Start from a fresh Model instance, restore the network saved at
# Model.FILE_PATH and report its accuracy on the held-out test split.
model = Model()
model.load()
model.evaluate(dataset)

Model Loaded.
acc: 99.86%


## Predict new image 

In [235]:
import cv2
import imutils

In [236]:
# Classify sample1.jpg: read it, resize to height 50 and run the model.
# Class 1 is the label Dataset assigns to 'authorized' images, 0 to 'unauthorized'.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\sample1.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[  1.99542979e-10   1.00000000e+00]]


1

In [237]:
# Classify sample2.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\sample2.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[  7.55044603e-05   9.99924541e-01]]


1

In [238]:
# Classify sample3.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\sample3.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[  1.68523504e-04   9.99831438e-01]]


1

In [239]:
# Classify sample4.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\sample4.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[ 0.00113458  0.99886549]]


1

In [240]:
# Classify chendaoming.jpg (resized to height 50) with the loaded model.
# The recorded output below is a low-confidence class 0 (about 0.54 vs 0.46).
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\chendaoming.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[ 0.54024619  0.45975375]]


0

In [241]:
# Classify zhourunfa.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\zhourunfa.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[  9.99752581e-01   2.47407588e-04]]


0

In [242]:
# Classify chenbaoguo.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\chenbaoguo.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[  1.00000000e+00   3.64587871e-09]]


0

In [243]:
# Classify zhangguoli.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\zhangguoli.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[  1.00000000e+00   5.34033759e-08]]


0

In [244]:
# Classify linyongjian.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\linyongjian.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[  1.00000000e+00   4.88550335e-13]]


0

In [245]:
# Classify lixuejian.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\lixuejian.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[  1.00000000e+00   7.53058310e-11]]


0

In [246]:
# Classify wangziwen.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\wangziwen.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[  9.99999762e-01   2.20358757e-07]]


0

In [247]:
# Classify tangguoqiang.jpg (resized to height 50) with the loaded model.
# The recorded output below is class 0 with moderate confidence (about 0.66).
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\tangguoqiang.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[ 0.66252619  0.33747378]]


0

In [248]:
# Classify zhangyimou.jpg (resized to height 50) with the loaded model.
image = cv2.imread(r'C:\Users\dbsnail\ImageFolder\test\zhangyimou.jpg')
resized_image = imutils.resize(image, height = 50)
model.predict(resized_image)

[[ 0.96823043  0.03176954]]


0