In [5]:
import glob
import os

import numpy as np
from PIL import Image, ImageOps
from sklearn.model_selection import train_test_split

from keras import optimizers,utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

**Preparing data:**  
1. Unzip data
2. Resize each image to fit within (128,128) and generate three-channel (RGB) images
3. Use PIL's built-in functions to normalize (min-max normalization) and equalize the data
4. Save the preprocessed data to target directory

In [None]:
# Preprocess every raw image: shrink it to fit a 128x128 white RGB canvas, then
# write two variants next to the originals -- a histogram-equalized copy under
# "e_images" and an autocontrast copy under "n_images".
size = (128,128)
rootdir = "./unzip/gear_images/*/*"
for filename in glob.iglob(rootdir, recursive=True):
    # Open inside a context manager so the source file handle is released
    # as soon as its pixels have been pasted onto the canvas.
    with Image.open(filename) as im:
        im.thumbnail(size)  # in-place resize that keeps the aspect ratio
        layer = Image.new('RGB', size, (255,255,255))
        layer.paste(im)  # placed at the top-left corner; the rest stays white

    im_e = ImageOps.equalize(layer)
    # The second argument is the histogram cutoff (percent) passed to autocontrast.
    im_n = ImageOps.autocontrast(layer,5)

    # Mirror the source directory layout under each target root.
    for processed, target_root in ((im_e, "e_images"), (im_n, "n_images")):
        newpath = filename.replace("gear_images", target_root)
        # exist_ok avoids the check-then-create race and the duplicated branches.
        os.makedirs(os.path.dirname(newpath), exist_ok=True)
        processed.save(newpath)

print('done')

**Prepare labels:**  
Generate target labels from the names of the sub-directories.

In [None]:
#preparing y labels
def label_code(rootdir):
    """Map each class sub-directory name to an integer label.

    Parameters
    ----------
    rootdir : str
        Glob pattern matching the image files (e.g. "./data/*/*"). The name of
        each matched file's parent directory is taken as its class name.

    Returns
    -------
    dict
        ``{class_name: index}`` with indices 0..n-1 assigned in sorted
        class-name order. An empty dict is returned when nothing matches.
    """
    # os.path handles the platform's path separator, unlike splitting on '/'.
    label_names = {
        os.path.basename(os.path.dirname(filename))
        for filename in glob.iglob(rootdir, recursive=True)
    }
    # Sorting makes the mapping reproducible: glob order depends on the
    # filesystem, so first-seen numbering could change between runs.
    return {name: index for index, name in enumerate(sorted(label_names))}

rootdir = "./unzip/e_images/*/*"
label_map = label_code(rootdir)

**Prepare the input data to CNN model:**  

In [8]:
#preparing data
# Load every preprocessed image with its integer label, split into train and
# test sets, one-hot encode the labels and scale the pixels to [0, 1].
images=[]
y=[]

for filename in glob.iglob(rootdir, recursive=True):
    # Context manager closes the file once the pixels are copied into the array.
    with Image.open(filename) as im:
        images.append(np.array(im))

    # The parent directory name is the class label.
    label_name = os.path.basename(os.path.dirname(filename))
    y.append(label_map[label_name])

images = np.array(images)
y = np.array(y)
(x_train, x_test, y_train, y_test) = train_test_split(images, y, test_size=0.25, random_state=32)

# Derive the class count from the label map so this cell does not rely on a
# later cell having been executed first.
num_classes = len(label_map)
y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test, num_classes)

# Normalization is critical for the correct result.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print(np.shape(y_train))

(1591, 12)


In [None]:
**Construct CNN:**  
1. Convolutional layer*2
2. Pooling layer
3. Convolutional layer
4. Pooling layer
5. Flatten
6. Fully connected layer
7. Dropout layer
8. Fully connected layer

In [6]:
#prepare CNN model
# Architecture: two conv layers + max-pooling, one conv layer + max-pooling,
# then flatten -> dense(256) -> dropout -> softmax over the classes.
filter_size = (4,4)
# Must equal the number of label sub-directories (the one-hot label width).
num_classes = 12
model = Sequential()
model.add(Conv2D(64, filter_size, activation='relu', input_shape=(128, 128, 3)))
model.add(Conv2D(64, filter_size, activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, filter_size, activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Use the canonical class name; the lowercase `rmsprop` alias is not
# available in every Keras release.
opt = optimizers.RMSprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 125, 125, 64)      3136      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 122, 122, 64)      65600     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 61, 61, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 58, 58, 128)       131200    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 29, 29, 128)       0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 107648)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               27558144  
__________

**Train the model:**

In [15]:
# Keep the returned History object so the per-epoch metrics stay available
# and its repr is not echoed as the cell output.
history = model.fit(x_train, y_train, batch_size=128, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fa621794a20>

**Save the trained model:**

In [8]:
# Persist the trained network twice: a weights-only file, then the model file.
weights_path = './cnn_weights_1.h5'
model_path = './cnn_1.h5'
model.save_weights(weights_path)
model.save(model_path)

**Show the model evaluation results:**  

In [9]:
# Evaluate on the held-out test split and print each metric next to its name.
score = model.evaluate(
    x_test,
    y_test,
    batch_size=128,
    verbose=0,
)
for metric_name, metric_value in zip(model.metrics_names, score):
    print(metric_name, metric_value)

loss 0.4705527427337937
acc 0.903954801698638
