In [1]:
import cv2
import numpy as np
import os
from random import shuffle
from tqdm import tqdm

In [2]:
# Directory that is scanned for the source images.
IMG_DIR = '/disk2/paint/'
# Directory holding the cached, preprocessed training array.
TRAIN_DIR = '/disk2/paint_train/'
TRAIN_FILE = os.path.join(TRAIN_DIR, 'train_data.npy')
# Per-class limit consulted by create_train_data while collecting images.
MAX_IMAGES_PER_CLASS = 5000
# Images are resized to IMG_SIZE x IMG_SIZE pixels.
IMG_SIZE = 112
# learning rate = 1e-3
LR = 0.001

In [3]:
# Class names; the position of a name in this list is its one-hot index.
attributes = [
    'media_3d_graphics',
    'media_comic',
    'media_graphite',
    'media_oilpaint',
    'media_pen_ink',
    'media_vectorart',
    'media_watercolor',
]


def label_to_one_hot(label):
    """Encode an attribute name as a one-hot float vector.

    Args:
        label: one of the strings in ``attributes``.

    Returns:
        A float64 array of length ``len(attributes)`` that is 1 at the
        position of ``label`` and 0 everywhere else.

    Raises:
        ValueError: if ``label`` is not in ``attributes``.
    """
    position = attributes.index(label)
    # Comparing the index range with the target position yields the one-hot mask.
    return (np.arange(len(attributes)) == position).astype(np.float64)

def one_hot_to_label(one_hot):
    """Return the attribute name at the position of the largest entry of ``one_hot``."""
    return attributes[int(np.argmax(one_hot))]

# we need about 10GB RAM
def create_train_data():
    """Build, shuffle, cache and return the training set from the files in IMG_DIR.

    The label of a file is the part of its name before the first dot and must
    be one of ``attributes``. At most MAX_IMAGES_PER_CLASS images are kept per
    label. Files with an unknown label prefix and files OpenCV cannot decode
    are skipped (and counted) instead of aborting the whole run.

    Side effects:
        Writes the shuffled samples to TRAIN_FILE as an (N, 2) object array
        and prints the per-class counts.

    Returns:
        list: shuffled ``[image, label]`` pairs, where ``image`` is the
        IMG_SIZE x IMG_SIZE array returned by ``cv2.resize`` and ``label`` is
        the one-hot vector from ``label_to_one_hot``.
    """
    attr_count = {k: 0 for k in attributes}
    skipped = 0
    training_data = []
    for image_name in tqdm(os.listdir(IMG_DIR)):
        name = image_name.split('.')[0]
        if name not in attr_count:
            # Not one of the known classes (stray or hidden file): ignore it.
            skipped += 1
            continue
        # '>=' keeps exactly MAX_IMAGES_PER_CLASS images; '>' admitted one extra.
        if attr_count[name] >= MAX_IMAGES_PER_CLASS:
            continue
        path = os.path.join(IMG_DIR, image_name)
        # Flag 1 requests a 3-channel colour image.
        image = cv2.imread(path, 1)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            skipped += 1
            continue
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        training_data.append([image, label_to_one_hot(name)])
        # Count only images that actually made it into the data set.
        attr_count[name] += 1
    shuffle(training_data)

    # Image and label have different shapes, so the pairs cannot form a regular
    # array. Pack them into an explicit object array: recent NumPy versions
    # refuse to build one implicitly from a ragged nested list.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (image, label) in enumerate(training_data):
        packed[row, 0] = image
        packed[row, 1] = label
    target_dir = os.path.dirname(TRAIN_FILE)
    if target_dir:
        # Make sure the cache directory exists so the scan above is not wasted.
        os.makedirs(target_dir, exist_ok=True)
    np.save(TRAIN_FILE, packed)

    print('Loaded data: ', attr_count)
    if skipped:
        print('Skipped files: ', skipped)
    return training_data

In [4]:
# Reuse the cached training array when it exists; otherwise build it from the raw images.
if os.path.isfile(TRAIN_FILE):
    # The cache stores [image, label] pairs of different shapes, i.e. an object
    # array, which NumPy writes with pickle and refuses to read back unless
    # allow_pickle=True. Unpickling can execute code, so only load a cache file
    # that this notebook produced itself.
    train_data = np.load(TRAIN_FILE, allow_pickle=True)
else:
    train_data = create_train_data()

In [5]:
# X - images (first element of every sample)
X = np.array([sample[0] for sample in train_data])
# Y - 'one_hot' labels (second element of every sample)
Y = np.array([sample[1] for sample in train_data])
# make the layout explicit: [num_images, IMG_SIZE, IMG_SIZE, num_channels]
# (3 channels for colour images, it would be 1 for grayscale)
num_images = X.shape[0]
X = X.reshape(num_images, IMG_SIZE, IMG_SIZE, 3)
# peek at one pixel while the values are still integers
# NOTE(review): if the data came from create_train_data, the images were read
# with cv2.imread, which returns channels in B, G, R order - confirm before
# relying on the "RGB" wording of the message below.
print('RGB values of image 0, row 0, column 0: ', X[0, 0, 0])
# the network works on floats, so convert the integer pixel values
X = X.astype(np.float32)
# shift and scale: inputs in [0, 255] end up in roughly [-0.5, 0.5]
# (exactly [-127/255, 128/255])
X = X - 127
X = X / 255
print('(num_images, IMG_SIZE, IMG_SIZE, num_channels)')
print(X.shape)

RGB values of image 0, row 0, column 0:  [239 239 239]
(num_images, IMG_SIZE, IMG_SIZE, num_channels)
(32732, 112, 112, 3)


In [6]:
# let's use 10% of our data for testing
# The test set is the last tenth of the samples; its size is derived from the
# data instead of being hard-coded, so it stays correct if the data set changes.
num_test = X.shape[0] // 10
# Split at a positive index: a negative slice such as X[-0:] would return the
# whole array when num_test is 0.
split_at = X.shape[0] - num_test
x_train = X[:split_at]
y_train = Y[:split_at]
x_test = X[split_at:]
y_test = Y[split_at:]
print('train size:', x_train.shape[0])
print('test size:', x_test.shape[0])

train size: 29459
test size: 3273


In [None]:
import keras
from keras import regularizers
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Input, GaussianNoise, concatenate, AveragePooling2D, ZeroPadding2D
from keras.callbacks import TensorBoard

def _conv_relu(filters, kernel_size, tensor):
    """Apply a 'same'-padded ReLU convolution with the L2 weight penalty used throughout the network."""
    return Conv2D(filters, kernel_size, padding='same', activation='relu',
                  kernel_regularizer=regularizers.l2(0.0005))(tensor)


def _inception_module(tensor):
    """Run four parallel branches (1x1, 1x1->3x3, 1x1->5x5, pool->1x1) and concatenate their outputs."""
    branch_1x1 = _conv_relu(32, (1, 1), tensor)
    branch_3x3 = _conv_relu(128, (3, 3), _conv_relu(64, (1, 1), tensor))
    branch_5x5 = _conv_relu(32, (5, 5), _conv_relu(16, (1, 1), tensor))
    pooled = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same')(tensor)
    branch_pool = _conv_relu(64, (1, 1), pooled)
    return concatenate([branch_1x1, branch_3x3, branch_5x5, branch_pool])


def _classifier_head(tensor):
    """Attach a classifier: average pool, 1x1 conv, dense 1024 with dropout, softmax over ``attributes``."""
    head = AveragePooling2D(pool_size=(5, 5), strides=(3, 3))(tensor)
    head = _conv_relu(128, (1, 1), head)
    head = Flatten()(head)
    head = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.0005))(head)
    head = Dropout(0.7)(head)
    return Dense(len(attributes), kernel_regularizer=regularizers.l2(0.0005), activation='softmax')(head)


input_layer = Input(shape=(IMG_SIZE, IMG_SIZE, 3))

# Stem: 7x7 conv, pool, 1x1 conv, 3x3 conv, pool.
layers_1 = _conv_relu(64, (7, 7), input_layer)
layers_1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(layers_1)
layers_1 = _conv_relu(64, (1, 1), layers_1)
layers_1 = _conv_relu(128, (3, 3), layers_1)
layers_1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(layers_1)
# Add one row at the bottom and one column on the right of the feature map.
layers_1 = ZeroPadding2D(((0, 1), (0, 1)))(layers_1)

# First inception module; the first classifier branches off its output.
layers_2 = _inception_module(layers_1)
loss1 = _classifier_head(layers_2)

# Downsample and pad again, then the second inception module and its classifier.
layers_2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(layers_2)
layers_2 = ZeroPadding2D(((0, 1), (0, 1)))(layers_2)
layers_3 = _inception_module(layers_2)
loss2 = _classifier_head(layers_3)

# The model exposes both classifiers as outputs.
model = Model(input_layer, [loss1, loss2])
model.summary()
# NOTE: the string "adam" selects the optimizer with Keras's default settings;
# the LR constant defined in the configuration cell is not passed to it.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer="adam", metrics=['accuracy'])
tbCallBack = TensorBoard(log_dir='./log-bam-2/model-v1', histogram_freq=0,
          write_graph=True, write_images=True)
# The model has two outputs, and both are trained against the same one-hot labels.
model.fit(x_train, [y_train, y_train],
          epochs=15,
          verbose=1,
          validation_split=0.2,
          callbacks=[tbCallBack])
# let's evaluate model on test data
# For a multi-output model evaluate() returns a list: the total loss first, then
# the per-output losses, then the per-output metrics. score[1] is therefore the
# first output's loss, not an accuracy, so label every value with the name
# Keras reports in model.metrics_names.
score = model.evaluate(x_test, [y_test, y_test], verbose=0)
metric_names = model.metrics_names
if len(metric_names) == len(score):
    for metric_name, metric_value in zip(metric_names, score):
        print('Test %s:' % metric_name, metric_value)
else:
    # Names and values do not line up (depends on the Keras version):
    # fall back to printing the raw values.
    print('Test loss:', score[0])
    print('Test metrics:', score[1:])