In [1]:
%matplotlib inline
import cv2
from theano import *
theano.config.openmp = True

from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, GlobalMaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator

import h5py
import numpy as np
import seaborn as sns
import os
import sys
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.applications.vgg16 import VGG16

Using Theano backend.


In [2]:
# Image folder paths. Neither name is referenced by any other cell visible in
# this notebook (the generators below use their own directory literals).
fobia_folder = "./trypophobic/"
normal_folder = "./data_train/reddit_sub_pics/"

# Width and height (in pixels) that every image is resized to when it is
# loaded; matches the 224x224 input declared in VGG_16.
resolution_x = resolution_y = 224

In [3]:
def _directory_flow(directory):
    """Create an ImageDataGenerator and its directory iterator for `directory`.

    Both the training and the validation pipeline use exactly the same
    settings, so they are built by this single factory:

    * pixel values are only rescaled by 1/255 (no augmentation; whitening,
      centering and std-normalisation are explicitly switched off),
    * channels-first ('th') dimension ordering,
    * images resized to (resolution_x, resolution_y), served in batches of 8,
    * `class_mode=None` and `shuffle=False`, with the class sub-folders listed
      explicitly as 'non-trypophobic' then 'trypophobic'.

    Returns the `(datagen, iterator)` pair.
    """
    datagen = ImageDataGenerator(
        rescale=1./255,
        dim_ordering='th',
        zca_whitening=False,
        featurewise_center=False,
        featurewise_std_normalization=False,
    )
    iterator = datagen.flow_from_directory(
        directory,
        target_size=(resolution_x, resolution_y),
        batch_size=8,
        class_mode=None,
        shuffle=False,
        classes=['non-trypophobic', 'trypophobic'],
    )
    return datagen, iterator


train_datagen, train_data_generator = _directory_flow('data_train/')
test_datagen, test_data_generator = _directory_flow('data_valid/')

Found 1560 images belonging to 2 classes.
Found 320 images belonging to 2 classes.


In [4]:
# (number of filters, number of 3x3 convolutions) for each of the five
# convolutional blocks, in the order they are stacked.
_VGG16_CONV_BLOCKS = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))


def _load_vgg16_weights(model, weights_path):
    """Copy weights from the HDF5 file at `weights_path` into `model` and freeze them.

    The file is expected to carry an `nb_layers` attribute and one group per
    layer named `layer_<k>`, each with an `nb_params` attribute and datasets
    `param_<p>`. Layers stored in the file beyond the ones present in `model`
    (the fully-connected head) are ignored. Every layer that receives weights
    is marked as non-trainable.
    """
    # Open read-only: loading must never modify (or create) the weights file,
    # and the context manager closes it even if set_weights raises.
    with h5py.File(weights_path, 'r') as f:
        layers_to_load = min(f.attrs['nb_layers'], len(model.layers))
        for k in range(layers_to_load):
            g = f['layer_{}'.format(k)]
            # `[()]` reads each dataset fully into memory, so the values stay
            # valid after the file has been closed.
            weights = [g['param_{}'.format(p)][()]
                       for p in range(g.attrs['nb_params'])]
            model.layers[k].set_weights(weights)
            model.layers[k].trainable = False


def VGG_16(weights_path=None, num_of_conv_blocks = 1):
    """Build the convolutional base of VGG16, optionally with pre-trained weights.

    Only the convolutional blocks are created; no Flatten/Dense classifier
    head is added, so the model outputs feature maps.

    Parameters
    ----------
    weights_path : str or None
        Path to an HDF5 weights file. When given, the weights are loaded into
        the created layers, those layers are frozen (`trainable = False`) and
        'Model loaded.' is printed. When None, the model is returned with its
        freshly initialised, trainable layers and nothing is loaded.
    num_of_conv_blocks : int
        How many of the five convolutional blocks to stack (1..5). Any value
        that does not match 1..4 yields all five blocks.

    Returns
    -------
    The assembled `Sequential` model. Input shape is (3, 224, 224).
    """
    model = Sequential()

    is_first_layer = True
    for block_number, (filters, conv_count) in enumerate(_VGG16_CONV_BLOCKS, start=1):
        for _ in range(conv_count):
            if is_first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1,1), input_shape=(3,224,224)))
                is_first_layer = False
            else:
                model.add(ZeroPadding2D((1,1)))
            model.add(Convolution2D(filters, 3, 3, activation='relu'))
        model.add(MaxPooling2D((2,2), strides=(2,2)))

        if block_number == num_of_conv_blocks:
            break

    if weights_path is not None:
        _load_vgg16_weights(model, weights_path)
        print('Model loaded.')

    return model

#sgd = "adam"#SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
#model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])

In [5]:
# Build all five convolutional blocks and load the pre-trained weights from
# 'vgg16_weights.h5', then print a layer-by-layer ASCII summary of the result.
vgg_model = VGG_16(weights_path='vgg16_weights.h5', num_of_conv_blocks=5)

from keras_sequential_ascii import sequential_model_to_ascii_printout
sequential_model_to_ascii_printout(vgg_model)

Model loaded.
      OPERATION           DATA DIMENSIONS   WEIGHTS(N)   WEIGHTS(%)

          Input   #####   (3, 224, 224)
  ZeroPadding2D   \|||/ -------------------         0     0.0%
                  #####   (3, 226, 226)
  Convolution2D    \|/  -------------------      1792     0.0%
           relu   #####   (64, 224, 224)
  ZeroPadding2D   \|||/ -------------------         0     0.0%
                  #####   (64, 226, 226)
  Convolution2D    \|/  -------------------     36928     0.3%
           relu   #####   (64, 224, 224)
   MaxPooling2D   YYYYY -------------------         0     0.0%
                  #####   (64, 112, 112)
  ZeroPadding2D   \|||/ -------------------         0     0.0%
                  #####   (64, 114, 114)
  Convolution2D    \|/  -------------------     73856     0.5%
           relu   #####   (128, 112, 112)
  ZeroPadding2D   \|||/ -------------------         0     0.0%
                  #####   (128, 114, 114)
  Convolution2D    \|/  ------------------- 

In [6]:
def predict_generator_status(model, generator, number):
    """Run `model` over batches from `generator` with a progress bar.

    Batches are pulled from `generator` and passed to
    `model.predict_on_batch` until at least `number` predictions have been
    produced; the result is then cut to exactly `number` entries.

    Parameters
    ----------
    model : object exposing `predict_on_batch(batch)`.
    generator : iterable yielding input batches.
    number : int
        How many predictions to return. Progress is counted from the number
        of predictions each batch actually produced, so a short batch (for
        example the last batch of a pass over the data) is counted correctly.

    Returns
    -------
    numpy.ndarray
        The first `number` predictions, in generator order. Fewer are
        returned only if `generator` is exhausted early. An empty array is
        returned, without touching the generator, when `number <= 0`.
    """
    limit = int(np.ceil(number))
    if limit <= 0:
        return np.array([])

    predictions = []
    for batch in tqdm(generator):
        predictions.extend(model.predict_on_batch(batch))
        if len(predictions) >= limit:
            break

    # The final batch may overshoot the requested amount; drop the surplus.
    return np.array(predictions[:limit])

1it [00:45, 45.76s/it]


(16, 512, 7, 7)

In [None]:
# Calculate bottleneck features of vgg model
# The sample counts (1560 / 320) are the image totals reported by the two
# directory iterators when they were created.
# np.save is given the file name directly so that NumPy opens and closes the
# file itself; passing an `open(...)` handle would leave it unclosed.
bottleneck_features_train = predict_generator_status(vgg_model, train_data_generator, 1560)
np.save('bottleneck_features_train.npy', bottleneck_features_train)

print("Train data processed")

bottleneck_features_validation = predict_generator_status(vgg_model, test_data_generator, 320)
np.save('bottleneck_features_validation.npy', bottleneck_features_validation)

print("Test data processed")

32it [26:46, 49.44s/it]

In [None]:
# Display the shape of the training bottleneck features computed in the
# previous cell (sanity check before they are used further).
bottleneck_features_train.shape

In [None]:
# Train `model` for 5 epochs of 1560 samples each, validating on 320 samples.
# NOTE(review): `model` is not assigned in any cell visible in this notebook
# (only `vgg_model` is), and both generators were created with
# class_mode=None — confirm this cell is still meant to run as-is.
model.fit_generator(
    generator=train_data_generator,
    samples_per_epoch=1560,
    nb_epoch=5,
    validation_data=test_data_generator,
    nb_val_samples=320,
)

In [None]:
# Write `model` to the file "pretraining.h5".
# NOTE(review): `model` is not assigned in any cell visible in this notebook
# (only `vgg_model` is) — confirm which model should be saved here.
model.save("pretraining.h5")

In [None]:
# Show the first layer's weights as small RGB images.
# NOTE(review): `model` is not assigned in any cell visible in this notebook,
# and the reshape assumes the layer holds 8 filters of 10x10x3 — confirm
# against the model this cell was written for.
weights = model.layers[0].W.get_value(borrow=True).reshape(8,10,10,3)
print (weights.shape)

# The default figure size has to be set *before* the figure is created,
# otherwise it only affects figures made later.
plt.rcParams['figure.figsize'] = (10,5)
fig, axes = plt.subplots(2,8, sharex=True, sharey=True)

# axes.flat walks the 2x8 grid row by row; zip stops as soon as either the
# filters or the available panels run out, so the grid can never be overrun.
for ax, w in zip(axes.flat, weights):
    ax.imshow(w)
plt.show()

In [None]:
# Pick one test image reproducibly (fixed seed) and display it.
# NOTE(review): `X_test` is not assigned in any cell visible in this notebook
# — confirm where it is supposed to come from.
np.random.seed(4)
sample_index = np.random.randint(0, len(X_test))
sample_image = X_test[sample_index]

# The image is viewed as (resolution_x, resolution_y, 3) for plotting.
plt.imshow(sample_image.reshape(resolution_x, resolution_y, 3))
plt.show()

In [None]:
# Compile a Theano function that maps the model inputs to the output of
# layer 1, run it on the sample image and plot each output map in grayscale.
# NOTE(review): `model` is not assigned in any cell visible in this notebook
# — confirm which model this cell should inspect.
convout1_f = theano.function(model.inputs, [model.layers[1].output])
# [0] selects the single requested output, the second [0] the single image
# in the one-element batch.
sample_outs = convout1_f([sample_image.astype(np.float32)])[0][0]

# The default figure size has to be set *before* the figure is created,
# otherwise it only affects figures made later.
plt.rcParams['figure.figsize'] = (10, 5)
fig, axes = plt.subplots(4,8, sharex=True, sharey=True)

# axes.flat walks the 4x8 grid row by row; zip stops as soon as either the
# output maps or the available panels run out, so the grid is never overrun.
for ax, w in zip(axes.flat, sample_outs):
    ax.imshow(w, cmap="gray")
plt.show()