In [35]:
# load required packages
import os

# NOTE: the keras star-imports are kept in their original relative order,
# because later star-imports may shadow names from earlier ones.
from keras.applications import *
from keras.models import *
from keras.preprocessing import image
from keras.preprocessing.image import *
from keras.layers import Dense,Flatten,Dropout,BatchNormalization
import h5py as h
import numpy as np
from sklearn.utils import shuffle
from keras import backend as K
from keras.utils import to_categorical
import pandas as pd
import PIL.Image

In [8]:
# Root directory of the dataset; it is expected to contain the `train` and
# `test` sub-directories read by the feature-extraction helpers.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
path = '/home/ec2-user/dogs_cats/'

In [9]:
# Number of images pushed through a backbone per prediction batch.
batch_size = 64

In [10]:
def compute_step(sample,batch_size):
    """Return how many batches are needed to cover `sample` items.

    This is the ceiling of ``sample / batch_size``: a trailing partial
    batch counts as one additional step.
    """
    full_batches, leftover = divmod(sample, batch_size)
    return full_batches if leftover == 0 else full_batches + 1

In [11]:
# extract features
def extract_features(model,path,gen_arg,input_size,batch_size):
    """Run a pre-trained ImageNet backbone over the train and test images.

    Parameters
    ----------
    model : callable
        Keras application constructor (e.g. ``Xception``). It is called with
        ``weights='imagenet', include_top=False, pooling='avg'``.
    path : str
        Directory holding the ``train`` and ``test`` sub-directories.
        A trailing path separator is optional.
    gen_arg : callable
        Preprocessing function handed to ``ImageDataGenerator``; it should
        be the one that belongs to ``model``.
    input_size : int
        Images are resized to ``(input_size, input_size)``.
    batch_size : int
        Number of images per prediction batch.

    Returns
    -------
    tuple
        ``(trn_features, test_features, trn_label)``. ``trn_label`` is the
        training class index one-hot encoded over two classes.
    """
    # Preprocessing: shuffle=False keeps every row in directory order, so the
    # features stay aligned with train_gen.classes and the generator filenames.
    gen = ImageDataGenerator(preprocessing_function=gen_arg)
    target_size = (input_size, input_size)
    # os.path.join instead of string concatenation: a `path` without a
    # trailing slash would otherwise point at e.g. ".../dogs_catstrain".
    train_gen = gen.flow_from_directory(os.path.join(path, 'train'), shuffle=False,
                                        target_size=target_size,
                                        batch_size=batch_size, class_mode='binary')
    test_gen = gen.flow_from_directory(os.path.join(path, 'test'), shuffle=False,
                                       target_size=target_size,
                                       batch_size=batch_size, class_mode=None)

    # Headless backbone with global average pooling; it is used directly
    # as the feature extractor (no extra Model wrapper is needed).
    base_model = model(weights='imagenet', include_top=False, pooling='avg')

    # One pass over each generator; compute_step rounds up so that the last
    # partial batch is included.
    trn_features = base_model.predict_generator(
        train_gen, steps=compute_step(train_gen.n, batch_size), verbose=1)
    test_features = base_model.predict_generator(
        test_gen, steps=compute_step(test_gen.n, batch_size), verbose=1)
    trn_label = to_categorical(train_gen.classes, num_classes=2)
    return trn_features, test_features, trn_label
    

In [14]:
def save_file(filename,x,test,y):
    """Store extracted features and labels in an HDF5 file.

    Parameters
    ----------
    filename : str
        Destination file; it is opened in ``'w'`` mode, so an existing file
        is overwritten.
    x : array-like
        Training features, written as dataset ``x_train``.
    test : array-like
        Test features, written as dataset ``x_test``.
    y : array-like
        Training labels, written as dataset ``y_train``.
    """
    # The context manager closes the file on exit, so the data is flushed
    # to disk and the handle is not leaked.
    with h.File(filename,'w') as f:
        f.create_dataset('x_train',data=x)
        f.create_dataset('x_test',data=test)
        f.create_dataset('y_train',data=y)

In [131]:
def get_filename(gen=None):
    """Return the test-image file names in the generator's unshuffled order.

    Parameters
    ----------
    gen : ImageDataGenerator, optional
        Generator used to list the directory. A fresh ``ImageDataGenerator()``
        is created per call when omitted, instead of sharing one instance
        built at function-definition time.

    Returns
    -------
    list
        ``filenames`` attribute of the directory iterator over the ``test``
        sub-directory of the module-level ``path``.
    """
    if gen is None:
        gen = ImageDataGenerator()
    # Only the file listing is used, so target_size / batch_size do not affect
    # the result; shuffle=False keeps the listing in directory order.
    test_gen = gen.flow_from_directory(os.path.join(path, 'test'), shuffle=False,
                                       target_size=(224,224),
                                       batch_size=64, class_mode=None)
    return test_gen.filenames

In [12]:
# Xception features (299x299 inputs, Xception-specific preprocessing).
xception_x, xception_test, xception_y = extract_features(
    Xception,
    path,
    gen_arg=xception.preprocess_input,
    input_size=299,
    batch_size=batch_size,
)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [18]:
# Persist the Xception features so that they can be reloaded later.
save_file('xception_features.h5', x=xception_x, test=xception_test, y=xception_y)

In [20]:
# InceptionResNetV2 features (299x299 inputs, model-specific preprocessing).
Inresnet_x, Inresnet_test, Inresnet_y = extract_features(
    InceptionResNetV2,
    path,
    gen_arg=inception_resnet_v2.preprocess_input,
    input_size=299,
    batch_size=batch_size,
)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [21]:
# Persist the InceptionResNetV2 features.
save_file('Inresnet_features.h5', x=Inresnet_x, test=Inresnet_test, y=Inresnet_y)

In [22]:
# ResNet50 features (224x224 inputs, ResNet50-specific preprocessing).
resnet_x, resnet_test, resnet_y = extract_features(
    ResNet50,
    path,
    gen_arg=resnet50.preprocess_input,
    input_size=224,
    batch_size=batch_size,
)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [23]:
# Persist the ResNet50 features.
save_file('resnet_features.h5', x=resnet_x, test=resnet_test, y=resnet_y)

In [24]:
# DenseNet201 features (224x224 inputs, DenseNet-specific preprocessing).
densenet_x, densenet_test, densenet_y = extract_features(
    DenseNet201,
    path,
    gen_arg=densenet.preprocess_input,
    input_size=224,
    batch_size=batch_size,
)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.8/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5


In [25]:
# Persist the DenseNet201 features.
save_file('DenseNet_features.h5', x=densenet_x, test=densenet_test, y=densenet_y)

In [40]:
#prepare data for new model
# Concatenate the pooled features of the four backbones along the feature axis.
x_train = np.concatenate((Inresnet_x,resnet_x,xception_x,densenet_x),axis=1)
x_test = np.concatenate((Inresnet_test,resnet_test,xception_test,densenet_test),axis=1)
# Every backbone read the same unshuffled train directory, so any one of the
# label arrays can be used.
y_train = resnet_y

# The training rows are in directory order (the generators use shuffle=False),
# i.e. grouped by class. Keras' validation_split takes its samples from the
# END of the arrays, so without shuffling the hold-out set would contain a
# single class. Shuffle features and labels together, with a fixed seed so
# that the cell is reproducible. x_test must keep its order (it is matched to
# the test file names later), so it is not shuffled.
x_train,y_train = shuffle(x_train,y_train,random_state=0)

In [41]:
# Sanity check: (number of training images, total width of the concatenated features).
x_train.shape

(25000, 7552)

In [124]:
# Classifier head on top of the concatenated features:
# dropout (rate 0.8) followed by a 2-way softmax layer.
input_tensor = Input(shape=x_train.shape[1:])
x = Dropout(0.8)(input_tensor)
pred = Dense(2, activation='softmax')(x)

model = Model(inputs=input_tensor, outputs=pred)
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [125]:
# Train the classifier head on the pre-computed features.
# NOTE(review): Keras takes the validation_split fraction from the end of
# x_train / y_train, so the arrays need to be shuffled beforehand for the
# validation metrics to be representative.
model.fit(
    x_train,
    y_train,
    batch_size=256,
    epochs=15,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f6ad0d76128>

In [126]:
# Predict class probabilities for the test features; with the 2-unit softmax
# head this yields one row of two probabilities per test image.
prediction = model.predict(x_test,verbose=1)



In [147]:
# Clip the probabilities to [0.005, 0.995] before submitting
# (presumably to bound the log-loss penalty of confident mistakes - confirm
# against the competition metric).
# NOTE: this rebinds `pred`, which previously held the model's output tensor.
pred = prediction.clip(min=0.005,max=0.995)

In [148]:
# Probability of class index 1.
# NOTE(review): assumes index 1 is the "dog" class (flow_from_directory
# assigns class indices by sub-directory name) - confirm via class_indices.
is_dog = pred[:,1]

In [149]:
# File names of the test images, listed unshuffled from the same `test`
# directory as the features, so they should line up row-for-row with x_test.
filenames = get_filename()

Found 12500 images belonging to 1 classes.


In [150]:
# Numeric id of each test image: the file name without directory and
# extension. os.path handles the platform's path separator and names without
# an extension, which the manual rfind('/') / rfind('.') slicing did not.
index = [int(os.path.splitext(os.path.basename(fname))[0]) for fname in filenames]

In [151]:
# Two-column array: image id, dog probability. Stacking promotes the ids to
# float; they are cast back to int after the DataFrame is built.
sub = np.stack([index,is_dog],axis=1)

In [152]:
# Submission table with the columns `id` and `label`.
submission_Inresnet_resnet_X_DenseNet = pd.DataFrame(sub,columns=['id','label'])

In [153]:
# The ids were promoted to float when stacked with the probabilities; restore integers.
submission_Inresnet_resnet_X_DenseNet['id'] = (
    submission_Inresnet_resnet_X_DenseNet['id'].astype(int)
)

In [154]:
# Write the submission file without the DataFrame's row index.
submission_Inresnet_resnet_X_DenseNet.to_csv(
    'submission_Inresnet_resnet_X_DenseNet.csv',
    index=False,
)