In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
print(os.listdir("../input"))
import pandas as pd
import tensorflow as tf
tfk=tf.contrib.keras
import os
import re

### Separate cat and dog files and plot

In [None]:
def get_cat_and_dog_files(data):
    """Partition file names by whether they mention 'cat' and/or 'dog'.

    A name goes into a list when the corresponding substring occurs anywhere
    in it, so a name containing both substrings appears in both lists and a
    name containing neither is dropped. Input order is preserved.

    Args:
        data: iterable of file-name strings (consumed in a single pass).

    Returns:
        Tuple ``(cat_files, dog_files)`` of lists.
    """
    buckets = {'cat': [], 'dog': []}
    for file_name in data:
        for label, matched in buckets.items():
            # A successful search yields a (truthy) match object, None otherwise.
            if re.search(label, file_name):
                matched.append(file_name)
    return buckets['cat'], buckets['dog']

# List every entry of the training directory, then split the names into
# cat and dog lists by substring match.
data=os.listdir('../input/train')
cat,dog=get_cat_and_dog_files(data)

import matplotlib.pyplot as plt
%matplotlib inline

def plot_images(files, image_dir='../input/train', target_size=(255, 255)):
    """Show each image file in its own matplotlib figure.

    Args:
        files: sequence of file names located inside ``image_dir``.
        image_dir: directory holding the files; defaults to the training folder.
        target_size: ``(height, width)`` every image is resized to on load.
    """
    for file_name in files:
        # load_img takes a 2-tuple (height, width); the channel count is not
        # part of the target size, so it is no longer passed.
        picture = tfk.preprocessing.image.load_img(
            '{}/{}'.format(image_dir, file_name), target_size=target_size)
        # One figure per image, drawn as soon as it is loaded instead of
        # buffering every image in a list first.
        plt.figure()
        plt.imshow(picture)

# Preview the first five cat images.
plot_images(cat[0:5])

### Split the train and test data

In [None]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; train_test_split now lives in sklearn.model_selection.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn older than 0.18
    from sklearn.cross_validation import train_test_split

# A fixed seed keeps the same files in each split on every Restart & Run All.
cat_train,cat_test=train_test_split(cat,random_state=42)
dog_train,dog_test=train_test_split(dog,random_state=42)

# Sizes of the four resulting file lists.
print(len(cat_train))
print(len(cat_test))
print(len(dog_train))
print(len(dog_test))

### Define the VGG model from scratch

In [None]:
# Start from an empty default TensorFlow graph before any layer is created.
tf.reset_default_graph()

# Architecture and data settings read by the layer helpers and the generators.
model_params = dict(
    num_classes=10,
    input_shape=(32, 32, 3),
    conv_kernel=(3, 3),
    pool_kernel=(2, 2),
    batch_size=32,
)

# Regularisation and optimiser settings.
hyper_params = dict(
    l2_regularization=0.0005,
    dropout=0.5,
    learning_rate=0.0001,
)
def conv2d(filters):
    """Create a same-padded ReLU ``Conv2D`` layer with L2 kernel regularisation.

    Args:
        filters: number of output filters.

    Returns:
        An unconnected ``Conv2D`` layer. Kernel size comes from
        ``model_params['conv_kernel']`` and the L2 factor from
        ``hyper_params['l2_regularization']``.
    """
    weight_penalty = tf.contrib.layers.l2_regularizer(hyper_params['l2_regularization'])
    return tfk.layers.Conv2D(
        filters,
        model_params['conv_kernel'],
        padding='same',
        activation=tf.nn.relu,
        kernel_regularizer=weight_penalty,
    )
def convBlockFunction(filters,inputs):
    """Apply conv -> batch norm -> conv -> max pool -> dropout(0.25) to ``inputs``.

    Args:
        filters: number of filters used by both convolutions.
        inputs: tensor the block is applied to.

    Returns:
        Output tensor of the final dropout layer.
    """
    first_conv = conv2d(filters)(inputs)
    normalised = tfk.layers.BatchNormalization()(first_conv)
    second_conv = conv2d(filters)(normalised)
    # Pool window size comes from the shared model settings.
    pooled = tfk.layers.MaxPool2D(model_params['pool_kernel'])(second_conv)
    block_dropout = tfk.layers.Dropout(0.25)
    return block_dropout(pooled)
def convBlock(filters):
    """Return a one-argument callable applying ``convBlockFunction`` with ``filters`` fixed."""
    return lambda inputs: convBlockFunction(filters, inputs)
def denseBlockFunction(units,inputs,activation=tf.nn.relu):
    """Apply a fully connected layer followed by dropout.

    Args:
        units: width of the dense layer.
        inputs: tensor the block is applied to.
        activation: activation of the dense layer. Defaults to ReLU; the
            layer was previously created without any activation, which made
            consecutive dense blocks purely linear. Pass ``None`` to get the
            old linear behaviour.

    Returns:
        Output tensor of the dropout layer (rate ``hyper_params['dropout']``).
    """
    net=tfk.layers.Dense(units,activation=activation)(inputs)
    return tfk.layers.Dropout(hyper_params['dropout'])(net)

def denseBlock(units):
    """Return a one-argument callable applying ``denseBlockFunction`` with ``units`` fixed."""
    return lambda inputs: denseBlockFunction(units, inputs)
def VGG():
    """Assemble the uncompiled VGG-style classifier.

    Resets the default TensorFlow graph, then stacks three conv blocks
    (64, 128, 256 filters), a flatten layer, two dense blocks (1024, 512
    units) and a softmax layer with ``model_params['num_classes']`` units.

    Returns:
        The Keras ``Model`` mapping the input tensor to class probabilities.
    """
    tf.reset_default_graph()
    inputs = tfk.layers.Input(shape=model_params['input_shape'])

    net = inputs
    for filters in (64, 128, 256):
        net = convBlock(filters)(net)

    net = tfk.layers.Flatten()(net)
    for units in (1024, 512):
        net = denseBlock(units)(net)

    classifier = tfk.layers.Dense(model_params['num_classes'], activation=tf.nn.softmax)
    return tfk.models.Model(inputs=inputs, outputs=classifier(net))
def compile_model(model):
    """Compile ``model`` for multi-class classification and print its summary.

    Uses categorical cross-entropy, Adam with
    ``hyper_params['learning_rate']`` and categorical accuracy.

    NOTE: a later cell of this notebook defines another ``compile_model``
    (binary classification); once that cell has run it shadows this one.

    Args:
        model: Keras model to compile (modified in place).

    Returns:
        The same ``model`` object, compiled.
    """
    model.compile(loss=tfk.losses.categorical_crossentropy,
                  optimizer=tfk.optimizers.Adam(lr=hyper_params['learning_rate']),
                  metrics=[tfk.metrics.categorical_accuracy])
    # summary() prints the table itself and returns None; wrapping it in
    # print() emitted a stray "None" line after the table.
    model.summary()

    return model

In [None]:
# Build the network and compile it in one step; compile_model returns the model it was given.
vgg = compile_model(VGG())

### Train and predict with the VGG model on the CIFAR-10 dataset, using augmentation and generators

In [None]:
# CIFAR-10 arrays; the labels are one-hot encoded for the softmax output.
(x_train, y_train), (x_test, y_test) = tfk.datasets.cifar10.load_data()
y_train1 = tfk.utils.to_categorical(y_train, model_params['num_classes'])
y_test1 = tfk.utils.to_categorical(y_test, model_params['num_classes'])

# Training batches are scaled by 1/255 and randomly sheared, zoomed and flipped;
# evaluation batches are only scaled.
train_datagen = tfk.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
test_datagen = tfk.preprocessing.image.ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow(
    x_train, y_train1, batch_size=model_params['batch_size'])
validation_generator = test_datagen.flow(
    x_test, y_test1, batch_size=model_params['batch_size'])



In [None]:
# Each of the 10 epochs draws 300 training batches and 100 validation batches.
history = vgg.fit_generator(
    train_generator,
    steps_per_epoch=300,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=100,
)

### Using already existing VGG model in tf.contrib.keras.applications

In [None]:
# Stock VGG19 from keras.applications, built for 224x224 RGB inputs.
# NOTE(review): later cells rebind `model` to other networks, so
# make_prediction only sees this VGG19 until those cells run.
model=tfk.applications.VGG19(input_shape=(224,224,3))

def make_prediction(img_path,classifier=None):
    """Print the top-3 decoded VGG19 predictions for one image file.

    Args:
        img_path: path of the image to classify.
        classifier: model exposing ``predict``. When omitted, the
            notebook-level ``model`` is looked up at call time. Later cells
            rebind ``model`` to a headless VGG16 and then to a small dense
            network, so pass the VGG19 instance explicitly once they have run.
    """
    if classifier is None:
        classifier = model
    img=tfk.preprocessing.image.load_img(img_path,target_size=(224,224))
    x=tfk.preprocessing.image.img_to_array(img)
    # Add the leading batch axis expected by predict().
    x=np.expand_dims(x,axis=0)
    x=tfk.applications.vgg19.preprocess_input(x)
    preds=classifier.predict(x)
    # decode_predictions returns one list per batch item; keep the only one.
    result=tfk.applications.vgg19.decode_predictions(preds,top=3)[0]
    print('predictions: \n{}'.format(result))

# Dataset sizes, image shape and batch size.
# The next code cell assigns these same four values again.
train_size=20000
test_size=5000
input_shape=(150,150,3)
batch_size=32

### Transfer learning: removing the top layer of VGG and training the remaining model with one dense layer added on top of it

In [None]:
# Sizes and shapes for the cats-vs-dogs transfer-learning experiment.
train_size, test_size = 20000, 5000
input_shape = (150, 150, 3)
batch_size = 32

# VGG16 created with include_top=False; it is used below to turn images into features.
model = tfk.applications.VGG16(include_top=False)

train_dir, test_dir = '../input/train', '../input/test'

# Cell output: number of entries in the test directory.
len(os.listdir(test_dir))

In [None]:
def load_data(image_files,image_dir='../input/train',target_size=(150,150)):
    """Load image files into a single array.

    Args:
        image_files: iterable of file names located inside ``image_dir``.
        image_dir: directory holding the files; defaults to the training folder.
        target_size: ``(height, width)`` every image is resized to on load.

    Returns:
        ``np.asarray`` of the per-file ``img_to_array`` results, in input order.
    """
    # NOTE(review): the arrays are returned as loaded; no
    # vgg16.preprocess_input is applied before they reach the VGG16 base.
    # Confirm whether that is intended.
    images=[]
    for file_name in image_files:
        image_path='{}/{}'.format(image_dir,file_name)
        # load_img takes a 2-tuple (height, width); the channel count is not
        # part of the target size, so it is no longer passed.
        img=tfk.preprocessing.image.load_img(image_path,target_size=target_size)
        images.append(tfk.preprocessing.image.img_to_array(img))
    return np.asarray(images)

# Decode every split into image arrays (dogs first, then cats).
train_dog, test_dog = load_data(dog_train), load_data(dog_test)
train_cat, test_cat = load_data(cat_train), load_data(cat_test)

def extract_features(model,images):
    """Run ``images`` through ``model`` and return the raw ``predict`` output.

    Args:
        model: any object exposing ``predict``.
        images: batch passed unchanged to ``model.predict``.

    Returns:
        Whatever ``model.predict(images)`` returns.
    """
    return model.predict(images)

# Push every image split through `model` to obtain its feature maps.
train_data_cat = extract_features(model, train_cat)
train_data_dog = extract_features(model, train_dog)
test_data_cat = extract_features(model, test_cat)
test_data_dog = extract_features(model, test_dog)

# Stack along the sample axis, cats before dogs.
train_data = np.vstack((train_data_cat, train_data_dog))
test_data = np.vstack((test_data_cat, test_data_dog))

# Labels follow the stacking order: 0 for every cat row, 1 for every dog row.
train_labels = np.array([0] * len(train_data_cat) + [1] * len(train_data_dog))
test_labels = np.array([0] * len(test_data_cat) + [1] * len(test_data_dog))

def dnn():
    """Build the small binary classifier that is trained on extracted features.

    Layers: Input(4, 4, 512) -> Flatten -> Dense(256, relu) -> Dropout(0.6)
    -> Dense(1, sigmoid).

    Returns:
        The uncompiled Keras ``Model``.
    """
    feature_maps = tfk.layers.Input(shape=(4, 4, 512))
    flat = tfk.layers.Flatten()(feature_maps)
    hidden = tfk.layers.Dense(256, activation=tf.nn.relu)(flat)
    hidden = tfk.layers.Dropout(0.6)(hidden)
    probability = tfk.layers.Dense(1, activation=tf.nn.sigmoid)(hidden)
    return tfk.models.Model(inputs=feature_maps, outputs=probability)
def compile_model(model):
    """Compile ``model`` for binary classification and print its summary.

    Uses binary cross-entropy, RMSprop (learning rate 0.0001) and binary
    accuracy.

    NOTE: this definition replaces the earlier ``compile_model`` (categorical
    version) defined in the VGG cell of this notebook.

    Args:
        model: Keras model to compile (modified in place).

    Returns:
        The same ``model`` object, compiled.
    """
    optimizer=tfk.optimizers.RMSprop(lr=0.0001)
    model.compile(loss=tfk.losses.binary_crossentropy,
                 optimizer=optimizer,
                 metrics=[tfk.metrics.binary_accuracy])
    # summary() prints the table itself and returns None; wrapping it in
    # print() emitted a stray "None" line after the table.
    model.summary()
    return model

In [None]:
# Rebinds `model` to the new dense head; whatever `model` referred to before is no longer reachable under this name.
model=dnn()

In [None]:
# Compiles `model` in place; the returned model is this cell's last expression.
compile_model(model)

In [None]:
# Train the dense head on the extracted features, validating on the held-out features each epoch.
history = model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=10,
    verbose=2,
    validation_data=(test_data, test_labels),
)

In [None]:
# Evaluate the trained head on the held-out features.
model.evaluate(test_data,test_labels,batch_size=32,verbose=1)

In [None]:
# Inspect the values recorded by the last fit() call.
history.history

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# Training vs. validation accuracy per epoch; labelled so the two curves can be told apart.
plt.figure()
plt.plot(history.history['binary_accuracy'],label='train')
plt.plot(history.history['val_binary_accuracy'],label='validation')
plt.title('vgg_accuracy')
plt.xlabel('epoch')
plt.ylabel('binary accuracy')
plt.legend();

In [None]:
# Sanity check: shape of the stacked training features.
train_data.shape

In [None]:
# Sanity check: shape of the training label vector.
train_labels.shape

In [None]:
# List the contents of the current working directory.
os.listdir()

In [None]:
# extract_features takes (model, images); the extra name argument raised a
# TypeError. After the preceding cells `model` is the dense head, whose input
# is the (4, 4, 512) feature maps, so it is given the precomputed features
# rather than the raw image arrays.
# NOTE(review): confirm this matches what these cells were meant to show.
extract_features(model,test_data_cat)
extract_features(model,test_data_dog)

In [None]:
# extract_features takes (model, images); the extra name argument raised a
# TypeError. After the preceding cells `model` is the dense head, so it is
# given the precomputed dog features rather than the raw image arrays.
# NOTE(review): confirm this matches what this cell was meant to show.
extract_features(model,train_data_dog)