# Trial run on small dataset. 

In [31]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator,array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.models import load_model
from matplotlib import pyplot as plt

## Initialize variables

In [2]:
# Root folders handed to flow_from_directory for the training and validation images.
train_dir, validation_dir = 'train', 'validation'

In [3]:
# Size every image is resized to when it is loaded.
img_width = 150
img_height = 150
# Number of images per set; each set is split evenly between the two groups
# (1000 + 1000 for training, 400 + 400 for validation).
train_samples = 2000
validation_samples = 800
# epochs: passes over the extracted features when fitting the top layer.
# batch_size: used both for feature extraction and for fitting.
epochs = 50
batch_size = 16

## Load VGG16 with weights

In [14]:
def _extract_vgg_features(vgg_model, datagenerator, directory, sample_count):
    """Run the images under `directory` through `vgg_model` and return its predictions.

    Images are read in a fixed (unshuffled) order, without labels, in batches of
    `batch_size`, resized to (img_width, img_height).
    """
    generator = datagenerator.flow_from_directory(
        directory,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,  # yield images only, no labels
        shuffle=False)    # fixed order so labels can be rebuilt by hand afterwards
    # Round the number of batches UP. Floor division would silently drop the
    # trailing images whenever sample_count is not a multiple of batch_size,
    # leaving fewer feature rows than hand-made labels.
    steps = (sample_count + batch_size - 1) // batch_size
    return vgg_model.predict_generator(generator, steps)


def save_features_from_vgg():
    """Extract VGG16 convolutional features for the training and validation images.

    Loads VGG16 with ImageNet weights and without its top (classifier) layers,
    then pushes the images from `train_dir` and `validation_dir` through it.
    Pixel values are rescaled by 1/255 first. Despite the name, nothing is
    written to disk; the features are only returned.

    Returns:
        (features_train, features_validation): the VGG16 outputs for the
        training and validation images, in generator order.
    """
    vgg_model = applications.VGG16(include_top=False, weights='imagenet')
    datagenerator = ImageDataGenerator(rescale=1. / 255)
    features_train = _extract_vgg_features(
        vgg_model, datagenerator, train_dir, train_samples)
    features_validation = _extract_vgg_features(
        vgg_model, datagenerator, validation_dir, validation_samples)
    # Both categories hold the same number of samples and shuffle=False was used,
    # so the caller can simply build the labels by hand.
    return features_train,features_validation

## Make top layer and train it

In [26]:
def train_last_layer(features_train,train_labels,features_validation,validation_labels):
    """Train a small dense classifier on pre-extracted features and save it.

    Args:
        features_train: training features; everything after the first axis is
            used as the input shape of the classifier.
        train_labels: labels for `features_train`.
        features_validation: features evaluated after each epoch.
        validation_labels: labels for `features_validation`.

    Side effects:
        Writes the weights to 'vgg16_transfer_top_layer.h5' and the full model
        to 'vgg16_transfer_top_model.h5'. Returns nothing.
    """
    # Classifier head: flatten -> 256-unit ReLU -> dropout -> one sigmoid unit.
    # A single sigmoid output is enough because there are only two classes
    # (peak / no peak).
    top_model = Sequential([
        Flatten(input_shape=features_train.shape[1:]),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])

    # rmsprop is used here; Adam was observed to perform about the same.
    top_model.compile(
        loss='binary_crossentropy',
        optimizer='rmsprop',
        metrics=['accuracy'])

    # Fit on the training features, reporting validation metrics each epoch.
    top_model.fit(
        features_train,
        train_labels,
        validation_data=(features_validation, validation_labels),
        batch_size=batch_size,
        epochs=epochs)

    # Keep both the bare weights and the complete, reloadable model.
    top_model.save_weights('vgg16_transfer_top_layer.h5')
    top_model.save('vgg16_transfer_top_model.h5')

## Run functions to train model

In [15]:
# Run the training and validation images through VGG16 once and keep the
# resulting features in memory for the top-layer training below.
features_train,features_validation = save_features_from_vgg()

Found 2000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


Because shuffling was off and both classes contain the same number of images, we can build the labels by hand: the first half of each set is labelled 0 and the second half is labelled 1.

In [24]:
# First half of each set is labelled 0, second half 1.
train_labels = np.repeat([0, 1], train_samples // 2)
validation_labels = np.repeat([0, 1], validation_samples // 2)

In [27]:
# Fit the top layer on the extracted features; the trained model is saved to disk.
train_last_layer(
    features_train=features_train,
    train_labels=train_labels,
    features_validation=features_validation,
    validation_labels=validation_labels)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 2000 samples, validate on 800 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## Define the prediction function

In [45]:
def predict_image(image,model_file,threshold=0.5):
    """Classify a single image file as peak / not peak and print the verdict.

    Args:
        image: path of the image file to classify.
        model_file: path of the saved top model (as written by train_last_layer).
        threshold: sigmoid output above which the image is reported as a peak.

    The image goes through the same steps as the training data: resized to
    (img_width, img_height), rescaled by 1/255, passed through VGG16 without
    its top layers, then through the trained top model.
    """
    img = load_img(image, target_size=(img_width, img_height))
    # Match the rescale=1/255 applied by the ImageDataGenerator when the training
    # features were extracted; feeding raw 0-255 pixels would give the top model
    # features on a different scale from the ones it was trained on.
    img = img_to_array(img) * (1. / 255)
    img = np.expand_dims(img, axis=0)  # add the batch axis expected by predict()
    # NOTE: VGG16 and the top model are reloaded on every call, which is slow
    # when classifying many images.
    vgg_model = applications.VGG16(include_top=False, weights='imagenet')
    model = load_model(model_file)
    features = vgg_model.predict(img)
    # The top model ends in a sigmoid, so its output is a score in [0, 1] that is
    # practically never exactly 0; it has to be thresholded, not tested with == 0.
    # Assumes label 1 is the "peak" class, as in the original branch order -
    # TODO confirm against the class folder order.
    peak_score = model.predict(features)[0][0]
    if peak_score <= threshold:
        print ("It is not a peak.")
    else:
        print ("It is a peak.")

## Example run

In [49]:
# Classify one sample image with the saved top model.
predict_image(image="test/peak.png", model_file="vgg16_transfer_top_model.h5")

It is a peak.
