In [1]:
%matplotlib inline

In [2]:
from __future__ import division,print_function

import os, json
import shutil
from glob import glob
import random
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
import pandas as pd
from matplotlib import pyplot as plt

In [3]:
from importlib import reload
import utils; reload(utils)
from utils import plots

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla P100-PCIE-16GB (CNMeM is disabled, cuDNN not available)
Using Theano backend.


In [4]:
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.utils.data_utils import get_file
from keras.preprocessing import image
from keras.optimizers import SGD, RMSprop, Adam, Nadam


## Create validation set and sample

In [5]:
# Working directory at the time this cell runs; captured before the later
# `%cd` so that other paths can be anchored to it.
NB_ROOT = os.getcwd()

In [6]:
# Dataset root, anchored at the notebook directory.
DATA_HOME_DIR = os.path.join(NB_ROOT, "data/invasive-species-monitoring")
# Directory used for saved model weights (see fit_model / load_weights).
# NOTE(review): this is a machine-specific absolute path outside DATA_HOME_DIR --
# adjust it when running on another host.
results_path = os.path.join('/mnt/data/invasive-species-monitoring', 'results/')

In [7]:
# Switch into the dataset root and create the directory skeleton used below.
# `-p` creates missing parents and makes each mkdir a no-op when the
# directory already exists, so this cell can be re-run safely.
%cd $DATA_HOME_DIR
%mkdir -p valid
%mkdir -p results
%mkdir -p sample/train
%mkdir -p sample/test/unknown
%mkdir -p sample/valid
%mkdir -p sample/results
%mkdir -p test/unknown
%mkdir -p /mnt/data/invasive-species-monitoring/results

/mnt/ml/working/fastai-courses/deeplearning1/nbs/data/invasive-species-monitoring


In [8]:
def create_label_dirs(base_dir):
    """
    Create one sub-directory per class label under ``base_dir``.

    The resulting ``<base_dir>/<label>/`` layout is the one Keras
    ``ImageDataGenerator.flow_from_directory`` expects. Calling this again on
    an already-created layout is a no-op.

    Args:
        base_dir: Directory (relative or absolute) that will contain the
            ``invasive`` and ``not_invasive`` sub-directories. Missing parent
            directories are created as needed.

    Raises:
        OSError: If a directory cannot be created for a reason other than it
            already existing (e.g. permissions, or a regular file is in the way).
    """
    labels = ['invasive', 'not_invasive']
    for label in labels:
        # exist_ok=True replaces the former errno.EEXIST check, which went
        # through `os.errno` -- an attribute that no longer exists on
        # Python 3.7+ and therefore crashed whenever the directory was present.
        os.makedirs(os.path.join(base_dir, label), exist_ok=True)

# Build the per-label folders for the full and the sample train/valid splits.
for split_dir in ('train', 'valid', 'sample/train', 'sample/valid'):
    create_label_dirs(split_dir)

In [9]:
# Label table for the training images; label_images_by_dir reads its `name`
# and `invasive` columns. The path is relative to the current directory.
train_labels_csv = pd.read_csv("train_labels.csv")

In [10]:
def label_images_by_dir(base_dir="train", labels_df=None):
    """
    Move images into their label sub-directory of ``base_dir`` so that they
    can be recognized by ImageDataGenerator.flow_from_directory.

    For every row of the label table, ``<base_dir>/<name>.jpg`` is moved to
    ``<base_dir>/invasive/`` when the row's ``invasive`` value equals 1 and to
    ``<base_dir>/not_invasive/`` otherwise. Rows whose image is not present
    directly under ``base_dir`` (e.g. already moved) are skipped, so the
    function can be re-run safely.

    Args:
        base_dir: Directory that holds the unsorted images and the two label
            sub-directories.
        labels_df: DataFrame with ``name`` and ``invasive`` columns. Defaults
            to the notebook-level ``train_labels_csv``.
    """
    if labels_df is None:
        labels_df = train_labels_csv
    for _, row in labels_df.iterrows():
        image_name = "{}.jpg".format(row['name'])
        src_path = os.path.join(base_dir, image_name)
        if not os.path.exists(src_path):
            continue
        label = 'invasive' if row['invasive'] == 1 else 'not_invasive'
        # Both labels are resolved relative to base_dir; the not-invasive
        # branch used to be hard-coded to "train/" regardless of base_dir.
        shutil.move(src_path, os.path.join(base_dir, label, image_name))
# label_images_by_dir()

In [11]:
def create_valid_set(train_root='train', valid_root="valid", valid_rate=0.1):
    """
    Move a random fraction of each label's training images into the
    validation tree.

    For both labels, ``int(count * valid_rate)`` files are drawn at random from
    ``<train_root>/<label>`` and moved to ``<valid_root>/<label>``.
    """
    for label in ('invasive', 'not_invasive'):
        src_dir = os.path.join(train_root, label)
        dst_dir = os.path.join(valid_root, label)
        candidates = os.listdir(src_dir)
        n_valid = int(len(candidates) * valid_rate)
        for picked in random.sample(candidates, k=n_valid):
            shutil.move(os.path.join(src_dir, picked), dst_dir)
# create_valid_set()

In [12]:
def create_sample_set(rate=0.01):
    """
    Copy a random subset of every dataset split into the ``sample/`` tree.

    For each of ``train``, ``valid`` and ``test`` and each label directory
    that exists under it, a random selection of files is copied to
    ``sample/<dataset>/<label>``. All paths are relative to the current
    working directory.

    Args:
        rate: Fraction of files to copy from ``train`` and ``test``. The
            ``valid`` split uses ``rate * 5`` because it has fewer items.
            The number of files drawn is clamped to the number available.
    """
    labels = ['invasive', 'not_invasive', 'unknown']
    for dataset in ['train', 'valid', 'test']:
        # Use a higher sample rate for the validation dataset
        # because it has fewer items.
        sample_rate = rate * 5 if dataset == "valid" else rate
        for label in labels:
            src_dir = os.path.join(dataset, label)
            dst_dir = os.path.join('sample', dataset, label)
            try:
                files = os.listdir(src_dir)
            except FileNotFoundError:
                # Not every split has every label (e.g. test only has 'unknown').
                continue
            # Without an existing destination directory shutil.copy would write
            # a plain file named after the label, overwritten by each copy.
            os.makedirs(dst_dir, exist_ok=True)
            # Clamp so an effective rate above 1 (e.g. rate=0.5 -> 2.5 for
            # 'valid') cannot ask random.sample for more items than exist.
            k = max(0, min(len(files), int(len(files) * sample_rate)))
            for file in random.sample(files, k=k):
                shutil.copy(os.path.join(src_dir, file), dst_dir)

# create_sample_set()

# Build Model

In [13]:
from keras.models import Sequential, Model, load_model
from keras import applications
from keras import optimizers
from keras.layers import Dropout, Flatten, Dense

In [14]:
# Image size used throughout: build_model unpacks it as (img_rows, img_cols)
# and the generator cells pass it as `target_size` to get_batches.
target_size = (600, 450)
# target_size = (224, 224)

In [15]:
def build_model():
    """
    Build and compile a two-class image classifier on top of a frozen VGG16.

    The VGG16 convolutional base (ImageNet weights, no top) is created for
    inputs of the notebook-level ``target_size`` and all of its layers are
    marked non-trainable. A small head (Flatten -> Dense(256, relu) ->
    Dense(2, softmax)) is stacked on the base output. The model is compiled
    with categorical cross-entropy and SGD (lr=1e-4, momentum=0.9), and its
    summary is printed.

    Returns:
        The compiled Keras ``Model``.
    """
    img_rows, img_cols = target_size
    img_channel = 3
    # Shape is given channels-first (channels, rows, cols).
    # NOTE(review): assumes the Keras backend is configured for channels-first
    # image ordering -- confirm when changing backends.
    base_model = applications.VGG16(weights='imagenet', include_top=False,
                                    input_shape=(img_channel, img_rows, img_cols))

    # Freeze the pretrained convolutional layers; only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False

    add_model = Sequential()
    add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
    add_model.add(Dense(256, activation='relu'))
    add_model.add(Dense(2, activation='softmax'))

    model = Model(inputs=base_model.input, outputs=add_model(base_model.output))
    model.compile(loss='categorical_crossentropy', optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
                  metrics=['accuracy'])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line.
    model.summary()
    return model

# Instantiate the network once; the training and prediction cells use this object.
model = build_model()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 3, 600, 450)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 600, 450)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 600, 450)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 300, 225)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 300, 225)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 300, 225)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 128, 150, 112)     0         
__________

# Train

In [16]:
# Root of the dataset tree used for this run; switch to the sample tree for
# quick experiments.
_path = DATA_HOME_DIR
# _path = DATA_HOME_DIR + '/sample' # Only for sample tests!
# All three splits are resolved under the same root. (An earlier, separate
# test_path assignment was immediately overwritten and has been dropped.)
train_path = os.path.join(_path, 'train')
valid_path = os.path.join(_path, 'valid')
test_path = os.path.join(_path, 'test')

In [17]:
def get_batches(path, gen=None, shuffle=True, batch_size=8, target_size=(224,224), class_mode='categorical'):
    """
    Return a directory iterator that yields batches of images from ``path``.

    Thin wrapper around ``gen.flow_from_directory``; the iterator yields
    batches indefinitely, in an infinite loop.

    See Keras documentation: https://keras.io/preprocessing/image/

    Args:
        path: Directory containing one sub-directory per class.
        gen: Generator object providing ``flow_from_directory``. When omitted,
            a fresh ``image.ImageDataGenerator()`` (no augmentation) is used.
        shuffle: Whether the iterator shuffles the images.
        batch_size: Number of images per batch.
        target_size: Size every image is resized to. The default 224x224 is
            the image size used by ImageNet.
        class_mode: Forwarded to ``flow_from_directory``.

    Returns:
        Whatever ``gen.flow_from_directory`` returns.
    """
    if gen is None:
        # Create the default generator per call rather than once in the
        # signature, so no instance is silently shared between callers.
        gen = image.ImageDataGenerator()
    return gen.flow_from_directory(path, target_size=target_size,
                                   class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)

In [18]:
# NOTE(review): `!` runs the command in a subshell, so this `cd` presumably
# does not change the notebook's own working directory (`%cd` would) --
# confirm the intent.
! cd $NB_ROOT
# Training batch size; the validation generator below uses twice this value.
BATCH_SIZE = 32

# Augmentation applied to training images only: small rotations, shifts and
# horizontal flips.
trans_gen = image.ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1, height_shift_range=0.1,
    horizontal_flip=True,
)
 
# Training uses the augmenting generator; validation falls back to
# get_batches' default generator.
train_batches = get_batches(train_path, gen=trans_gen, batch_size=BATCH_SIZE, target_size=target_size)
valid_batches = get_batches(valid_path, batch_size=BATCH_SIZE*2, target_size=target_size)

Found 2067 images belonging to 2 classes.
Found 228 images belonging to 2 classes.


In [19]:
def fit_model(model, no_of_epochs = 10, train_gen=None, valid_gen=None, weights_dir=None):
    """
    Train ``model`` one epoch at a time, saving the weights after every epoch.

    After epoch index ``i`` the weights are written to ``ft<i>.h5`` inside
    ``weights_dir``. File names restart at ``ft0.h5`` on every call, so a
    repeated call overwrites the files of the previous one.

    Args:
        model: Object exposing Keras' ``fit_generator`` and ``save_weights``.
        no_of_epochs: Number of single-epoch fits to run.
        train_gen: Training batch iterator (needs ``samples`` and
            ``batch_size``). Defaults to the notebook-level ``train_batches``.
        valid_gen: Validation batch iterator. Defaults to the notebook-level
            ``valid_batches``.
        weights_dir: Directory for the weight files. Defaults to the
            notebook-level ``results_path``.
    """
    train_gen = train_batches if train_gen is None else train_gen
    valid_gen = valid_batches if valid_gen is None else valid_gen
    weights_dir = results_path if weights_dir is None else weights_dir

    # Derive step counts from each generator's OWN batch size. Validation used
    # to be divided by the training batch size although its generator yields
    # larger batches, so the validation set was traversed about twice per epoch.
    train_steps = max(1, train_gen.samples // train_gen.batch_size)
    # Ceiling division: score every validation image exactly once.
    valid_steps = max(1, -(-valid_gen.samples // valid_gen.batch_size))

    for epoch in range(no_of_epochs):
        print("Running epoch: %d" % epoch)
        model.fit_generator(
            train_gen,
            epochs=1,
            steps_per_epoch=train_steps,
            validation_data=valid_gen, validation_steps=valid_steps,
        )
        latest_weights_filename = 'ft%d.h5' % epoch
        model.save_weights(os.path.join(weights_dir, latest_weights_filename))
        # epoch is zero-based; report how many fits have actually finished.
        print("Completed %s fit operations" % (epoch + 1))

In [20]:
# Train for 20 epochs; fit_model saves the weights as ft0.h5 ... ft19.h5 under results_path.
fit_model(model, 20)

Running epoch: 0
Epoch 1/1
Completed 0 fit operations
Running epoch: 1
Epoch 1/1
Completed 1 fit operations
Running epoch: 2
Epoch 1/1
Completed 2 fit operations
Running epoch: 3
Epoch 1/1
Completed 3 fit operations
Running epoch: 4
Epoch 1/1
Completed 4 fit operations
Running epoch: 5
Epoch 1/1
Completed 5 fit operations
Running epoch: 6
Epoch 1/1
Completed 6 fit operations
Running epoch: 7
Epoch 1/1
Completed 7 fit operations
Running epoch: 8
Epoch 1/1
Completed 8 fit operations
Running epoch: 9
Epoch 1/1
Completed 9 fit operations
Running epoch: 10
Epoch 1/1
Completed 10 fit operations
Running epoch: 11
Epoch 1/1
Completed 11 fit operations
Running epoch: 12
Epoch 1/1
Completed 12 fit operations
Running epoch: 13
Epoch 1/1
Completed 13 fit operations
Running epoch: 14
Epoch 1/1
Completed 14 fit operations
Running epoch: 15
Epoch 1/1
Completed 15 fit operations
Running epoch: 16
Epoch 1/1
Completed 16 fit operations
Running epoch: 17
Epoch 1/1
Completed 17 fit operations
Running epoc

In [31]:
# Continue training the same `model` for up to 20 more epochs. fit_model
# restarts its file names at ft0.h5, so earlier weight files are overwritten.
fit_model(model, 20)

Running epoch: 0
Epoch 1/1
Completed 0 fit operations
Running epoch: 1
Epoch 1/1
Completed 1 fit operations
Running epoch: 2
Epoch 1/1
Completed 2 fit operations
Running epoch: 3
Epoch 1/1
Completed 3 fit operations
Running epoch: 4
Epoch 1/1
Completed 4 fit operations
Running epoch: 5
Epoch 1/1
Completed 5 fit operations
Running epoch: 6
Epoch 1/1
Completed 6 fit operations
Running epoch: 7
Epoch 1/1
Completed 7 fit operations
Running epoch: 8
Epoch 1/1
Completed 8 fit operations
Running epoch: 9
Epoch 1/1
Completed 9 fit operations
Running epoch: 10
Epoch 1/1
Completed 10 fit operations
Running epoch: 11
Epoch 1/1
Completed 11 fit operations
Running epoch: 12
Epoch 1/1
Completed 12 fit operations
Running epoch: 13
Epoch 1/1
Completed 13 fit operations
Running epoch: 14
Epoch 1/1
Completed 14 fit operations
Running epoch: 15
Epoch 1/1
Completed 15 fit operations
Running epoch: 16
Epoch 1/1
Completed 16 fit operations
Running epoch: 17
Epoch 1/1
Completed 17 fit operations
Running epoc

In [36]:
# Another round of training on the same `model` object; the ft<epoch>.h5 files
# written by fit_model are overwritten again, starting from ft0.h5.
fit_model(model, 20)

Running epoch: 0
Epoch 1/1
Completed 0 fit operations
Running epoch: 1
Epoch 1/1
Completed 1 fit operations
Running epoch: 2
Epoch 1/1
Completed 2 fit operations
Running epoch: 3
Epoch 1/1
Completed 3 fit operations
Running epoch: 4
Epoch 1/1
Completed 4 fit operations
Running epoch: 5
Epoch 1/1
Completed 5 fit operations
Running epoch: 6
Epoch 1/1
Completed 6 fit operations
Running epoch: 7
Epoch 1/1
Completed 7 fit operations
Running epoch: 8
Epoch 1/1
Completed 8 fit operations
Running epoch: 9
Epoch 1/1
Completed 9 fit operations
Running epoch: 10
Epoch 1/1
Completed 10 fit operations
Running epoch: 11
Epoch 1/1
Completed 11 fit operations
Running epoch: 12
Epoch 1/1
Completed 12 fit operations
Running epoch: 13
Epoch 1/1
Completed 13 fit operations
Running epoch: 14
Epoch 1/1
Completed 14 fit operations
Running epoch: 15
Epoch 1/1
Completed 15 fit operations
Running epoch: 16
Epoch 1/1
Completed 16 fit operations
Running epoch: 17
Epoch 1/1
Completed 17 fit operations
Running epoc

KeyboardInterrupt: 

In [45]:
# Restore the weights saved after epoch index 15 (ft15.h5) by fit_model.
# NOTE(review): the reason for picking this epoch is not recorded here --
# presumably it had the best validation score; confirm.
model.load_weights(os.path.join(results_path, 'ft15.h5'))

# Predict

In [46]:
# Iterator over the test images. shuffle=False keeps the batch order fixed so
# predictions can be matched with test_batches.filenames when the submission
# is built; class_mode=None is passed because the test images have no labels.
test_batches = get_batches(
    test_path, batch_size=BATCH_SIZE * 2, target_size=target_size, shuffle=False, class_mode=None)

Found 1531 images belonging to 1 classes.


In [47]:
def predict(model, test_batches):
    """
    Run ``model`` over every image of ``test_batches`` exactly once.

    Args:
        model: Object exposing Keras' ``predict_generator(generator, steps)``.
        test_batches: Batch iterator exposing ``samples`` (total number of
            images) and ``batch_size``. It should be non-shuffling so rows
            line up with its file list.

    Returns:
        Whatever ``model.predict_generator`` returns for one full pass.
    """
    # The second argument of predict_generator is the number of BATCHES to
    # draw, not the number of samples. Passing `samples` made the generator
    # loop over the data roughly `batch_size` times.
    steps = -(-test_batches.samples // test_batches.batch_size)  # ceil division
    return model.predict_generator(test_batches, steps)

In [48]:
# Run inference on the test iterator; `preds` holds the raw model outputs.
preds = predict(model, test_batches)

In [49]:
# Assemble the submission table: one row per test image, indexed by the
# numeric image name parsed from the generator's file list and sorted by it.
sbm = (
    pd.DataFrame(preds, columns=["invasive","not invasive"])
    .assign(name=[
        int(f.replace('unknown/', '').replace('.jpg', ''))
        for f in test_batches.filenames
    ])
    .set_index(['name'])
    .sort_index()
)

In [50]:
# Write the `name` index and only the `invasive` column to the submission
# file in the current working directory.
sbm.to_csv('submission.csv', columns=['invasive'])

In [51]:
# Upload the submission file for the invasive-species-monitoring competition
# via the `kg` command-line tool (presumably kaggle-cli; must be installed and
# logged in on this machine).
!kg submit -c invasive-species-monitoring submission.csv

0.97237
