In [1]:
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from keras.models import Model
from keras.optimizers import Adam
from keras_resnet import ResnetModel
from data_helper import ImageGenerator, get_train_matrices, get_test_matrices, ValidGenerator
from sklearn.cross_validation import train_test_split

import gc
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pandas as pd
from itertools import chain

%matplotlib inline
%load_ext watermark
%config InlineBackend.figure_format = 'retina'

Using TensorFlow backend.


In [2]:
img_size = (224, 224)
img_channels = 3
output_size = 17
batch_size = 128
batches = 1 * 128 // batch_size

In [3]:
%watermark -vmp keras -g

CPython 3.5.2
IPython 5.1.0

keras 2.0.5

compiler   : GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)
system     : Darwin
release    : 16.6.0
machine    : x86_64
processor  : i386
CPU cores  : 8
interpreter: 64bit
Git hash   : 724e22391b321f57298f4333b0928e6023d4849e


In [4]:
x_train, y_train, labels_map = get_train_matrices("/Users/westside/dev/python/planet-amazon-deforestation/input/train_v2.csv", "/Users/westside/dev/python/planet-amazon-deforestation/input/train-jpg", img_size)
X_train, X_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2)
flow = ImageGenerator().get_train_generator(X_train, y_train)
validation_generator = ValidGenerator().get_valid_generator(X_valid, y_valid)
validation_steps = 1#len(y_valid) // 128 

100%|██████████| 40479/40479 [05:27<00:00, 123.48it/s]


In [5]:
from tensorflow.contrib.keras.api.keras.callbacks import ModelCheckpoint

filepath="resnet_weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True)

In [6]:
model = ResnetModel(output_size=output_size)
pretrain = False

if pretrain:
    learn_rates = [0.001]#, 0.0001, 0.00001, 0.000001]
    epochs_list = [1]#, 1, 1, 1]
    train_losses, val_losses = [], []

    for lr, epochs in zip(learn_rates, epochs_list):
        tmp_train_losses, tmp_val_losses, fbeta_score, thresholds = model.fit(flow, epochs, lr, validation_generator, y_valid, train_callbacks=[checkpoint], batches=batches)
        print("fbeta = {}".format(fbeta_score))
        train_losses += tmp_train_losses
        val_losses += tmp_val_losses
        
    learn_rates = [0.001]#, 0.0001, 0.00001, 0.000001]
    epochs_list = [1]#, 1, 1, 1]

    for lr, epochs in zip(learn_rates, epochs_list):
        tmp_train_losses, tmp_val_losses, fbeta_score, thresholds = model.fit(flow, epochs, lr, validation_generator,y_valid, resnet_layers_trainable=15,  train_callbacks=[checkpoint], batches=batches)
        print("fbeta = {}".format(fbeta_score))
        train_losses += tmp_train_losses
        val_losses += tmp_val_losses

  self.model = Model(input=self.resnet.input, output=predictions)


In [7]:
model.load_weights("resnet_weights.best.hdf5")
print("Weights loaded")

Weights loaded


resnet

In [8]:
plt.plot(train_losses, label='Training loss')
plt.plot(val_losses, label='Validation loss')
plt.legend();

NameError: name 'train_losses' is not defined

In [None]:
fbeta_score

In [None]:
del X_train, y_train
gc.collect()

x_test, x_test_filename = get_test_matrices("/Users/westside/dev/python/planet-amazon-deforestation/input/test-jpg", img_size)
predictions = model.predict(x_test)

del x_test
gc.collect()

x_test, x_test_filename_additional = get_test_matrices("/Users/westside/dev/python/planet-amazon-deforestation/input/test-jpg-additional", img_size)
new_predictions = model.predict(x_test)

del x_test
gc.collect()

predictions = np.vstack((predictions, new_predictions))
x_test_filename = np.hstack((x_test_filename, x_test_filename_additional))
print("Predictions shape: {}\nFiles name shape: {}\n1st predictions entry:\n{}".format(predictions.shape, 
                                                                              x_test_filename.shape,
                                                                              predictions[0]))

In [None]:
fb_score, thresholds = model.get_fbeta_score(validation_generator, y_valid, validation_steps)

tags_pred = np.array(predictions).T
_, axs = plt.subplots(5, 4, figsize=(15, 20))
axs = axs.ravel()

for i, tag_vals in enumerate(tags_pred):
    sns.boxplot(tag_vals, orient='v', palette='Set2', ax=axs[i]).set_title(labels_map[i])

In [None]:
predicted_labels = model.map_predictions(predictions, labels_map, thresholds)

In [None]:
tags_list = [None] * len(predicted_labels)
for i, tags in enumerate(predicted_labels):
    tags_list[i] = ' '.join(map(str, tags))

final_data = [[filename.split(".")[0], tags] for filename, tags in zip(x_test_filename, tags_list)]

In [None]:
final_df = pd.DataFrame(final_data, columns=['image_name', 'tags'])
final_df.head()

In [None]:
tags_s = pd.Series(list(chain.from_iterable(predicted_labels))).value_counts()
fig, ax = plt.subplots(figsize=(16, 8))
sns.barplot(x=tags_s, y=tags_s.index, orient='h');

In [None]:
final_df.to_csv('submission_file.csv', index=False)
model.close()