In [1]:
import numpy as np
import pandas as pd
import os
import gc

import keras as k

import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm
from multiprocessing import Pool, cpu_count    

Using TensorFlow backend.


In [2]:
#%matplotlib inline

In [3]:
#np.empty((1, 128, 128, 3), dtype=np.float16).nbytes * 200000 / (1024*1024)

# 1. Data Preprocessing

This project is about mapping each images to multiple category. Labels provided are in string, therefore, we convert them into a binary array, such as "primary blooming" into [1,0,0,0,1,0,......]. Also, the images are read, normalize and augmented with rotation in parallel into a huge array in RAM.

In [2]:
inv_label_map = ['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'cultivation',
 'artisinal_mine',
 'haze',
 'primary',
 'slash_burn',
 'habitation',
 'clear',
 'road',
 'selective_logging',
 'partly_cloudy',
 'agriculture',
 'water',
 'cloudy']

label_map = {'agriculture': 14,
 'artisinal_mine': 5,
 'bare_ground': 1,
 'blooming': 3,
 'blow_down': 0,
 'clear': 10,
 'cloudy': 16,
 'conventional_mine': 2,
 'cultivation': 4,
 'habitation': 9,
 'haze': 6,
 'partly_cloudy': 13,
 'primary': 7,
 'road': 11,
 'selective_logging': 12,
 'slash_burn': 8,
 'water': 15}

In [3]:
img_height = 128
img_width  = 128

In [4]:
df_train = pd.read_csv('../input/train_v2.csv')

flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))

#key not same cannot save keras model
#label_map = {l: i for i, l in enumerate(labels)}
#inv_label_map = {i: l for l, i in label_map.items()}

Y = np.empty((df_train.shape[0]*5, 17), dtype=np.uint8)
i=0
for tags in tqdm(df_train['tags'].values, miniters=1000):
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    for j in range(i,i+5):
        Y[j,:] = targets
    i += 5
print(Y.shape)    

def get_images(names):
    i = 0
    X = np.empty((names.shape[0]*5, img_height, img_width, 3), dtype=np.float16)
    for f in tqdm(names.values, miniters=1000):
        img = cv2.imread('../input/train-jpg/{}.jpg'.format(f))
        if img_height != img.shape[0]:
            img = cv2.resize(img, (img_height, img_width))
        X[i,:,:,:] = np.array(img, np.float16)
        X[i+1,:,:,:] = np.array(cv2.flip(img, 0), np.float16)
        X[i+2,:,:,:] = np.array(cv2.flip(img, 1), np.float16)
        M = cv2.getRotationMatrix2D((img_height/2,img_width/2),90,1)
        X[i+3,:,:,:] = np.array(cv2.warpAffine(img, M, (img_height, img_width)))
        N = cv2.getRotationMatrix2D((img_height/2,img_width/2),-90,1)
        X[i+4,:,:,:] = np.array(cv2.warpAffine(img, N, (img_height, img_width)))
        i += 5
    return X / 255.

i = 0
names = df_train['image_name']
X = np.empty((names.shape[0]*5, img_height, img_width, 3), dtype=np.float16)
for f in tqdm(names.values, miniters=1000):
    img = cv2.imread('../input/train-jpg/{}.jpg'.format(f))
    if img_height != img.shape[0]:
        img = cv2.resize(img, (img_height, img_width))
    X[i,:,:,:] = np.array(img, np.float16)
    X[i+1,:,:,:] = np.array(cv2.flip(img, 0), np.float16)
    X[i+2,:,:,:] = np.array(cv2.flip(img, 1), np.float16)
    M = cv2.getRotationMatrix2D((img_height/2,img_width/2),90,1)
    X[i+3,:,:,:] = np.array(cv2.warpAffine(img, M, (img_height, img_width)))
    N = cv2.getRotationMatrix2D((img_height/2,img_width/2),-90,1)
    X[i+4,:,:,:] = np.array(cv2.warpAffine(img, N, (img_height, img_width)))
    i += 5
X = X / 255.

#multiply cpu_count if cannot fit memory
'''pool = Pool(16)
X = pool.map(
    get_images, 
    np.array_split(df_train['image_name'], 16)
)
pool.close()
pool.join()'''

print(X.shape)

100%|██████████| 40479/40479 [00:00<00:00, 141901.31it/s]
  0%|          | 0/40479 [00:00<?, ?it/s]

(202395, 17)


100%|██████████| 40479/40479 [02:13<00:00, 303.79it/s]


(202395, 128, 128, 3)


# 2. Model Building

The model used in this file is a 14-layers convolutional neural network with binary cross entropy as loss function and Adam Optimizer. The code will stop the training if loss function is not improved for 2 consecutive epochs and a checkpoint is saved whenever best performance is achieved. Finally, the performance is measured in fbeta.

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
split = 35000
#x_train, x_valid, y_train, y_valid = X[:split], X[split:], Y[:split], Y[split:]
x_train, x_valid, y_train, y_valid = train_test_split(X, Y, test_size=0.2, random_state=42)

In [4]:
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

In [5]:
def fbeta(y_true, y_pred):
    beta = 2
    threshold_shift = -0.3

    # just in case of hipster activation at the final layer
    y_pred = K.clip(y_pred, 0, 1)

    # shifting the prediction threshold from .5 if needed
    y_pred_bin = K.round(y_pred + threshold_shift)

    tp = K.sum(K.round(y_true * y_pred_bin), axis=1) + K.epsilon()
    fp = K.sum(K.round(K.clip(y_pred_bin - y_true, 0, 1)), axis=1)
    fn = K.sum(K.round(K.clip(y_true - y_pred, 0, 1)), axis=1)

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    beta_squared = beta ** 2
    return K.mean((beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall + K.epsilon()))

In [10]:
train_generator = ImageDataGenerator(
                        rotation_range=180,
                        width_shift_range=0.2, 
                        height_shift_range=0.2,
                        shear_range=0.2,
                        zoom_range=2,
                    )

In [6]:
model = Sequential()
model.add(BatchNormalization(input_shape=(128, 128, 3)))

model.add(Conv2D(32, kernel_size=(3, 3),padding='same', activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, kernel_size=(3, 3),padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, kernel_size=(3, 3),padding='same', activation='relu'))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(256, kernel_size=(3, 3),padding='same', activation='relu'))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(512, kernel_size=(3, 3),padding='same', activation='relu'))
model.add(Conv2D(512, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(17, activation='sigmoid'))

In [7]:
opt  = Adam(lr=0.001)
model.compile(loss='binary_crossentropy', # We NEED binary here, since categorical_crossentropy l1 norms the output before calculating loss.
                  optimizer=opt,
                  metrics=['accuracy', fbeta])

In [16]:
epochs_arr = [20, 5, 5]
learn_rates = [0.001, 0.0001, 0.00001]
kfold_weights_path = os.path.join('', 'weights.h5')

for learn_rate, epochs in zip(learn_rates, epochs_arr):
    opt  = Adam(lr=learn_rate)
    model.compile(loss='binary_crossentropy', # We NEED binary here, since categorical_crossentropy l1 norms the output before calculating loss.
                  optimizer=opt,
                  metrics=['accuracy', fbeta])
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=2, verbose=2),
        ModelCheckpoint(kfold_weights_path, monitor='val_loss', 
                        save_best_only=True, verbose=2)
    ]
    
    '''model.fit_generator(train_generator.flow(x_train, y_train, batch_size=128),
          steps_per_epoch=len(x_train) / 128,
          epochs=epochs,
          verbose=1,
          workers=3,
          validation_data=(x_valid, y_valid),
          callbacks=callbacks)'''

    model.fit(x = x_train, y= y_train, validation_data=(x_valid, y_valid),
      batch_size=128,verbose=1, epochs=epochs,callbacks=callbacks,shuffle=True)

Train on 161916 samples, validate on 40479 samples
Epoch 1/20

KeyboardInterrupt: 

In [None]:
#save!
model.save_weights('singlebest.h5')

In [8]:
model.load_weights('singlebest.h5')

In [12]:
from sklearn.metrics import fbeta_score, accuracy_score

In [22]:
kfold_weights_path = os.path.join('', 'weights.h5')
if os.path.isfile(kfold_weights_path):
    model.load_weights(kfold_weights_path)

In [13]:
p_valid = model.predict(x_valid, batch_size=128, verbose=1)



In [14]:
print(fbeta_score(y_valid, np.array(p_valid) > 0.2, beta=2, average='samples'))

0.926797243879


In [15]:
score = fbeta_score(y_valid, np.array(p_valid) > 0.2, beta=2, average=None)
print('F2 test scores per tag:')
[(inv_label_map[l], score[l]) for l in score.argsort()[::-1]]

F2 test scores per tag:


[('primary', 0.99100244136229076),
 ('clear', 0.98010050530676351),
 ('partly_cloudy', 0.94646066360880887),
 ('agriculture', 0.89213552999689161),
 ('cloudy', 0.89031078610603287),
 ('road', 0.85979248041024658),
 ('artisinal_mine', 0.82019848219497959),
 ('water', 0.80761800334168765),
 ('haze', 0.77914241090444403),
 ('habitation', 0.74570579167584239),
 ('cultivation', 0.65804597701149425),
 ('conventional_mine', 0.41111111111111115),
 ('selective_logging', 0.33992583436341162),
 ('bare_ground', 0.28212776534924189),
 ('blooming', 0.18088737201365188),
 ('blow_down', 0.1284796573875803),
 ('slash_burn', 0.0)]

**Observation**: the model perform well on several category with more data and totally missed several category such as slash_burn, etc. 

In [16]:
for i in range(17):
    print(inv_label_map[i], '\t:', accuracy_score(y_valid[:,i], p_valid[:,i]>0.2))

blow_down 	: 0.997356654068
bare_ground 	: 0.976234590775
conventional_mine 	: 0.997529583241
blooming 	: 0.99147706218
cultivation 	: 0.887596037452
artisinal_mine 	: 0.996244966526
haze 	: 0.95679241088
primary 	: 0.964920082018
slash_burn 	: 0.994466266459
habitation 	: 0.949257639764
clear 	: 0.948096543887
road 	: 0.918229205267
selective_logging 	: 0.990810049655
partly_cloudy 	: 0.968848044665
agriculture 	: 0.884508016502
water 	: 0.905630079794
cloudy 	: 0.97947083673


# 3. Make Prediction

Test data are preprocessed in the same way as training data, then the model predict the labels for each images and saved into submission.csv.

In [9]:
df_submission = pd.read_csv('../input/sample_submission_v2.csv')

def get_images(names):
    i = 0
    X = np.empty((names.shape[0], img_height, img_width, 3), dtype=np.float16)
    for f in tqdm(names.values, miniters=1000):
        img = cv2.imread('../input/test-jpg/{}.jpg'.format(f))
        if img_height != img.shape[0]:
            img = cv2.resize(img, (img_height, img_width))
        X[i,:,:,:] = np.array(img, np.float16)
        i += 1
    return X / 255.

pool = Pool(cpu_count())
X_submission = np.concatenate(pool.map(
    get_images, 
    np.array_split(df_submission['image_name'], cpu_count())
))
pool.close()
pool.join()
print(X_submission.shape)

100%|██████████| 15298/15298 [00:53<00:00, 284.98it/s]
100%|██████████| 15298/15298 [00:54<00:00, 283.17it/s]
100%|██████████| 15298/15298 [00:54<00:00, 283.05it/s]
100%|██████████| 15297/15297 [00:54<00:00, 282.93it/s]


(61191, 128, 128, 3)


In [10]:
predict = model.predict(X_submission, batch_size = 128, verbose=1)



In [11]:
result = pd.DataFrame(np.array(predict) > 0.2)
preds = []
sorted_tags = pd.Series(inv_label_map)

for i in tqdm(range(result.shape[0]), miniters=1000):
    preds.append(' '.join(list(
        sorted_tags[np.where(result.loc[i] == 1)[0]]
    )))

100%|██████████| 61191/61191 [00:54<00:00, 1127.14it/s]


In [12]:
df_submission['tags'] = preds
df_submission.to_csv('submission_single_deep_augment.csv', index=False)