In [14]:
import numpy as np
import pandas as pd
import json
import csv

import tensorflow as tf
import keras.backend as K

from os import listdir, rename, makedirs
from os.path import isfile, join, exists
from shutil import copyfile

from keras.applications import densenet, xception
from keras.backend.tensorflow_backend import set_session
from keras.models import Sequential, Model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, LearningRateScheduler, EarlyStopping
from keras.optimizers import SGD, Adam
from keras.layers import Dropout, Dense, Activation
from keras.layers.normalization import BatchNormalization
from keras import regularizers
from keras import utils
from keras.preprocessing import image

from sklearn.metrics import accuracy_score

from matplotlib import pyplot as plt

from demo_utils import plot_history, plot_lr
from model_utils import evaluate_model, get_checkpoint
from clr import CyclicLR

%matplotlib inline

np.random.seed(42)
tf.set_random_seed(42)

This setting allows Tensorflow to allocate GPU memory in runtime rather than at the session initialization. Remove this cell if you don't have GPU.

In [2]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
set_session(tf.Session(config=config))

In [10]:
train_data_dir = 'train'
val_data_dir = 'validation'
test_data_dir = 'test'

img_height = 299
img_width = 299

batch_size = 16

nb_train_samples, nb_validation_samples, nb_test_samples = (37184 + 25600, 12800, 25600)

# Generate pseudolabeled dataset

In [4]:
labels = pd.read_csv('test_predictions.csv', index_col=0)
labels.head()

Unnamed: 0_level_0,predicted,predicted_tta,actual
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2593_1,1,1,1
3758_1,38,38,1
426_1,85,85,1
4465_1,1,1,1
6315_1,1,1,1


In [5]:
test_datagen = image.ImageDataGenerator(preprocessing_function=xception.preprocess_input)

validation_generator = test_datagen.flow_from_directory(
        test_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)

Found 25600 images belonging to 128 classes.


In [6]:
inverse_class_dictionary = {validation_generator.class_indices[k]:k for k in validation_generator.class_indices}

In [7]:
for i, file in enumerate(validation_generator.filenames):
    id = file.split('.')[0].split('/')[1]

    label = labels.loc[id, 'predicted_tta']
    label = inverse_class_dictionary[label - 1] # We have 1-based labels here, so we subtract 1.

    source_file = join(test_data_dir, file)

    target_name = f'{file.split(".")[0]}_psl.jpg'
    target_file = join(train_data_dir, target_name)

    copyfile(source_file, target_file)

# Load all needed functions and train model with pseudolabels

### Augmentations

In [4]:
import cv2

from albumentations import Compose, OneOf, HorizontalFlip, RandomBrightness, RandomContrast, ShiftScaleRotate, HueSaturationValue
from albumentations import MotionBlur, MedianBlur, Blur

def preprocess_input_hard(image):    
    augmentation = Compose([
        HorizontalFlip(p=0.5),
        ShiftScaleRotate(shift_limit=0.15, scale_limit=0.3, rotate_limit=15, border_mode=cv2.BORDER_REPLICATE, p=0.3),
        OneOf([
            RandomBrightness(p=0.33, limit=0.15),
            RandomContrast(p=0.33, limit=0.15),
            HueSaturationValue(hue_shift_limit=0.15, sat_shift_limit=0.15, val_shift_limit=0.15, p=0.33)
        ], p=0.3),
        OneOf([
            MotionBlur(p=0.33, blur_limit=4),
            MedianBlur(p=0.33, blur_limit=4),
            Blur(p=0.33, blur_limit=4)
        ], p=0.25),
    ], p=1.0)
    
    image_data = {'image': np.uint8(image)}
    aug_image = augmentation(**image_data)['image']
    
    return xception.preprocess_input(aug_image)

### Class weights (actual weights are different now because we changed class distribution)

In [9]:
from sklearn.utils.class_weight import compute_class_weight

train_datagen = image.ImageDataGenerator(preprocessing_function=preprocess_input_hard)

train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)

# Class weights
train_labels = utils.to_categorical(train_generator.classes)
y_integers = np.argmax(train_labels, axis=1)
class_weights = compute_class_weight('balanced', np.unique(y_integers), y_integers)
class_weights_dict = dict(enumerate(class_weights))

Found 62784 images belonging to 128 classes.


In [10]:
start_lr = 1e-5

# Final learning rate is decreased since we do not train network from the beginning.
end_lr = 1e-4
batch_size = 16
epochs = 100
steps_per_epoch = nb_train_samples // batch_size

base_model = xception.Xception(weights='imagenet',
                            include_top=False,
                            input_shape=(img_width, img_height, 3),
                            pooling='avg')

for layer in base_model.layers:
    layer.trainable = True

top_model = Sequential()    
top_model.add(Dropout(0.4, name='top_dropout', input_shape=base_model.output_shape[1:]))
top_model.add(Dense(128, activation='softmax', name='top_softmax'))

model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
model.load_weights(get_checkpoint('weights-xception-5'))

model.compile(optimizer=SGD(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

train_datagen = image.ImageDataGenerator(preprocessing_function=preprocess_input_hard)
test_datagen = image.ImageDataGenerator(preprocessing_function=xception.preprocess_input)

train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)

validation_generator = test_datagen.flow_from_directory(
        val_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)

clr = CyclicLR(base_lr=start_lr,
               max_lr=end_lr,
               step_size=2*steps_per_epoch,
               scale_mode='triangular2')

checkpointer = ModelCheckpoint(filepath=get_checkpoint('weights-xception-6'), verbose=1, monitor='val_acc', save_best_only=True)
early_stopping = EarlyStopping(patience=5, verbose=1, monitor='val_acc', mode='max')

history_6 = model.fit_generator(train_generator,
                            steps_per_epoch=nb_train_samples // batch_size,
                            epochs=epochs,
                            class_weight=class_weights_dict,
                            callbacks=[checkpointer, early_stopping, clr],
                            validation_data=validation_generator,
                            validation_steps=nb_validation_samples // batch_size)

Found 62784 images belonging to 128 classes.
Found 12800 images belonging to 128 classes.
Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.79039, saving model to weights-xception-6.hdf5
Epoch 2/100

Epoch 00002: val_acc improved from 0.79039 to 0.79148, saving model to weights-xception-6.hdf5
Epoch 3/100

Epoch 00003: val_acc did not improve from 0.79148
Epoch 4/100

Epoch 00004: val_acc improved from 0.79148 to 0.79187, saving model to weights-xception-6.hdf5
Epoch 5/100

Epoch 00005: val_acc improved from 0.79187 to 0.79305, saving model to weights-xception-6.hdf5
Epoch 6/100

Epoch 00006: val_acc did not improve from 0.79305
Epoch 7/100

Epoch 00007: val_acc did not improve from 0.79305
Epoch 8/100

Epoch 00008: val_acc did not improve from 0.79305
Epoch 9/100

Epoch 00009: val_acc did not improve from 0.79305
Epoch 10/100

Epoch 00010: val_acc did not improve from 0.79305
Epoch 00010: early stopping


# Evaluate results

In [6]:
model.load_weights(get_checkpoint('weights-xception-6'))

In [7]:
def preprocess_input_soft(image):    
    augmentation = Compose([
        HorizontalFlip(p=0.5),
        ShiftScaleRotate(shift_limit=0.10, scale_limit=0.10, rotate_limit=10, border_mode=cv2.BORDER_REPLICATE, p=0.25),
        OneOf([
            RandomBrightness(p=0.33, limit=0.1),
            RandomContrast(p=0.33, limit=0.1), 
            HueSaturationValue(hue_shift_limit=0.1, sat_shift_limit=0.1, val_shift_limit=0.1, p=0.33),
        ], p=0.25),
    ], p=1.0)
    
    image_data = {'image': np.uint8(image)}
    aug_image = augmentation(**image_data)['image']
    
    return xception.preprocess_input(aug_image)

In [11]:
probs = evaluate_model(model, 9, val_data_dir, preprocess_input_soft)

validation_generator = test_datagen.flow_from_directory(
        val_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)

results = pd.DataFrame(columns=['id', 'predicted', 'predicted_tta', 'actual'])

predictions = np.argmax(probs[0], axis=1) + 1
predictions_tta = np.argmax(np.mean(probs, axis=0), axis=1) + 1

for i, file in enumerate(validation_generator.filenames):
    id = file.split('_')[0].split('/')[1]
    predicted = predictions[i]
    predicted_tta = predictions_tta[i]
    results.loc[i] = [id, predicted, predicted_tta, validation_generator.classes[i] + 1]
    
results['actual'] = results['actual'].astype(np.int32)
results['predicted'] = results['predicted'].astype(np.int32)
results['predicted_tta'] = results['predicted_tta'].astype(np.int32)
    
accuracy = accuracy_score(results['actual'], results['predicted'])
accuracy_tta = accuracy_score(results['actual'], results['predicted_tta'])

print(f'Accuracy: {accuracy}. Accuracy with TTA: {accuracy_tta}')

Found 12800 images belonging to 128 classes.


Unnamed: 0,id,predicted,predicted_tta,actual
0,2308,1,1,1
1,3990,1,1,1
2,4122,114,1,1
3,89810,125,1,1
4,89839,1,1,1


In [20]:
accuracy = accuracy_score(results['actual'], results['predicted'])
accuracy_tta = accuracy_score(results['actual'], results['predicted_tta'])

print('Old accuracy with TTA: 0.7928125')
print(f'Accuracy: {accuracy}. Accuracy with TTA: {accuracy_tta}')

Old accuracy with TTA: 0.7928125
Accuracy: 0.793046875. Accuracy with TTA: 0.79734375


In [18]:
probs = evaluate_model(model, 9, test_data_dir, preprocess_input_soft)

validation_generator = test_datagen.flow_from_directory(
        test_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)

results = pd.DataFrame(columns=['id', 'predicted', 'predicted_tta', 'actual'])

predictions = np.argmax(probs[0], axis=1) + 1
predictions_tta = np.argmax(np.mean(probs, axis=0), axis=1) + 1

for i, file in enumerate(validation_generator.filenames):
    id = file.split('_')[0].split('/')[1]
    predicted = predictions[i]
    predicted_tta = predictions_tta[i]
    results.loc[i] = [id, predicted, predicted_tta, validation_generator.classes[i] + 1]
    
results['actual'] = results['actual'].astype(np.int32)
results['predicted'] = results['predicted'].astype(np.int32)
results['predicted_tta'] = results['predicted_tta'].astype(np.int32)
    
results.head()

Found 25600 images belonging to 128 classes.


Unnamed: 0,id,predicted,predicted_tta,actual
0,2593,1,1,1
1,3758,38,38,1
2,426,85,85,1
3,4465,1,1,1
4,6315,1,1,1


In [21]:
accuracy = accuracy_score(results['actual'], results['predicted'])
accuracy_tta = accuracy_score(results['actual'], results['predicted_tta'])

print('Old accuracy with TTA: 0.7928125')
print(f'Accuracy: {accuracy}. Accuracy with TTA: {accuracy_tta}')

Old accuracy with TTA: 0.80015625
Accuracy: 0.803125. Accuracy with TTA: 0.80703125
