In [1]:
# https://www.kaggle.com/mathormad/inceptionv3-baseline-lb-0-379/code
# fork of scratch8, 29

In [2]:
%matplotlib inline
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io
from skimage.transform import resize
from imgaug import augmenters as iaa
from tqdm import tqdm
import PIL
from PIL import Image
import cv2
from sklearn.utils import class_weight, shuffle
import keras_metrics

import warnings
warnings.filterwarnings("ignore")
SIZE = 512

Using TensorFlow backend.


In [3]:
# https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric/notebook
import tensorflow as tf

def f1(y_true, y_pred):
    """Macro-averaged F1 score computed on rounded predictions.

    Counts are summed over axis 0, so one precision/recall/F1 value is
    produced per remaining position and the mean of those is returned.
    # assumes y_true / y_pred are (batch, n_classes) multi-hot / scores — TODO confirm

    Args:
        y_true: ground-truth tensor.
        y_pred: predicted scores; rounded with K.round before counting.

    Returns:
        Scalar tensor holding the mean F1.
    """
    y_pred = K.round(y_pred)
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    # K.epsilon() keeps the divisions finite when a denominator is zero.
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1_per_class = 2*p*r / (p+r+K.epsilon())
    # Replace any NaN entry with 0 before averaging.
    f1_per_class = tf.where(tf.is_nan(f1_per_class), tf.zeros_like(f1_per_class), f1_per_class)
    return K.mean(f1_per_class)

def f1_loss(y_true, y_pred):
    """Binary cross-entropy plus a soft (1 - F1) penalty.

    Unlike `f1`, the predictions are NOT rounded, so the F1 term is built
    from the raw scores.

    Args:
        y_true: ground-truth tensor.
        y_pred: predicted scores.

    Returns:
        Tensor: binary cross-entropy averaged over the last axis, plus the
        scalar (1 - mean F1) added to every entry.
    """
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    # K.epsilon() keeps the divisions finite when a denominator is zero.
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    soft_f1 = 2*p*r / (p+r+K.epsilon())
    # Replace any NaN entry with 0 before averaging.
    soft_f1 = tf.where(tf.is_nan(soft_f1), tf.zeros_like(soft_f1), soft_f1)
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) + (1 - K.mean(soft_f1))

In [4]:
# Training metadata: one CSV row per sample, plus the directory holding the
# per-channel PNG files that the row's Id refers to.
data = pd.read_csv('../data/train.csv')
path_to_train = '../data/train/'
# External data is currently disabled:
# path_to_external_data = '../data/external_data/external_data_1/'
# edata = pd.read_csv('../data/external_data.csv')

In [5]:
# Preview the first rows of the training metadata.
data.head()

Unnamed: 0,Id,Target
0,00070df0-bbc3-11e8-b2bc-ac1f6b6435d0,16 0
1,000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0,7 1 2 0
2,000a9596-bbc4-11e8-b2bc-ac1f6b6435d0,5
3,000c99ba-bba4-11e8-b2b9-ac1f6b6435d0,1
4,001838f8-bbca-11e8-b2bc-ac1f6b6435d0,18


In [6]:
# One record per training sample: the path prefix of its channel images and
# the integer label ids parsed from the space-separated Target column.
# The records are kept in an object array so they can be fancy-indexed later.
# (External data used to be appended here as well; that path is disabled.)
train_dataset_info = np.array([
    {
        'path': os.path.join(path_to_train, name),
        'labels': np.array(list(map(int, labels))),
    }
    for name, labels in zip(data['Id'], data['Target'].str.split(' '))
])

In [7]:
# Display the assembled records (object array of {'path', 'labels'} dicts).
train_dataset_info

array([{'path': '../data/train/00070df0-bbc3-11e8-b2bc-ac1f6b6435d0', 'labels': array([16,  0])},
       {'path': '../data/train/000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0', 'labels': array([7, 1, 2, 0])},
       {'path': '../data/train/000a9596-bbc4-11e8-b2bc-ac1f6b6435d0', 'labels': array([5])},
       ...,
       {'path': '../data/train/fff189d8-bbab-11e8-b2ba-ac1f6b6435d0', 'labels': array([7])},
       {'path': '../data/train/fffdf7e0-bbc4-11e8-b2bc-ac1f6b6435d0', 'labels': array([25,  2, 21])},
       {'path': '../data/train/fffe0ffe-bbc0-11e8-b2bb-ac1f6b6435d0', 'labels': array([2, 0])}],
      dtype=object)

In [8]:
class data_generator:
    """Namespace of static helpers for loading images and yielding batches.

    The class is never instantiated; every helper is a static method and is
    called as ``data_generator.<name>(...)``.
    """

    @staticmethod
    def create_train(dataset_info, batch_size, shape, augument=True, n_classes=28):
        """Endlessly yield ``(images, labels)`` batches.

        Args:
            dataset_info: sequence of dicts with a 'path' entry (passed to
                `load_image`) and a 'labels' entry (indices set to 1 in the
                label row). It is reshuffled at the start of every pass.
            batch_size: maximum number of samples per batch; the last batch
                of a pass may be smaller.
            shape: target image shape; the third entry must be 3.
            augument: when true, each image goes through `augment`.
                (The spelling is kept because callers pass it by keyword.)
            n_classes: width of the multi-hot label rows (default 28).

        Yields:
            Tuple of a float32 array of images divided by 255 and a
            ``(len(batch), n_classes)`` array of 0/1 labels.

        Raises:
            ValueError: if dataset_info is empty (the loop would otherwise
                spin forever without yielding).
        """
        assert shape[2] == 3
        if len(dataset_info) == 0:
            raise ValueError('dataset_info is empty; nothing to generate')
        while True:
            dataset_info = shuffle(dataset_info)
            for start in range(0, len(dataset_info), batch_size):
                # Slicing past the end simply yields a shorter final batch.
                batch_info = dataset_info[start:start + batch_size]
                batch_images = []
                batch_labels = np.zeros((len(batch_info), n_classes))
                for i, sample in enumerate(batch_info):
                    image = data_generator.load_image(sample['path'], shape)
                    if augument:
                        image = data_generator.augment(image)
                    batch_images.append(image/255.)
                    batch_labels[i][sample['labels']] = 1
                yield np.array(batch_images, np.float32), batch_labels

    @staticmethod
    def load_image(path, shape):
        """Load the red, green and blue channel PNGs and resize them.

        Args:
            path: path prefix; '_red.png', '_green.png' and '_blue.png' are
                appended to it. The '_yellow.png' channel is not part of the
                returned image and is therefore no longer opened.
            shape: target shape; shape[0] is used as the height and shape[1]
                as the width of the result.

        Returns:
            Array with the three channels stacked on the last axis, resized
            to ``(shape[0], shape[1], 3)``.
        """
        channels = []
        for colour in ('red', 'green', 'blue'):
            # The context manager closes the file once the pixels are copied.
            with Image.open(path + '_' + colour + '.png') as channel:
                channels.append(np.array(channel))
        image = np.stack(channels, -1)
        # cv2.resize expects dsize as (width, height), i.e. (shape[1], shape[0]).
        return cv2.resize(image, (shape[1], shape[0]))

    @staticmethod
    def load_image2(path, shape):
        """Backward-compatible alias of `load_image` (the bodies were identical)."""
        return data_generator.load_image(path, shape)

    @staticmethod
    def augment(image):
        """Apply one randomly chosen flip/rotation augmenter to ``image``.

        One of the listed augmenters is picked per call: a rotation by
        0, 90, 180 or 270 degrees, or a horizontal / vertical flip that is
        itself applied with probability 0.5.
        """
        augment_img = iaa.Sequential([
            iaa.OneOf([
                iaa.Affine(rotate=0),
                iaa.Affine(rotate=90),
                iaa.Affine(rotate=180),
                iaa.Affine(rotate=270),
                iaa.Fliplr(0.5),
                iaa.Flipud(0.5),
            ])], random_order=True)

        image_aug = augment_img.augment_image(image)
        return image_aug

    @staticmethod
    def augment2(image):
        """Apply one randomly chosen augmenter from a broader set.

        Besides the flips/rotations of `augment`, the candidates include
        cropping, blur, contrast change, additive noise, brightness scaling
        and a general affine transform.
        """
        augment_img = iaa.Sequential([
            iaa.OneOf([
                    iaa.Fliplr(0.5), # horizontal flips
                    iaa.Affine(rotate=0),
                    iaa.Affine(rotate=90),
                    iaa.Affine(rotate=180),
                    iaa.Affine(rotate=270),
                    iaa.Flipud(0.5),
                    iaa.Crop(percent=(0, 0.1)), # random crops
                    # Small gaussian blur with random sigma between 0 and 0.5.
                    # But we only blur about 50% of all images.
                    iaa.Sometimes(0.5,
                        iaa.GaussianBlur(sigma=(0, 0.5))
                    ),
                    # Strengthen or weaken the contrast in each image.
                    iaa.ContrastNormalization((0.75, 1.5)),
                    # Add gaussian noise.
                    # For 50% of all images, we sample the noise once per pixel.
                    # For the other 50% of all images, we sample the noise per pixel AND
                    # channel. This can change the color (not only brightness) of the
                    # pixels.
                    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
                    # Make some images brighter and some darker.
                    # In 20% of all cases, we sample the multiplier once per channel,
                    # which can end up changing the color of the images.
                    iaa.Multiply((0.8, 1.2), per_channel=0.2),
                    # Apply affine transformations to each image.
                    # Scale/zoom them, translate/move them, rotate them and shear them.
                    iaa.Affine(
                        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                        rotate=(-180, 180),
                        shear=(-8, 8)
                    )
                ])], random_order=True)

        image_aug = augment_img.augment_image(image)
        return image_aug


In [9]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation, Dropout, Flatten, Dense, Lambda
from keras.layers import GlobalMaxPooling2D, GlobalAveragePooling2D, BatchNormalization, Input, Conv2D
from keras.applications.inception_v3 import InceptionV3
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.callbacks import ModelCheckpoint
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
import keras
from keras.models import Model

In [10]:
def create_model(input_shape, n_out):
    """Build the classifier: BatchNorm -> ResNet50 -> 1x1 conv -> GAP -> sigmoid.

    Args:
        input_shape: shape of one input image, passed both to the Input layer
            and to the ImageNet-initialised ResNet50 backbone.
        n_out: number of sigmoid outputs.

    Returns:
        The assembled (uncompiled) Keras Model.
    """
    inputs = Input(shape=input_shape)
    backbone = ResNet50(weights='imagenet',
                        include_top=False,
                        input_shape=input_shape)
    # The raw input is batch-normalised before it reaches the backbone.
    features = backbone(BatchNormalization(name='bn1')(inputs))
    # 8-channel 1x1 convolution followed by global average pooling.
    features = Conv2D(8, kernel_size=(1,1), activation='relu', name='cam_conv1')(features)
    pooled = GlobalAveragePooling2D(name='cam')(features)
    predictions = Dense(n_out, activation='sigmoid')(pooled)
    return Model(inputs, predictions)

In [11]:
# Build the 28-output network on SIZE x SIZE RGB inputs and print its layers.
model = create_model(n_out=28, input_shape=(SIZE, SIZE, 3))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 512, 512, 3)       0         
_________________________________________________________________
bn1 (BatchNormalization)     (None, 512, 512, 3)       12        
_________________________________________________________________
resnet50 (Model)             (None, 16, 16, 2048)      23587712  
_________________________________________________________________
cam_conv1 (Conv2D)           (None, 16, 16, 8)         16392     
_________________________________________________________________
cam (GlobalAveragePooling2D) (None, 8)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 28)                252       
Total params: 23,604,368
Trainable params: 23,551,242
Non-trainable params: 53,126
___________________________________________________________

In [12]:
# model.compile(loss=f1_loss,
#             optimizer=Adam(lr=1e-4),
#             metrics=[f1])
# model.load_weights('../cache/IV3-34-maximus.h5')

In [13]:
# create callbacks list
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split

epochs = 10
batch_size = 16

# Keep only the weights of the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint('../cache/R50-54-maximus.h5', monitor='val_loss', verbose=1,
                             save_best_only=True, mode='min', save_weights_only = True)
# Divide the learning rate by 10 after 3 epochs without val_loss improvement.
# `min_delta` is the current name of the threshold formerly passed as `epsilon`.
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3,
                                   verbose=1, mode='auto', min_delta=0.0001)
# Stop training after 6 epochs without val_loss improvement.
early = EarlyStopping(monitor="val_loss",
                      mode="min",
                      patience=6)
callbacks_list = [checkpoint, early, reduceLROnPlat]


In [14]:

# split data into train, valid
# train_test_split already shuffles, and random_state pins that shuffle. The
# previous unseeded np.random.shuffle made the split differ on every run, so
# it is dropped to keep the train/validation partition reproducible.
indexes = np.arange(train_dataset_info.shape[0])
train_indexes, valid_indexes = train_test_split(indexes, test_size=0.15, random_state=8)

# create train and valid datagens
train_generator = data_generator.create_train(
    train_dataset_info[train_indexes], batch_size, (SIZE,SIZE,3), augument=True)
# Validation images are not augmented, so val_loss values (which drive the
# checkpoint / early-stopping / LR callbacks) are comparable between epochs.
validation_generator = data_generator.create_train(
    train_dataset_info[valid_indexes], 32, (SIZE,SIZE,3), augument=False)


In [15]:
# warm up model
# model = create_model(
#     input_shape=(SIZE,SIZE,3), 
#     n_out=28)
# model.summary()

In [16]:


# Warm-up phase: freeze everything except the last three layers of the model
# (per the summary above: cam_conv1, cam and the final dense layer).
# Raise n_trainable_tail to unfreeze more layers from the end.
n_trainable_tail = 3
first_trainable = len(model.layers) - n_trainable_tail
for position, layer in enumerate(model.layers):
    layer.trainable = position >= first_trainable

In [17]:
# Compile for the warm-up phase: combined BCE + soft-F1 loss, Adam at 1e-3,
# and the rounded F1 as the reported metric.
warmup_optimizer = Adam(1e-03)
model.compile(optimizer=warmup_optimizer, loss=f1_loss, metrics=[f1])

In [18]:
# Warm-up: two epochs with only the unfrozen head layers training.
# The validation generator was created with a batch size of 32, so the number
# of validation steps must be derived from 32, not from the training
# batch_size; otherwise the validation set is traversed about twice per epoch.
# Step counts are passed as ints.
model.fit_generator(
    train_generator,
    steps_per_epoch=int(np.ceil(float(len(train_indexes)) / float(batch_size))),
    validation_data=validation_generator,
    validation_steps=int(np.ceil(float(len(valid_indexes)) / 32.)),
    epochs=2,
    verbose=1)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f192b08e748>

In [20]:
# train all layers
epochs=120
batch_size=8
valid_batch_size = 16
# create train and valid datagens
train_generator = data_generator.create_train(
    train_dataset_info[train_indexes], batch_size, (SIZE,SIZE,3), augument=True)
# Validation images are not augmented so that val_loss, which the checkpoint,
# early-stopping and LR callbacks monitor, is comparable between epochs.
validation_generator = data_generator.create_train(
    train_dataset_info[valid_indexes], valid_batch_size, (SIZE,SIZE,3), augument=False)
for layer in model.layers:
    layer.trainable = True
# Re-compile so the changed `trainable` flags take effect.
model.compile(loss=f1_loss,
            optimizer=Adam(lr=1e-4),
            metrics=[f1])
model.fit_generator(
    train_generator,
    steps_per_epoch=int(np.ceil(float(len(train_indexes)) / float(batch_size))),
    validation_data=validation_generator,
    # One pass over the validation set: divide by the validation batch size,
    # not the training one.
    validation_steps=int(np.ceil(float(len(valid_indexes)) / float(valid_batch_size))),
    epochs=epochs,
    verbose=1,
    callbacks=callbacks_list)

Epoch 1/120

Epoch 00001: val_loss improved from inf to 0.97928, saving model to ../cache/R50-54-maximus.h5
Epoch 2/120

Epoch 00002: val_loss improved from 0.97928 to 0.96671, saving model to ../cache/R50-54-maximus.h5
Epoch 3/120

Epoch 00003: val_loss improved from 0.96671 to 0.93363, saving model to ../cache/R50-54-maximus.h5
Epoch 4/120

Epoch 00004: val_loss improved from 0.93363 to 0.91434, saving model to ../cache/R50-54-maximus.h5
Epoch 5/120

Epoch 00005: val_loss did not improve from 0.91434
Epoch 6/120

Epoch 00006: val_loss improved from 0.91434 to 0.90441, saving model to ../cache/R50-54-maximus.h5
Epoch 7/120

Epoch 00007: val_loss did not improve from 0.90441
Epoch 8/120

Epoch 00008: val_loss did not improve from 0.90441
Epoch 9/120

Epoch 00009: val_loss improved from 0.90441 to 0.90077, saving model to ../cache/R50-54-maximus.h5
Epoch 10/120

Epoch 00010: val_loss improved from 0.90077 to 0.89552, saving model to ../cache/R50-54-maximus.h5
Epoch 11/120

Epoch 00011: 

<keras.callbacks.History at 0x7f19076b7438>

In [None]:
# load_image is a static helper of data_generator and needs the target shape;
# the bare name `load_image` is not defined at module level.
# The image is scaled by 1/255 exactly as in the training and submission code.
image = data_generator.load_image(train_dataset_info[1]['path'], (SIZE, SIZE, 3)) / 255.
# Add the leading batch axis expected by model.predict.
preprocessed_input = image[np.newaxis]

In [None]:
# Scores for the single image in the batch, then the class ids whose score
# reaches the 0.2 threshold.
score_predict = model.predict(preprocessed_input)[0]
class_ids = np.arange(28)
label_predict = class_ids[score_predict >= 0.2]

In [None]:
# First predicted class id (indexing fails if no score reached the threshold).
label_predict[0]

In [None]:
# Map layer name -> layer object for every layer except the first one, so
# layers can be looked up by the unique names given in create_model.
layer_dict = {layer.name: layer for layer in model.layers[1:]}

In [None]:
# Show the name -> layer mapping.
layer_dict

In [None]:
def normalize2(x):
    """Scale a tensor by its root-mean-square value.

    Returns x divided by sqrt(mean(x**2)); K.epsilon() is added to the
    divisor so an all-zero tensor does not cause a division by zero.
    """
    rms = K.sqrt(K.mean(K.square(x)))
    return x / (rms + K.epsilon())


In [None]:
# Symbolic input tensor of the model; the visualisation cells below take
# gradients with respect to it.
input_img = model.input

In [None]:
import time
from tqdm import tqdm_notebook

# Layer whose filters are visualised. 'cam_conv1' is the 8-channel 1x1
# convolution built in create_model, which matches the range(8) scan below.
# NOTE(review): layer_name was not defined anywhere in the visible notebook —
# confirm this is the intended layer.
layer_name = 'cam_conv1'

kept_filters = []
for filter_index in tqdm_notebook(range(8)):
    # we build a loss function that maximizes the activation
    # of the nth filter of the layer considered
    layer_output = layer_dict[layer_name].output
    if K.image_data_format() == 'channels_first':
        loss = K.mean(layer_output[:, filter_index, :, :])
    else:
        # channels_last: the filter axis is the LAST one. Indexing axis 1
        # (as before) selected a spatial row instead of a filter.
        loss = K.mean(layer_output[:, :, :, filter_index])

    # we compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, input_img)[0]

    # normalization trick: we normalize the gradient
    grads = normalize2(grads)

    # this function returns the loss and grads given the input picture
    iterate = K.function([input_img], [loss, grads])

    # step size for gradient ascent
    step = 1.

    # we start from a gray image with some random noise; the shape is taken
    # from the model input so it works for either data format. Previously
    # input_img_data was read here before ever being assigned.
    # NOTE(review): the model was trained on inputs scaled to [0, 1]; this
    # starting range (about 118..138) is kept from the original formula —
    # confirm it is intended.
    input_img_data = np.random.random((1,) + tuple(K.int_shape(input_img)[1:]))
    input_img_data = (input_img_data - 0.5) * 20 + 128

    # we run gradient ascent for 100 steps
    for i in range(100):
        loss_value, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step

        if loss_value <= 0.:
            # some filters get stuck to 0, we can skip them
            break

    # decode the resulting input image
    if loss_value > 0:
        # NOTE(review): deprocess_image is not defined in the visible part of
        # this notebook — it must be provided before this cell can run.
        img = deprocess_image(input_img_data[0])
        kept_filters.append((img, loss_value))


In [None]:
# we will stich the best 64 filters on a 8 x 8 grid.
n = 3
img_width = 512
img_height = 512

# the filters that have the highest loss are assumed to be better-looking.
# we will only keep the top 64 filters.
kept_filters.sort(key=lambda x: x[1], reverse=True)
kept_filters = kept_filters[:n * n]

# build a black picture with enough space for
# our 8 x 8 filters of size 128 x 128, with a 5px margin in between
margin = 5
width = n * img_width + (n - 1) * margin
height = n * img_height + (n - 1) * margin
stitched_filters = np.zeros((width, height, 3))

# fill the picture with our saved filters
for i in range(n):
    for j in range(n):
#         print(i*n+j)
        img, loss = kept_filters[i * n + j]
        width_margin = (img_width + margin) * i
        height_margin = (img_height + margin) * j
        stitched_filters[
            width_margin: width_margin + img_width,
            height_margin: height_margin + img_height, :] = img
%matplotlib inline

import matplotlib.pyplot as plt
plt.figure(figsize=(20,10))
plt.imshow(stitched_filters)

In [None]:
# Average the kept filter images and show the result.
n = 3
image = np.zeros((512,512,3))
n_used = 0
# Slice instead of indexing i * n + j so fewer than n * n kept filters do not
# raise an IndexError.
for img, _ in kept_filters[:n * n]:
    image = image + img
    n_used += 1
# Divide the accumulated sum by the number of images actually added. The
# previous code divided and displayed `img` (the last filter only), leaving
# the accumulated `image` unused.
image = image / max(n_used, 1)
# NOTE(review): image is a float array; confirm its value range suits imshow.
plt.imshow(image)

In [None]:
import time
from tqdm import tqdm_notebook
single_label = []

# Layer whose 8 filters are maximised (the 8-channel 1x1 conv of create_model).
# NOTE(review): layer_name was not defined anywhere in the visible notebook —
# confirm this is the intended layer.
layer_name = 'cam_conv1'

# Disk-backed result arrays, sized from the dataset rather than a hard-coded
# row count. Each sample takes 8 * SIZE * SIZE * 3 float32 values (24 MiB at
# SIZE = 512), so the image file is very large for the full training set.
n_images = len(train_dataset_info)
kept_filters = np.memmap('../cache/train_cam', dtype='float32', mode='w+', shape=(n_images, 8, SIZE, SIZE, 3))
filters_loss = np.memmap('../cache/train_loss', dtype='float32', mode='w+', shape=(n_images, 8))
for filter_index in tqdm_notebook(range(8)):
    # we build a loss function that maximizes the activation
    # of the nth filter of the layer considered
    layer_output = layer_dict[layer_name].output
    if K.image_data_format() == 'channels_first':
        loss = K.mean(layer_output[:, filter_index, :, :])
    else:
        # channels_last: the filter axis is the LAST one. Indexing axis 1
        # (as before) selected a spatial row instead of a filter.
        loss = K.mean(layer_output[:, :, :, filter_index])

    # we compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, input_img)[0]

    # normalization trick: we normalize the gradient
    grads = normalize2(grads)

    # this function returns the loss and grads given the input picture
    iterate = K.function([input_img], [loss, grads])

    # step size for gradient ascent
    step = 1.

    for ii in range(n_images):
        # load_image lives on data_generator and needs the target shape; the
        # bare name `load_image` is not defined at module level.
        image = data_generator.load_image(train_dataset_info[ii]['path'], (SIZE, SIZE, 3))
        input_img_data = image[np.newaxis]
        # NOTE(review): this rescaling is applied to a real image here (it is
        # the noise-initialisation formula of the cell above) — confirm intent.
        input_img_data = (input_img_data - 0.5) * 20 + 128

        # we run gradient ascent for 50 steps
        for _ in range(50):
            loss_value, grads_value = iterate([input_img_data])
            input_img_data += grads_value * step

            if loss_value <= 0.:
                # some filters get stuck to 0, we can skip them
                break

        # decode the resulting input image; filters stuck at a non-positive
        # loss are stored as an all-zero image with loss 0.
        if loss_value > 0:
            # NOTE(review): deprocess_image is not defined in the visible part
            # of this notebook — it must be provided before this cell can run.
            kept_filters[ii, filter_index] = deprocess_image(input_img_data[0])
            filters_loss[ii, filter_index] = loss_value
        else:
            kept_filters[ii, filter_index] = np.zeros((SIZE, SIZE, 3))
            filters_loss[ii, filter_index] = 0.0

# Make sure everything written so far reaches the backing files.
kept_filters.flush()
filters_loss.flush()


In [None]:
# Create submit
# Predict every test image with the best checkpoint of this run and turn the
# scores into space-separated label strings (threshold 0.5).
submit = pd.read_csv('../data/sample_submission.csv')
model.load_weights('../cache/R50-54-maximus.h5')

class_ids = np.arange(28)
draw_predict = []   # raw score vectors, kept for re-thresholding later
predicted = []      # one label string per test image
for name in tqdm(submit['Id']):
    path = os.path.join('../data/test/', name)
    # Same preprocessing as training: load, resize, scale by 1/255.
    image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
    score_predict = model.predict(image[np.newaxis])[0]
    draw_predict.append(score_predict)
    label_predict = class_ids[score_predict>=0.5]
    str_predict_label = ' '.join(map(str, label_predict))
    predicted.append(str_predict_label)

submit['Predicted'] = predicted

  2%|▏         | 279/11702 [00:16<12:04, 15.76it/s]

In [22]:
# Re-derive the label strings from the stored raw scores (threshold 0.5).
predicted = []
for line in tqdm(draw_predict):
    label_predict = np.arange(28)[line>=0.5]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)
# Write the result back to the submission frame; without this the recomputed
# labels never reach the CSV (the 0.35-threshold cell does the same).
submit['Predicted'] = predicted

100%|██████████| 11702/11702 [00:00<00:00, 90685.89it/s]


In [23]:
# Write the current submission frame to disk (no index column).
submit.to_csv('../submissions/sub54-max-a.csv', index=False)

In [24]:
#https://stackoverflow.com/questions/1855095/how-to-create-a-zip-archive-of-a-directory
def backup_project_as_zip(project_dir, zip_file):
    """Archive the contents of ``project_dir`` into a zip file.

    Args:
        project_dir: existing directory whose contents are archived.
        zip_file: destination path. A trailing '.zip' is optional; the
            archive is always written with exactly one '.zip' suffix. Its
            directory must exist (a bare filename means the current directory).

    Returns:
        The path of the created archive, as returned by shutil.make_archive.

    Raises:
        AssertionError: if project_dir or the destination directory is missing.
    """
    # Imported here so the function works regardless of cell execution order.
    import shutil

    assert os.path.isdir(project_dir), 'project_dir is not a directory: %r' % (project_dir,)
    # dirname is '' for a bare filename; treat that as the current directory.
    assert os.path.isdir(os.path.dirname(zip_file) or os.curdir), \
        'destination directory does not exist for: %r' % (zip_file,)
    # make_archive appends '.zip' itself, so strip only a trailing suffix;
    # str.replace would also remove '.zip' occurring inside the name.
    suffix = '.zip'
    base_name = zip_file[:-len(suffix)] if zip_file.endswith(suffix) else zip_file
    return shutil.make_archive(base_name, 'zip', project_dir)

In [25]:
import datetime, shutil

# Snapshot the scripts/notebooks directory into a timestamped zip in ../cache.
PROJECT_PATH = '/home/watts/lal/Kaggle/kagglehp/scripts_nbs'
now = datetime.datetime.now()
print(now)
archive_path = '../cache/code.scripts_nbs.%s.zip' % now
backup_project_as_zip(PROJECT_PATH, archive_path)

2018-12-12 12:25:06.153704


In [26]:
%%time
# Upload a submission file through the Kaggle CLI.
# NOTE(review): this uploads sub48-max-a.csv, whereas the CSV written earlier
# in this notebook is ../submissions/sub54-max-a.csv — confirm the intended file.
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub48-max-a.csv -m ""

100%|████████████████████████████████████████| 479k/479k [00:13<00:00, 36.8kB/s]
Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 410 ms, sys: 162 ms, total: 572 ms
Wall time: 17.5 s


In [27]:
# Pause for 10 seconds (presumably to let the upload be scored), then list
# this account's submissions for the competition.
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName          date                 description  status    publicScore  privateScore  
----------------  -------------------  -----------  --------  -----------  ------------  
sub48-max-a.csv   2018-12-12 06:55:45               complete  0.457        None          
sub44-max-t.csv   2018-12-10 20:51:55               complete  0.019        None          
sub41-t-l.csv     2018-12-08 09:27:37               complete  0.541        None          
sub41-max-b.csv   2018-12-08 08:04:32               complete  0.478        None          
sub41-max-a.csv   2018-12-08 08:03:03               complete  0.479        None          
sub41-c.csv       2018-12-07 01:29:14               complete  0.493        None          
sub41-f.csv       2018-12-07 01:28:39               complete  0.481        None          
sub41-t.csv       2018-12-07 01:27:50               complete  0.498        None          
sub41-v.csv       2018-12-07 01:27:04               complete  0.484        None          

In [28]:
# Re-derive the label strings from the stored raw scores with a lower
# threshold (0.35) and write them out as a second submission file.
# NOTE(review): the file name still says "sub48" — confirm it matches this run.
threshold = 0.35
class_ids = np.arange(28)
predicted = [
    ' '.join(str(l) for l in class_ids[line >= threshold])
    for line in tqdm(draw_predict)
]
submit['Predicted'] = predicted
submit.to_csv('../submissions/sub48-max-b.csv', index=False)

100%|██████████| 11702/11702 [00:00<00:00, 86817.64it/s]


In [29]:
%%time
# Upload the 0.35-threshold submission through the Kaggle CLI.
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub48-max-b.csv -m ""

# Pause for 10 seconds (presumably to let the upload be scored), then list
# this account's submissions.
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

100%|████████████████████████████████████████| 470k/470k [00:14<00:00, 33.7kB/s]
Successfully submitted to Human Protein Atlas Image ClassificationfileName          date                 description  status    publicScore  privateScore  
----------------  -------------------  -----------  --------  -----------  ------------  
sub48-max-b.csv   2018-12-12 06:57:44               complete  0.449        None          
sub48-max-a.csv   2018-12-12 06:55:45               complete  0.457        None          
sub44-max-t.csv   2018-12-10 20:51:55               complete  0.019        None          
sub41-t-l.csv     2018-12-08 09:27:37               complete  0.541        None          
sub41-max-b.csv   2018-12-08 08:04:32               complete  0.478        None          
sub41-max-a.csv   2018-12-08 08:03:03               complete  0.479        None          
sub41-c.csv       2018-12-07 01:29:14               complete  0.493        None          
sub41-f.csv       2018-12-07 01:28:39      

In [15]:
# Create submit
# Rebuild the network, load stored weights and predict every test image
# (threshold 0.35).

submit = pd.read_csv('../data/sample_submission.csv')

# NOTE(review): the weight file is named IV3-34 while create_model in this
# notebook builds a ResNet50 backbone — confirm the file matches the model.
model = create_model(n_out=28, input_shape=(SIZE,SIZE,3))
for layer in model.layers:
    layer.trainable = True
model.compile(optimizer=Adam(lr=1e-4),
              loss=f1_loss,
              metrics=[f1])
model.load_weights('../cache/IV3-34-maximus.h5')

class_ids = np.arange(28)
predicted, draw_predict = [], []
for name in tqdm(submit['Id']):
    path = os.path.join('../data/test/', name)
    # Same preprocessing as training: load, resize, scale by 1/255.
    image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
    score_predict = model.predict(image[np.newaxis])[0]
    draw_predict.append(score_predict)
    label_predict = class_ids[score_predict>=0.35]
    str_predict_label = ' '.join(map(str, label_predict))
    predicted.append(str_predict_label)

submit['Predicted'] = predicted

100%|██████████| 11702/11702 [08:21<00:00, 23.34it/s]


In [16]:
# Write the current submission frame to disk (no index column).
submit.to_csv('../submissions/sub34a-max.csv', index=False)

In [17]:
%%time
# Upload sub34a-max.csv through the Kaggle CLI.
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub34a-max.csv -m ""

# Pause for 60 seconds (presumably to let the upload be scored), then list
# this account's submissions.
from time import sleep
sleep(60)
!kaggle competitions submissions -c human-protein-atlas-image-classification

100%|████████████████████████████████████████| 472k/472k [00:12<00:00, 37.5kB/s]
Successfully submitted to Human Protein Atlas Image ClassificationfileName        date                 description  status    publicScore  privateScore  
--------------  -------------------  -----------  --------  -----------  ------------  
sub34a-max.csv  2018-11-24 18:50:22               complete  0.469        None          
sub34-max.csv   2018-11-24 17:27:36               complete  0.473        None          
sub33-h.csv     2018-11-24 06:48:41               complete  0.464        None          
sub33-g.csv     2018-11-24 06:46:19               complete  0.472        None          
sub33-c.csv     2018-11-23 11:48:41               complete  0.493        None          
sub33-bb.csv    2018-11-23 11:47:32               complete  0.493        None          
sub33-b.csv     2018-11-23 11:46:26               complete  0.498        None          
sub33-a.csv     2018-11-23 11:45:09               complete  0

In [18]:
# Create submit
# Variant that sends each test image through one random flip/rotation before
# predicting, thresholds at 0.25 and writes the CSV directly.

submit = pd.read_csv('../data/sample_submission.csv')

# NOTE(review): the weight file is named IV3-34 while create_model in this
# notebook builds a ResNet50 backbone — confirm the file matches the model.
model = create_model(n_out=28, input_shape=(SIZE,SIZE,3))
for layer in model.layers:
    layer.trainable = True
model.compile(optimizer=Adam(lr=1e-4),
              loss=f1_loss,
              metrics=[f1])
model.load_weights('../cache/IV3-34-maximus.h5')

class_ids = np.arange(28)
predicted, draw_predict = [], []
for name in tqdm(submit['Id']):
    path = os.path.join('../data/test/', name)
    image = data_generator.load_image(path, (SIZE,SIZE,3))
    # The augmentation is random, so predictions differ from run to run.
    image = data_generator.augment(image) / 255.
    score_predict = model.predict(image[np.newaxis])[0]
    draw_predict.append(score_predict)
    label_predict = class_ids[score_predict>=0.25]
    str_predict_label = ' '.join(map(str, label_predict))
    predicted.append(str_predict_label)

submit['Predicted'] = predicted

submit.to_csv('../submissions/sub34b-max.csv', index=False)


100%|██████████| 11702/11702 [09:22<00:00, 20.79it/s]


In [19]:
%%time
# Upload sub34b-max.csv through the Kaggle CLI.
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub34b-max.csv -m ""

# Pause for 60 seconds (presumably to let the upload be scored), then list
# this account's submissions.
from time import sleep
sleep(60)
!kaggle competitions submissions -c human-protein-atlas-image-classification

100%|████████████████████████████████████████| 476k/476k [00:13<00:00, 36.9kB/s]
Successfully submitted to Human Protein Atlas Image ClassificationfileName        date                 description  status    publicScore  privateScore  
--------------  -------------------  -----------  --------  -----------  ------------  
sub34b-max.csv  2018-11-24 19:03:59               complete  0.459        None          
sub34a-max.csv  2018-11-24 18:50:22               complete  0.469        None          
sub34-max.csv   2018-11-24 17:27:36               complete  0.473        None          
sub33-h.csv     2018-11-24 06:48:41               complete  0.464        None          
sub33-g.csv     2018-11-24 06:46:19               complete  0.472        None          
sub33-c.csv     2018-11-23 11:48:41               complete  0.493        None          
sub33-bb.csv    2018-11-23 11:47:32               complete  0.493        None          
sub33-b.csv     2018-11-23 11:46:26               complete  0

In [None]:
# Create submit
# Variant that reuses the model already in memory, sends each test image
# through one random flip/rotation and thresholds at 0.2.
submit = pd.read_csv('../data/sample_submission.csv')

# NOTE(review): the weight file is named IV3-34 while create_model in this
# notebook builds a ResNet50 backbone — confirm the file matches the model.
model.load_weights('../cache/IV3-34-maximus.h5')

class_ids = np.arange(28)
predicted, draw_predict = [], []
for name in tqdm(submit['Id']):
    path = os.path.join('../data/test/', name)
    image = data_generator.load_image(path, (SIZE,SIZE,3))
    # The augmentation is random, so predictions differ from run to run.
    image = data_generator.augment(image) / 255.
    score_predict = model.predict(image[np.newaxis])[0]
    draw_predict.append(score_predict)
    label_predict = class_ids[score_predict>=0.2]
    str_predict_label = ' '.join(map(str, label_predict))
    predicted.append(str_predict_label)

submit['Predicted'] = predicted

In [27]:
# Pause for 60 seconds, then list this account's submissions for the competition.
from time import sleep
sleep(60)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName       date                 description  status    publicScore  privateScore  
-------------  -------------------  -----------  --------  -----------  ------------  
sub34-max.csv  2018-11-24 17:27:36               complete  0.473        None          
sub33-h.csv    2018-11-24 06:48:41               complete  0.464        None          
sub33-g.csv    2018-11-24 06:46:19               complete  0.472        None          
sub33-c.csv    2018-11-23 11:48:41               complete  0.493        None          
sub33-bb.csv   2018-11-23 11:47:32               complete  0.493        None          
sub33-b.csv    2018-11-23 11:46:26               complete  0.498        None          
sub33-a.csv    2018-11-23 11:45:09               complete  0.496        None          
sub36-a.csv    2018-11-23 10:12:45               complete  0.287        None          
sub35b-c.csv   2018-11-22 08:00:23               complete  0.417        None          
sub35b-b.csv   2018-11-22 07:59:

In [26]:
# Pause for 60 seconds, then list this account's submissions for the competition.
from time import sleep
sleep(60)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName  date                 description  status    publicScore  privateScore  
--------  -------------------  -----------  --------  -----------  ------------  
sub8.csv  2018-10-20 20:08:45               complete  0.422        None          
sub7.csv  2018-10-20 17:06:09               complete  0.389        None          
sub5.csv  2018-10-19 18:27:33               complete  0.387        None          
sub4.csv  2018-10-19 14:45:15               complete  0.411        None          
sub3.csv  2018-10-19 10:19:26               complete  0.377        None          
sub2.csv  2018-10-19 08:07:30               complete  0.135        None          
sub1.csv  2018-10-19 06:28:57               complete  0.374        None          
