transfer learning excluding yellow

## imports

In [1]:
import os, shutil, gc, sys
import pandas as pd
import numpy as np
from time import time
from itertools import chain
from collections import Counter

from keras.preprocessing.image import ImageDataGenerator

from PIL import Image
import imgaug as ia
from imgaug import augmenters as iaa
import cv2
import matplotlib.pyplot as plt
import tqdm as tqdm
from time import time
from datetime import datetime as dt

import keras
from keras.utils import Sequence
from keras.models import Sequential, load_model, Model
from keras.layers import Activation, Dropout, Flatten, Dense, Input, Conv2D, MaxPooling2D, BatchNormalization, Concatenate
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau, ModelCheckpoint,LearningRateScheduler,CSVLogger
from keras import metrics
from keras.optimizers import SGD, RMSprop, Adam, Adagrad, Adadelta
from keras import backend as K
from tensorflow import set_random_seed
import tensorflow as tf

root_dir = '/home/tensor/content'
project_dir = os.path.join(root_dir,'protein')
train_path = os.path.join(project_dir,'train')
test_path = os.path.join(project_dir,'test')

train = pd.read_csv(os.path.join(project_dir,'train.csv'))
sample_submission = pd.read_csv(os.path.join(project_dir,'sample_submission.csv'))


BATCH_SIZE = 16
SEED = 777
INPUT_SHAPE = (299, 299, 3)
DEBUG = True
THRESHOLD = 0.05
ia.seed(SEED)

VAL_RATIO = 0.1

Using TensorFlow backend.


try this - https://www.kaggle.com/kwentar/two-branches-xception-lb-0-3 .

densenet 169 https://github.com/flyyufelix/DenseNet-Keras/blob/master/densenet169.py


## preprocess

In [0]:
def getTrainDataset():
  paths = []
  labels =[]
  
  for name, lbl in zip(train['Id'],train['Target'].str.split(' ')):
    y = np.zeros(28)
    for key in lbl:
      y[int(key)]=1
    paths.append(os.path.join(train_path,name))
    labels.append(y)
  return np.array(paths), np.array(labels)

def getTestDataset():
    
    path_to_test = test_path
    data = sample_submission

    paths = []
    labels = []
    
    for name in data['Id']:
        y = np.ones(28)
        paths.append(os.path.join(path_to_test, name))
        labels.append(y)

    return np.array(paths), np.array(labels)
  
  
  
  
class ProteinDataGenerator(keras.utils.Sequence):
            
    def __init__(self, paths, labels, batch_size, shape, shuffle = False, use_cache = False, augment = False):
        self.paths, self.labels = paths, labels
        self.batch_size = batch_size
        self.shape = shape
        self.shuffle = shuffle
        self.use_cache = use_cache
        self.augment = augment
        if use_cache == True:
            self.cache = np.zeros((paths.shape[0], shape[0], shape[1], shape[2]), dtype=np.float16)
            self.is_cached = np.zeros((paths.shape[0]))
        self.on_epoch_end()
    
    def __len__(self):
        return int(np.ceil(len(self.paths) / float(self.batch_size)))
    
    def __getitem__(self, idx):
        indexes = self.indexes[idx * self.batch_size : (idx+1) * self.batch_size]

        paths = self.paths[indexes]
        X = np.zeros((paths.shape[0], self.shape[0], self.shape[1], self.shape[2]))
        # Generate data
        if self.use_cache == True:
            X = self.cache[indexes]
            for i, path in enumerate(paths[np.where(self.is_cached[indexes] == 0)]):
                image = self.__load_image(path)
                self.is_cached[indexes[i]] = 1
                self.cache[indexes[i]] = image
                X[i] = image
        else:
            for i, path in enumerate(paths):
                X[i] = self.__load_image(path)

        y = self.labels[indexes]
                
        if self.augment == True:
            seq = iaa.Sequential([
                iaa.OneOf([
                    iaa.Fliplr(0.5), # horizontal flips
                    iaa.Crop(percent=(0, 0.1)), # random crops
                    # Small gaussian blur with random sigma between 0 and 0.5.
                    # But we only blur about 50% of all images.
                    iaa.Sometimes(0.5,
                        iaa.GaussianBlur(sigma=(0, 0.5))
                    ),
                    # Strengthen or weaken the contrast in each image.
                    iaa.ContrastNormalization((0.75, 1.5)),
                    # Add gaussian noise.
                    # For 50% of all images, we sample the noise once per pixel.
                    # For the other 50% of all images, we sample the noise per pixel AND
                    # channel. This can change the color (not only brightness) of the
                    # pixels.
                    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
                    # Make some images brighter and some darker.
                    # In 20% of all cases, we sample the multiplier once per channel,
                    # which can end up changing the color of the images.
                    iaa.Multiply((0.8, 1.2), per_channel=0.2),
                    # Apply affine transformations to each image.
                    # Scale/zoom them, translate/move them, rotate them and shear them.
                    iaa.Affine(
                        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                        rotate=(-180, 180),
                        shear=(-8, 8)
                    )
                ])], random_order=True)

            X = np.concatenate((X, seq.augment_images(X), seq.augment_images(X), seq.augment_images(X)), 0)
            y = np.concatenate((y, y, y, y), 0)
        
        return X, y
    
    def on_epoch_end(self):
        
        # Updates indexes after each epoch
        self.indexes = np.arange(len(self.paths))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __iter__(self):
        """Create a generator that iterate over the Sequence."""
        for item in (self[i] for i in range(len(self))):
            yield item
          
            
    def __load_image(self, path):
        R = Image.open(path + '_red.png')
        G = Image.open(path + '_green.png')
        B = Image.open(path + '_blue.png')
#         Y = Image.open(path + '_yellow.png')

        im = np.stack((
            np.array(R), 
            np.array(G), 
            np.array(B)
#             ,np.array(Y)
                     ),-1)

        
        im = cv2.resize(im, (INPUT_SHAPE[0], INPUT_SHAPE[1]))
        im = np.divide(im, 255)
        return im
    
paths, labels = getTrainDataset()
train_generator = ProteinDataGenerator(paths, labels, BATCH_SIZE, INPUT_SHAPE)

def f1(y_true, y_pred):
    y_pred = K.round(y_pred)
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(f1)

def f1_loss(y_true, y_pred):
    
    #y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), THRESHOLD), K.floatx())
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return 1-K.mean(f1)
  
paths, labels = getTrainDataset()
keys = np.arange(paths.shape[0], dtype=np.int)  
np.random.seed(SEED)
np.random.shuffle(keys)
lastTrainIndex = int((1-VAL_RATIO) * paths.shape[0])


pathsTrain = paths[0:lastTrainIndex]
labelsTrain = labels[0:lastTrainIndex]
pathsVal = paths[lastTrainIndex:]
labelsVal = labels[lastTrainIndex:]

## transfer learning

In [0]:
def create_model(BASE_MODEL,optimizer,turn_off=False):
  if BASE_MODEL=='VGG16':
      from keras.applications.vgg16 import VGG16 as PTModel, preprocess_input
  elif BASE_MODEL=='vgg19':
      from keras.applications.vgg19 import VGG19 as PTModel, preprocess_input
  elif BASE_MODEL=='ResNet50':
      from keras.applications.resnet50 import ResNet50 as PTModel, preprocess_input
  elif BASE_MODEL=='InceptionV3':
      from keras.applications.inception_v3 import InceptionV3 as PTModel, preprocess_input
  elif BASE_MODEL=='Xception':
      from keras.applications.xception import Xception as PTModel, preprocess_input
  elif BASE_MODEL=='DenseNet169': 
      from keras.applications.densenet import DenseNet169 as PTModel, preprocess_input
  elif BASE_MODEL=='DenseNet121':
      from keras.applications.densenet import DenseNet121 as PTModel, preprocess_input
  else:
      raise ValueError('Unknown model: {}'.format(BASE_MODEL))
  
  keras.backend.set_learning_phase(1)
  
  img_rows, img_cols, img_channel = INPUT_SHAPE
  base_model = PTModel(weights='imagenet'
                     ,include_top=False, input_shape=(img_rows, img_cols, img_channel))

  add_model = Sequential()
  add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
  add_model.add(Dense(128, activation='relu'))
  add_model.add(Dense(28, activation='sigmoid'))

  model = Model(inputs=base_model.input, outputs=add_model(base_model.output))


#   for layer in base_model.layers:
#       layer.trainable = False

#       if layer.name.startswith('bn'):
#           layer.call(layer.input, training=False)

#   base_model.layers[-2].trainable = True
#   base_model.layers[-1].trainable = True

  model.compile(loss='binary_crossentropy', 
                optimizer=optimizer,
                metrics=['accuracy',f1])
  return model

def transfer_learning(model,BASE_MODEL,epoch=4 ,batch_size=64):
  
  datetime_now = dt.now().strftime("%Y%m%d_%H%M_")
  
  check_point_name = datetime_now + BASE_MODEL + '.model'
  check_point_name = os.path.join(project_dir,check_point_name)
  model_weights = datetime_now + BASE_MODEL + '.h5'
  model_weights = os.path.join(project_dir,model_weights)
  log_file = datetime_now + '_log.csv'
  log_file = os.path.join(project_dir,log_file)
  
  train_generator = ProteinDataGenerator(pathsTrain, labelsTrain, BATCH_SIZE, INPUT_SHAPE)
  validation_generator = ProteinDataGenerator(pathsVal, labelsVal, BATCH_SIZE, INPUT_SHAPE)  
  
#   STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
#   STEP_SIZE_VALID=validation_generator.n//validation_generator.batch_size
    
  STEP_SIZE_TRAIN=pathsTrain.shape[0]//train_generator.batch_size
  STEP_SIZE_VALID=pathsTrain.shape[0]//validation_generator.batch_size
  
  reduceLROnPlato = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, mode='min')
  earlyStopping = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=8, verbose=1, mode='auto')
  checkPoint = ModelCheckpoint(check_point_name, monitor='val_acc', save_best_only=True)
  csv_logger = CSVLogger(log_file, append=True, separator=';')
  
  history = model.fit_generator(generator=train_generator,
                      steps_per_epoch=STEP_SIZE_TRAIN,
                      validation_data=validation_generator,
                      use_multiprocessing=True,
                      epochs=epoch,
                      validation_steps=STEP_SIZE_VALID,
                     callbacks=[csv_logger,checkPoint,reduceLROnPlato, earlyStopping])
#   model.save_weights(model_weights)
  
  return history, model

In [0]:
%%time
optimizer = SGD(lr=1e-2, momentum=0.9)
# model = create_model('DenseNet169', optimizer, batch_size = 16, turn_off=False)

# saved_model=[f for f in os.listdir(project_dir) if f.endswith('.model')][-1]
# model = load_model(saved_model, custom_objects={'f1': f1})

history, DenseNet169_model = transfer_learning(model,'DenseNet169',epoch=4,batch_size=16)

In [4]:
%%time
saved_model=max([f for f in os.listdir(project_dir) if f.endswith('.model')])
model = load_model(project_dir+'/'+saved_model, custom_objects={'f1': f1})

history, DenseNet169_model = transfer_learning(model,'DenseNet169',epoch=1)

Epoch 1/1
CPU times: user 16min 49s, sys: 5min 54s, total: 22min 43s
Wall time: 18min 15s


## submit

In [0]:
saved_model=max([f for f in os.listdir(project_dir) if f.endswith('.model')])
model = load_model(project_dir+'/'+saved_model, custom_objects={'f1': f1})

In [0]:
%%time
pathsTest, labelsTest = getTestDataset()

testg = ProteinDataGenerator(pathsTest, labelsTest, BATCH_SIZE, INPUT_SHAPE)
P = np.zeros((pathsTest.shape[0], 28))
for i in tqdm.tqdm(range(len(testg))):
    images, labels = testg[i]
    score = model.predict(images)
    P[i*BATCH_SIZE:i*BATCH_SIZE+score.shape[0]] = score
    
PP = np.array(P)
prediction = []

for row in tqdm.tqdm(range(sample_submission.shape[0])):
    
    str_label = ''
    
    for col in range(PP.shape[1]):
        if(PP[row, col] < 0.25):   # to account for losing TP is more costly than decreasing FP
            #print(PP[row])
            str_label += ''
        else:
            str_label += str(col) + ' '
    prediction.append(str_label.strip())
    
sample_submission['Predicted'] = np.array(prediction)
sample_submission.to_csv(project_dir+'/DenseNet169_model3.csv', index=False)

100%|██████████| 732/732 [03:03<00:00,  4.78it/s]
100%|██████████| 11702/11702 [00:00<00:00, 99317.56it/s]

CPU times: user 2min 15s, sys: 21 s, total: 2min 36s
Wall time: 3min 3s





In [0]:
os.chdir(project_dir)
sample_submission['Predicted'] = P
sample_submission.to_csv('DenseNet169_model2.csv', index=False)