## Medical

In [1]:
from math import *
import pandas as pd
import random
import numpy as np
import tensorflow as tf
import warnings
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy import stats
import cv2
import keras
from keras.models import Sequential, model_from_yaml, Model
from keras.layers import Dense, Dropout, BatchNormalization, Activation, Convolution2D, Flatten, \
    MaxPooling2D,Input, Concatenate, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam, RMSprop
from keras import backend as K
from tensorflow.python.client import device_lib
from tensorflow.python.ops import array_ops
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import *
from keras.applications.densenet import DenseNet169
from keras_applications.resnext import ResNeXt50
import albumentations
import seaborn as sns

# Display defaults: seaborn grid theme, show every DataFrame column and up to
# 200 rows, and silence Python warnings for the rest of the notebook.
sns.set_style("whitegrid")
for option_name, option_value in (('display.max_columns', None),
                                  ('display.max_rows', 200)):
    pd.set_option(option_name, option_value)
warnings.filterwarnings('ignore')

# Show the devices TensorFlow can see, then register a session limited to
# one CPU and one GPU device as the Keras backend session.
print(device_lib.list_local_devices())
config = tf.ConfigProto(device_count={"CPU": 1, "GPU" : 1})
session = tf.Session(config=config)
K.set_session(session)

Using TensorFlow backend.


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6556657694481780977
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 4945621811
locality {
  bus_id: 1
  links {
  }
}
incarnation: 15680467377423217328
physical_device_desc: "device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:08:00.0, compute capability: 6.1"
]


In [2]:
# Candidate effects for the OneOf group of the training pipeline; the group
# as a whole is given p=0.25 below.
_one_of_effects = [
    albumentations.CLAHE(clip_limit=2),
    albumentations.IAASharpen(),
    albumentations.IAAEmboss(),
    albumentations.RandomBrightness(),
    albumentations.RandomContrast(),
    albumentations.Blur(),
    albumentations.GaussNoise(),
    albumentations.ElasticTransform(),
]

# Training pipeline: geometric augmentations, the OneOf group, colour jitter,
# a shift/scale step with rotation disabled (rotate_limit=0), then Normalize.
train_transform = albumentations.Compose([
    albumentations.RandomRotate90(p=0.5),
    albumentations.Transpose(p=0.5),
    albumentations.Flip(p=0.5),
    albumentations.OneOf(_one_of_effects, p=0.25),
    albumentations.HueSaturationValue(p=0.25),
    albumentations.ShiftScaleRotate(shift_limit=0.10, scale_limit=0.10, rotate_limit=0, p=0.5),
    albumentations.Normalize(p=1),
])

# Validation pipeline: Normalize only, no augmentation.
val_transform = albumentations.Compose([
    albumentations.Normalize(p=1),
])

# Test pipeline: random 90-degree rotation and flip (each p=0.5) followed by
# Normalize, so repeated passes over the same image can differ.
test_transform = albumentations.Compose([
    albumentations.RandomRotate90(p=0.5),
    albumentations.Flip(p=0.5),
    albumentations.Normalize(p=1),
])

def preprocess_train(image):
    """Cast ``image`` to uint8 and run it through ``train_transform``.

    Returns the value stored under the ``'image'`` key of the pipeline result.
    """
    augmented = train_transform(image=image.astype(np.uint8))
    return augmented['image']

def preprocess_val(image):
    """Cast ``image`` to uint8 and run it through ``val_transform``.

    Returns the value stored under the ``'image'`` key of the pipeline result.
    """
    transformed = val_transform(image=image.astype(np.uint8))
    return transformed['image']
    
def preprocess_test(image):
    """Cast ``image`` to uint8 and run it through ``test_transform``.

    Returns the value stored under the ``'image'`` key of the pipeline result.
    """
    transformed = test_transform(image=image.astype(np.uint8))
    return transformed['image']

def preprocess_np(image):
    """Return ``image`` converted to ``uint8`` with no other processing."""
    as_uint8 = image.astype(np.uint8)
    return as_uint8


In [3]:
class NPGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves transformed batches from in-memory arrays.

    Parameters
    ----------
    batch_size : int
        Number of samples per batch; the last batch may be shorter.
    indep : array
        Images, sliced along the first axis. Each sample is cast to uint8,
        passed to ``transform`` and reshaped to ``(96, 96, 3)``.
    dep : array
        Targets, sliced with the same bounds as ``indep``. May be left at its
        empty default, in which case every batch carries an empty target slice.
    transform : callable
        Invoked as ``transform(image=sample)``; must return the processed image.
    """

    def __init__(self, batch_size=32, indep=np.empty([0,96,96,3]), dep = np.empty([0]), transform = preprocess_train):
        self.batch_size = batch_size
        self.indep = indep
        self.dep = dep
        # Position of the batch that follows the most recently served one.
        # Kept (together with reset()) for backward compatibility only;
        # batch selection is driven by the index passed to __getitem__.
        self.index = 0
        self.transform = transform
        self.tr = (lambda x: transform(image=x))

    def __len__(self):
        """Number of batches per pass, counting a trailing partial batch."""
        n_samples = len(self.indep)
        return (n_samples + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return ``(X, y)`` for batch number ``index``.

        The requested ``index`` selects the slice. Previously the argument was
        ignored and batches were handed out from a private cursor, so the
        batch order requested by the caller (e.g. a shuffled order) had no
        effect and the result depended on how often the method had been called.
        """
        start = index * self.batch_size
        stop = start + self.batch_size
        batch_images = self.indep[start:stop]
        y = self.dep[start:stop]

        X = np.array([self.tr(img.astype(np.uint8)).reshape(96,96,3) for img in batch_images])

        # Maintain the legacy cursor so code that inspects ``index`` still
        # sees it wrap around after the final batch.
        n_batches = len(self)
        self.index = (index + 1) % n_batches if n_batches else 0
        return X, y

    def reset(self):
        """Rewind the legacy cursor to the first batch."""
        self.index=0
    

In [4]:
from keras.callbacks import *

class CyclicLR(Callback):
    """Callback that cycles the optimizer learning rate between two bounds.

    After every batch the learning rate is set to::

        base_lr + (max_lr - base_lr) * max(0, 1 - x) * scale_fn(arg)

    where ``cycle = floor(1 + it / (2 * step_size))``,
    ``x = |it / step_size - 2 * cycle + 1|`` and ``it`` is the number of
    batches seen since the last ``_reset``. ``arg`` is ``cycle`` when
    ``scale_mode == 'cycle'`` and ``it`` otherwise.

    Parameters
    ----------
    base_lr, max_lr : float
        Lower and upper learning-rate bounds.
    step_size : float
        Number of batches in half a cycle.
    mode : {'triangular', 'triangular2', 'exp_range'}
        Built-in scaling, used only when ``scale_fn`` is not given:
        'triangular' keeps the amplitude constant, 'triangular2' scales it by
        ``1 / 2**(cycle - 1)``, 'exp_range' scales it by ``gamma**it``.
    gamma : float
        Base of the 'exp_range' scaling.
    scale_fn : callable, optional
        Custom scaling function; when given, ``mode`` is ignored.
    scale_mode : {'cycle', 'iterations'}
        Argument passed to a custom ``scale_fn``.

    Raises
    ------
    ValueError
        If ``scale_fn`` is not given and ``mode`` is not one of the built-ins.
        (Previously this left the scaling undefined and only failed later,
        on the first learning-rate computation.)

    Attributes
    ----------
    history : dict
        Per-batch lists under 'lr', 'iterations' and every key found in the
        batch ``logs``.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1/(2.**(x-1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma**(x)
                self.scale_mode = 'iterations'
            else:
                raise ValueError(
                    "mode must be 'triangular', 'triangular2' or 'exp_range' "
                    "when scale_fn is not given, got {0!r}".format(mode))
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Restart the cycle counter, optionally replacing the bounds/step size."""
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.

    def clr(self):
        """Return the learning rate for the current ``clr_iterations`` value."""
        cycle = np.floor(1+self.clr_iterations/(2*self.step_size))
        x = np.abs(self.clr_iterations/self.step_size - 2*cycle + 1)
        # The scaling function receives the cycle number or the iteration
        # count depending on scale_mode.
        if self.scale_mode == 'cycle':
            scale_arg = cycle
        else:
            scale_arg = self.clr_iterations
        return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(scale_arg)

    def on_train_begin(self, logs=None):
        """Initialise the optimizer learning rate when training starts."""
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, epoch, logs=None):
        """Record the batch in ``history`` and set the next learning rate.

        The first positional argument keeps its original name ``epoch``; it is
        not used by this method.
        """
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1

        # The recorded 'lr' is the rate that was in effect for this batch,
        # read before it is replaced below.
        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        K.set_value(self.model.optimizer.lr, self.clr())
        


In [5]:
def KerasFocalLoss(target, input):
    """Focal loss (gamma = 2) for binary targets, computed from ``input``.

    The formula treats ``input`` as pre-sigmoid scores: the base term is
    ``input - input * target + log(1 + exp(-input))`` and it is weighted by
    ``exp(gamma * log_sigmoid(-input * (2 * target - 1)))``. The weighted loss
    is summed over axis 1 and averaged over the remaining axis, so both
    tensors need at least two dimensions.

    NOTE(review): the models compiled in this notebook end in a sigmoid layer
    and use "binary_crossentropy"; confirm the intended input before wiring
    this loss in.

    Parameters
    ----------
    target : tensor
        Ground-truth values.
    input : tensor
        Predicted scores; cast to float32.

    Returns
    -------
    Scalar tensor with the mean per-sample loss.
    """
    gamma = 2.
    input = tf.cast(input, tf.float32)

    # log(1 + exp(-x)) is evaluated as m + log(exp(-m) + exp(-x - m)).
    # The shift m must be max(-x, 0) for both exponentials to stay <= 1.
    # The previous clip to [0, 1] gave the same value mathematically but let
    # exp(-x - m) overflow for strongly negative x.
    max_val = K.relu(-input)
    loss = input - input * target + max_val + K.log(K.exp(-max_val) + K.exp(-input - max_val))
    invprobs = tf.log_sigmoid(-input * (target * 2.0 - 1.0))
    loss = K.exp(invprobs * gamma) * loss

    return K.mean(K.sum(loss, axis=1))

In [7]:
def getfold(n):
    """Load one train/validation split from the per-fold ``.npy`` files.

    Fold ids wrap modulo 20. Validation uses folds ``n, n+1, n+2`` in that
    order; training uses the seven folds ``n+3 .. n+9`` in an order shuffled
    with the ``random`` module. Files are read from the current working
    directory as ``indep_<id>.npy`` and ``dep_<id>.npy``.

    Parameters
    ----------
    n : int
        Offset of the first validation fold.

    Returns
    -------
    tuple
        ``(indep, dep, indep_val, dep_val)``. Every array is concatenated
        along axis 0 onto an empty float64 seed, so the result dtype follows
        NumPy promotion with float64 and an ``indep`` file must have trailing
        shape ``(96, 96, 3)``.
    """
    val_folds = [(a + n) % 20 for a in range(0,3)]
    train_folds = [(a + n + 3) % 20 for a in range(0,7)]
    random.shuffle(train_folds)

    def _load_and_stack(fold_ids, name_template, seed):
        # Load every fold first and concatenate once. Appending fold by fold
        # re-copied the whole accumulated array on each iteration.
        parts = [seed] + [np.load(name_template.format(i)) for i in fold_ids]
        return np.concatenate(parts, axis=0)

    indep_val = _load_and_stack(val_folds, "indep_{0}.npy", np.empty([0,96,96,3]))
    dep_val = _load_and_stack(val_folds, "dep_{0}.npy", np.empty([0]))
    indep = _load_and_stack(train_folds, "indep_{0}.npy", np.empty([0,96,96,3]))
    dep = _load_and_stack(train_folds, "dep_{0}.npy", np.empty([0]))

    return indep, dep, indep_val, dep_val

In [7]:
# Stage 1: train a new classification head on top of an ImageNet-initialised
# ResNeXt50 backbone, one model per fold offset `i`.
with tf.device('/device:GPU:0'):
    for i in range (0, 1):
        K.clear_session()
        # Drop references to the previous arrays before loading the next fold.
        indep, dep, indep_val, dep_val, res = None, None, None, None, None
        indep, dep, indep_val, dep_val = getfold(i)
        inputs = Input((96, 96, 3))
        base_model = ResNeXt50(include_top=False, weights='imagenet', input_shape=(96, 96, 3), backend = keras.backend, layers = keras.layers, models = keras.models, utils = keras.utils)
        # Freeze the backbone so that only the head below is updated. The
        # attribute is lower-case `trainable`; the former `base_model.Trainable`
        # merely created an unrelated attribute and left the backbone trainable.
        base_model.trainable = False
        x = base_model(inputs)
        out = GlobalMaxPooling2D()(x)
        out = BatchNormalization()(out)
        out = Dropout(0.2)(out)
        out = Dense(256, activation='relu')(out)
        out = Dropout(0.3)(out)
        out = Dense(64, activation='relu')(out)
        out = Dropout(0.3)(out)
        out = BatchNormalization()(out)
        out = Dense(1, activation='sigmoid')(out)

        gs1 = Model(inputs, out)
        gs1.compile(Adam(lr=0.001), loss="binary_crossentropy", metrics=['accuracy'])

        # Save the architecture next to the weights written by the checkpoint.
        with open(r"med_rn_{0}.yaml".format(i), "w") as yaml_file:
            yaml_file.write(gs1.to_yaml())
        clr_triangular = CyclicLR(base_lr=5e-6, max_lr=0.005, mode="triangular2", step_size=1000)

        train_gen = NPGenerator(indep=indep, dep=dep, batch_size=32, transform=preprocess_train)
        val_gen = NPGenerator(indep=indep_val, dep=dep_val, batch_size=32, transform=preprocess_val)
        # Step counts are integers taken from the generators (including the
        # trailing partial batch) instead of the float `n_samples / 32`.
        gs1.fit_generator(generator=train_gen,
                          validation_data=val_gen,
                          steps_per_epoch=len(train_gen),
                          validation_steps=len(val_gen),
                          epochs=50, verbose=1, callbacks =
                          [
                          ModelCheckpoint("med_rn_{0}.h5".format(i), monitor='val_acc', verbose=1, save_best_only=True, mode='max'),
                          clr_triangular])
        

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.77665, saving model to med_rn_0.h5
Epoch 2/50

Epoch 00002: val_acc did not improve from 0.77665
Epoch 3/50

Epoch 00003: val_acc improved from 0.77665 to 0.80623, saving model to med_rn_0.h5
Epoch 4/50

Epoch 00004: val_acc improved from 0.80623 to 0.81685, saving model to med_rn_0.h5
Epoch 5/50

Epoch 00005: val_acc improved from 0.81685 to 0.87101, saving model to med_rn_0.h5
Epoch 6/50

Epoch 00006: val_acc did not improve from 0.87101
Epoch 7/50

Epoch 00007: val_acc did not improve from 0.87101
Epoch 8/50

Epoch 00008: val_acc did not improve from 0.87101
Epoch 9/50

Epoch 00009: val_acc improved from 0.87101 to 0.87

KeyboardInterrupt: 

In [9]:
# Predict the test images with the fine-tuned model(s). Every model is run
# four times; each pass builds a fresh generator with `preprocess_test`, and
# each pass is stored as its own column of `data`.
# The weights/architecture files read here (med_rn_l*) are the ones written
# by the ResNeXt fine-tuning cell.
res = np.load("res.npy")
data = pd.DataFrame()
model =[]
for i in range(1):
    with open(r"med_rn_l{0}.yaml".format(i), "r") as yaml_file:
        K.clear_session()
        architecture_yaml = yaml_file.read()
    m = model_from_yaml(architecture_yaml)
    m.load_weights(r"med_rn_l{0}.h5".format(i))
    for j in range(4):
        print (i, j)
        tg = NPGenerator(indep=res, batch_size=640, transform=preprocess_test)
        a = m.predict_generator(tg ,verbose=1, steps=len(tg))
        column_name = "model_{0}".format(i * 16 + j)
        data[column_name] = pd.Series(a.reshape(len(a)))

0 0
0 1
0 2
0 3


In [8]:
# Stage 2: reload the stage-1 model, unfreeze the whole backbone and
# fine-tune it at a low learning rate.
with tf.device('/device:GPU:0'):
    for i in range (0, 1):
        # Read the saved architecture and close the file straight away. The
        # handle used to stay open (and its name was re-bound by the write
        # below) for the whole training run.
        with open(r"med_rn_{0}.yaml".format(i), "r") as yaml_file:
            model_yaml = yaml_file.read()

        K.clear_session()
        # Drop references to the previous arrays before loading the next fold.
        indep, dep, indep_val, dep_val, res = None, None, None, None, None
        indep, dep, indep_val, dep_val = getfold(i)
        gs1 = model_from_yaml(model_yaml)
        gs1.load_weights("med_rn_{0}.h5".format(i))
        # layers[0] is the Input layer, layers[1] the nested backbone model,
        # matching how the stage-1 model was assembled.
        rn = gs1.layers[1]

        # Unfreeze the backbone and every layer inside it. The attribute is
        # lower-case `trainable`; `rn.Trainable` set an unrelated attribute.
        # (An earlier draft, previously kept here as commented-out code,
        # unfroze only the layers from 'res5a_branch2a' onward.)
        rn.trainable = True
        for layer in rn.layers:
            layer.trainable = True

        gs1.compile(RMSprop(lr=0.00001), loss="binary_crossentropy", metrics=['accuracy'])
        clr_triangular = CyclicLR(base_lr=1e-6, max_lr=0.00001, mode="triangular2", step_size=1000)

        with open(r"med_rn_l{0}.yaml".format(i), "w") as yaml_file:
            yaml_file.write(gs1.to_yaml())

        train_gen = NPGenerator(indep=indep, dep=dep, batch_size=32, transform=preprocess_train)
        val_gen = NPGenerator(indep=indep_val, dep=dep_val, batch_size=32, transform=preprocess_val)
        # Step counts are integers taken from the generators (including the
        # trailing partial batch) instead of the float `n_samples / 32`.
        # NOTE(review): the checkpoint monitors training 'acc', whereas
        # stage 1 monitors 'val_acc' — confirm this is intended.
        gs1.fit_generator(generator=train_gen,
                          validation_data=val_gen,
                          steps_per_epoch=len(train_gen),
                          validation_steps=len(val_gen),
                          epochs=1, verbose=1, callbacks =
                          [
                              ModelCheckpoint("med_rn_l{0}.h5".format(i), monitor='acc', verbose=1, save_best_only=True, mode='max'),
                              clr_triangular
                          ])
            

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/1

Epoch 00001: acc improved from -inf to 0.94859, saving model to med_rn_l0.h5


In [None]:
# Plot the accuracy and loss curves recorded by the most recent fit of `gs1`,
# one panel each, sharing the x axis.
fig, ax = plt.subplots(2, 1, sharex='col', figsize=(20, 10))
fit_history = gs1.history.history

# (axes, title, training key, validation key, y label) for each panel.
panel_specs = [
    (ax[0], 'Model accuracy history', 'acc', 'val_acc', 'Accuracy'),
    (ax[1], 'Model loss history', 'loss', 'val_loss', 'Loss'),
]
for panel, panel_title, train_key, val_key, y_label in panel_specs:
    panel.set_title(panel_title)
    panel.plot(fit_history[train_key])
    panel.plot(fit_history[val_key])
    panel.set_ylabel(y_label)
    panel.legend(['train', 'test'], loc='right')
    panel.grid()

plt.xlabel('Epoch')
plt.show()

In [None]:
# Continue fine-tuning the saved med_dn169_lll* models (one per i) on the
# split starting at fold i + 10, saving the results as med_dn169_llll*.
with tf.device('/device:GPU:0'):
    for i in range (0, 9):
        # Read the saved architecture and close the file straight away. The
        # handle used to stay open (and its name was re-bound by the write
        # below) for the whole training run.
        with open(r"med_dn169_lll{0}.yaml".format(i), "r") as yaml_file:
            model_yaml = yaml_file.read()

        K.clear_session()
        # Drop references to the previous arrays before loading the next fold.
        indep, dep, indep_val, dep_val, res = None, None, None, None, None
        indep, dep, indep_val, dep_val = getfold(i+10)
        gs1 = model_from_yaml(model_yaml)
        gs1.load_weights("med_dn169_lll{0}.h5".format(i))
        # Presumably layers[1] is the nested backbone model, as in the
        # ResNeXt cells — verify against the cell that built these models.
        rn = gs1.layers[1]
        # The attribute is lower-case `trainable`; `rn.Trainable` set an
        # unrelated attribute.
        rn.trainable = True
        for layer in rn.layers:
            layer.trainable = True

        gs1.compile(RMSprop(lr=0.001), loss="binary_crossentropy", metrics=['accuracy'])
        # NOTE(review): this schedule is created but not passed to
        # `callbacks` below, so training runs at the fixed RMSprop rate of
        # 0.001 — confirm whether it was meant to be included.
        clr_triangular = CyclicLR(base_lr=1e-5, max_lr=0.0001, mode="triangular2", step_size=2000)

        with open(r"med_dn169_llll{0}.yaml".format(i), "w") as yaml_file:
            yaml_file.write(gs1.to_yaml())

        train_gen = NPGenerator(indep=indep, dep=dep, batch_size=64, transform=preprocess_train)
        val_gen = NPGenerator(indep=indep_val, dep=dep_val, batch_size=64, transform=preprocess_val)
        # Step counts are integers taken from the generators (including the
        # trailing partial batch) instead of the float `n_samples / 64`.
        gs1.fit_generator(generator=train_gen,
                          validation_data=val_gen,
                          steps_per_epoch=len(train_gen),
                          validation_steps=len(val_gen),
                          epochs=5, verbose=1, callbacks =
                          [
                              ModelCheckpoint("med_dn169_llll{0}.h5".format(i), monitor='acc', verbose=1, save_best_only=True, mode='max'),
                          ])


In [9]:
# Predict the test images with the fine-tuned model(s). Every model is run
# four times; each pass builds a fresh generator with `preprocess_test`, and
# each pass is stored as its own column of `data`.
res = np.load("res.npy")
data = pd.DataFrame()
model =[]
for i in range(1):
    with open(r"med_rn_l{0}.yaml".format(i), "r") as yaml_file:
        K.clear_session()
        architecture_yaml = yaml_file.read()
    m = model_from_yaml(architecture_yaml)
    m.load_weights(r"med_rn_l{0}.h5".format(i))
    for j in range(4):
        print (i, j)
        tg = NPGenerator(indep=res, batch_size=640, transform=preprocess_test)
        a = m.predict_generator(tg ,verbose=1, steps=len(tg))
        column_name = "model_{0}".format(i * 10 + j)
        data[column_name] = pd.Series(a.reshape(len(a)))

0 0
0 1
0 2
0 3


KeyboardInterrupt: 

In [10]:
# The directory iterator is used only for its ordered file list
# (shuffle=False); no images are read from it in this cell.
test_generator = ImageDataGenerator().flow_from_directory(
    directory=r"c:/users/ajaln/test/",
    target_size=(96, 96),
    color_mode="rgb",
    batch_size=1000,
    class_mode="binary",
    shuffle=False,
)

# Characters 5..44 of every relative file name become the id — presumably
# this strips a 5-character folder prefix and the extension, leaving a
# 40-character id; TODO confirm against the directory layout.
file_ids = pd.Series(test_generator.filenames).str[5:45]
result = pd.DataFrame({"id": file_ids})
# Average all prediction columns row-wise; rows are matched by position, so
# `data` is assumed to follow the same order as the file list — verify.
result["label"] = data.mean(axis=1)

# Keep only ids present in the sample submission, in its row order.
samples = pd.read_csv(r"c:/users/ajaln/sample_submission.csv", usecols=["id"])
samples = pd.merge(samples, result, on="id", how="inner")
samples.to_csv(r"c:/work/dataset/medical/medical_rn.csv", index=False)

Found 57458 images belonging to 1 classes.


In [None]:
# Persist the per-pass prediction columns held in `data`.
# NOTE(review): the file name says "dn169", while the visible prediction cells
# fill `data` from the med_rn_l* model — confirm the target name is intended.
data.to_csv(r"c:/work/dataset/medical/medical_dn169_data.csv", index=False)

In [11]:
# Blend two submissions with equal weights and write the final file.
a2 = pd.read_csv(r"C:/Work/dataset/medical/medical_rn.csv")
a = pd.read_csv(r"C:/Work/dataset/medical/kl2.csv")

# Look up the ResNeXt label by id rather than by row position. The previous
# positional average silently mixed labels of different images whenever the
# two files were not in the same row order. Ids missing from `a2` yield NaN;
# `a2` must not contain duplicate ids.
rn_label_by_id = a2.set_index("id")["label"]
a["label"] = (a["id"].map(rn_label_by_id)*0.5+a["label"]*0.5)
a.loc[:, ["id", "label"]].to_csv(r"c:/work/dataset/medical/last.csv", index=False)