In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline 
import seaborn as sns


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Echo the full path of every file found under the Kaggle input mount.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = [os.path.join(dirname, filename) for filename in filenames]
    for full_path in full_paths:
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

from IPython.display import clear_output
# Clear this cell's output area, which discards the file listing printed above.
clear_output()

In [None]:
# Side length (pixels) that every image and mask is resized to before use.
IMG_SIZE = 128
# Number of channels in the image tensors built for the networks.
N_CHANNELS = 3

# Directories the PNG images and their masks are read from; an image and its
# mask are looked up under the same file name in both directories.
img_path = '../input/pneumothorax-chest-xray-images-and-masks/siim-acr-pneumothorax/png_images'
mask_path = '../input/pneumothorax-chest-xray-images-and-masks/siim-acr-pneumothorax/png_masks'

In [None]:
# Load the stage-1 train and test image index tables. Later cells read the
# 'new_filename' and 'has_pneumo' columns from them.
train_df = pd.read_csv('../input/pneumothorax-chest-xray-images-and-masks/siim-acr-pneumothorax/stage_1_train_images.csv')
test_df = pd.read_csv('../input/pneumothorax-chest-xray-images-and-masks/siim-acr-pneumothorax/stage_1_test_images.csv')

In [None]:
# Preview the first 15 rows of the training index.
train_df.head(15)

In [None]:
sns.set_palette("Greys")

# Split the training index on the image-level pneumothorax flag.
train_has_pneumo = train_df['has_pneumo']
train_df_with_pneumo = train_df.loc[train_has_pneumo == 1]
train_df_without_pneumo = train_df.loc[train_has_pneumo == 0]

# Bar chart of the number of images in each class.
class_names = ["has_pneumo", "has_no_pneumo"]
class_counts = [train_df_with_pneumo.shape[0], train_df_without_pneumo.shape[0]]

plt.title("TRAIN. Number of images with and without pneumothorax")
sns.barplot(x=class_names, y=class_counts)
plt.ylabel("# of images")

In [None]:
# Keep every positive training row but only the first 300 negative rows.
# (Alternative left by the author: cap negatives at the number of positives.)
train_has_pneumo = train_df['has_pneumo']
train_df_with_pneumo = train_df.loc[train_has_pneumo == 1]
train_df_without_pneumo = train_df.loc[train_has_pneumo == 0].iloc[:300]

# Re-draw the class balance for the reduced training selection.
class_names = ["has_pneumo", "has_no_pneumo"]
class_counts = [train_df_with_pneumo.shape[0], train_df_without_pneumo.shape[0]]

plt.title("TRAIN. Number of images with and without pneumothorax")
sns.barplot(x=class_names, y=class_counts)
plt.ylabel("# of images")

In [None]:
# Split the test index on the image-level pneumothorax flag.
test_has_pneumo = test_df['has_pneumo']
test_df_with_pneumo = test_df.loc[test_has_pneumo == 1]
test_df_without_pneumo = test_df.loc[test_has_pneumo == 0]

# Bar chart of the number of test images in each class.
class_names = ["has_pneumo", "has_no_pneumo"]
class_counts = [test_df_with_pneumo.shape[0], test_df_without_pneumo.shape[0]]

plt.title("TEST. Number of images with and without pneumothorax")
sns.barplot(x=class_names, y=class_counts)
plt.ylabel("# of images")

In [None]:
# Keep every positive test row but only the first 30 negative rows.
# (Alternative left by the author: cap negatives at the number of positives.)
test_has_pneumo = test_df['has_pneumo']
test_df_with_pneumo = test_df.loc[test_has_pneumo == 1]
test_df_without_pneumo = test_df.loc[test_has_pneumo == 0].iloc[:30]

# Re-draw the class balance for the reduced test selection.
class_names = ["has_pneumo", "has_no_pneumo"]
class_counts = [test_df_with_pneumo.shape[0], test_df_without_pneumo.shape[0]]

plt.title("TEST. Number of images with and without pneumothorax")
sns.barplot(x=class_names, y=class_counts)
plt.ylabel("# of images")

In [None]:
from tqdm import tqdm 
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
import cv2
N_CHANNELS = 3

# Build the training tensors: X_train (images), Y_train (binary masks) and
# labels (image-level has_pneumo flag).
#
# The selected rows are shuffled with a fixed seed. Without this the array is
# ordered "all positives, then all negatives", and Keras' `validation_split`
# (which holds out the trailing fraction of the arrays) would validate on
# negative images only.
train_rows = pd.concat([train_df_with_pneumo, train_df_without_pneumo]).sample(frac=1, random_state=42)

# Count the rows directly. The original `len(df_a + df_b)` performed an
# index-aligned, element-wise DataFrame addition only to measure its length.
train_size = len(train_rows)

X_train = np.zeros((train_size, IMG_SIZE, IMG_SIZE, N_CHANNELS))
# Builtin `bool` replaces the `np.bool` alias, which NumPy 1.24 removed.
Y_train = np.zeros((train_size, IMG_SIZE, IMG_SIZE, 1), dtype=bool)
labels = np.zeros(train_size, dtype=np.uint8)

# One dict per selected row, keyed by column name.
img_data = train_rows.to_dict('records')


for i, data_row in tqdm(enumerate(img_data), total=len(img_data)):

    patientImage = data_row['new_filename']
    imageLabel  = data_row['has_pneumo']

    # Read the image, resize it to IMG_SIZE x IMG_SIZE and add a channel axis.
    lungImage = imread(os.path.join(img_path, patientImage))
    lungImage = np.expand_dims(resize(lungImage, (IMG_SIZE, IMG_SIZE), mode='constant', preserve_range=True), axis=-1)

    # Min-max stretch the intensities to [0, 255], replicate the result into
    # three identical channels, then scale to [0, 1].
    norm_img = np.zeros((IMG_SIZE,IMG_SIZE))
    norm_img = cv2.normalize(lungImage,  norm_img, 0, 255, cv2.NORM_MINMAX)
    img2 = cv2.merge((norm_img,norm_img,norm_img))

    X_train[i] = img2/255

    labels[i] = imageLabel

    # The mask is stored under the same file name in the mask directory.
    maskImage = imread(os.path.join(mask_path, patientImage))
    maskImage = np.expand_dims(resize(maskImage, (IMG_SIZE, IMG_SIZE), mode='constant', preserve_range=True), axis=-1)
    maskImage = maskImage/255
    # Assigning floats into the boolean array marks every non-zero pixel True.
    Y_train[i] = maskImage



In [None]:
# Sanity check: largest pixel value in the second training image.
second_image_peak = X_train[1].max()
print(second_image_peak)

In [None]:
# Shape of the training image tensor.
print(X_train.shape)

In [None]:
# Preview grid: rows of training images alternate with rows of their masks.
plt.figure(figsize=(20, 16))
x, y = 12, 4  # x: panels per grid row, y: number of image/mask row pairs

for i in range(y):
    first_image_slot = 2 * i * x + 1        # subplot index of this pair's first image
    first_mask_slot = first_image_slot + x  # same column, one grid row further down
    for j in range(x):
        pos = i*120 + j*10  # index of the training sample shown in this column

        plt.subplot(2 * y, x, first_image_slot + j)
        plt.imshow(X_train[pos])
        plt.title('Image')
        plt.axis('off')

        plt.subplot(2 * y, x, first_mask_slot + j)
        plt.imshow(np.squeeze(Y_train[pos]), cmap='gray_r')
        plt.title('Mask\nLabel: {}'.format(labels[pos]))
        plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import keras.backend as K
from keras.engine import Layer

from keras.models import Sequential,Model
from keras.layers import Input, Dropout, merge
from keras.layers.convolutional import Convolution2D, UpSampling2D, ZeroPadding2D, Cropping2D, Deconvolution2D
from keras.layers.core import Activation
from keras.optimizers import Adam, SGD

from keras.applications.resnet50 import ResNet50
from keras.models import Model, load_model
from keras.metrics import binary_accuracy

import numpy as np
import tensorflow as tf


In [None]:
# Maps a one-hot channel index to the mask value that activates that channel:
# rgb_to_onehot sets channel 0 where the mask equals True and channel 1 where
# it equals False.
value_dict = {1: False,
              0: True,
              }

def rgb_to_onehot(rgb_arr, value_dict):
    """One-hot encode a batch of label images.

    Parameters
    ----------
    rgb_arr : np.ndarray
        Batch of label images. The first three axes index the pixels; an
        optional fourth axis holds the per-pixel channel values.
    value_dict : dict
        Maps every class index ``0 .. len(value_dict) - 1`` to the pixel value
        that marks the class: a scalar for single-channel input, or a sequence
        with one entry per channel for multi-channel input.

    Returns
    -------
    np.ndarray
        ``int8`` array of shape ``rgb_arr.shape[:3] + (len(value_dict),)`` in
        which channel ``k`` is 1 where the pixel equals ``value_dict[k]`` and
        0 elsewhere.

    Notes
    -----
    The input shape is printed on every call (kept from the original).
    """
    print(rgb_arr.shape)

    num_classes = len(value_dict)
    pixel_grid_shape = rgb_arr.shape[:3]
    arr = np.zeros(pixel_grid_shape + (num_classes,), dtype=np.int8)

    # One row per pixel, one column per channel. The column count used to be
    # hard-coded to 1, which broke on inputs with more than one channel.
    n_channels = int(np.prod(rgb_arr.shape[3:]))
    pixels = rgb_arr.reshape((-1, n_channels))

    # Classes are addressed by index, so value_dict must contain the keys
    # 0 .. num_classes - 1 (their insertion order does not matter).
    for class_idx in range(num_classes):
        matches = np.all(pixels == value_dict[class_idx], axis=1)
        arr[:, :, :, class_idx] = matches.reshape(pixel_grid_shape)
    return arr

# One-hot encode the training masks: one output channel per value_dict entry.
new_Y = rgb_to_onehot(Y_train, value_dict)
# Round-trip through bool and multiply by 1 to obtain an integer 0/1 array.
new_Y = 1*np.array(new_Y, dtype="bool")


def onehot_to_rgb(onehot):
    """Collapse a two-channel score map into a single-channel 0/1 image.

    Parameters
    ----------
    onehot : np.ndarray
        Array whose last axis holds the per-pixel scores; only channels 0 and
        1 are compared.

    Returns
    -------
    np.ndarray
        ``uint8`` array of shape ``onehot.shape[:-1] + (1,)`` holding 0 where
        channel 0 is strictly greater than channel 1 and 1 everywhere else
        (ties included).
    """
    # Vectorised comparison; replaces the per-pixel Python loop and the
    # argmax result that was computed but never used.
    channel0_wins = onehot[..., 0] > onehot[..., 1]
    output = np.where(channel0_wins, 0, 1)
    return np.uint8(output[..., np.newaxis])


In [None]:
#Model parameters
# NOTE(review): stride and learning_rate are assigned here but not read by
# anything in this cell; the optimizer below runs with its default settings.
stride = 32
learning_rate = 0.1
N_CLASSES = 2
batch_size = 5
N_epochs = 100

# ENCODER: ImageNet-pretrained VGG19 without its classification head; every
# layer is frozen so that only the decoder below is trained.
vgg_conv = tf.keras.applications.VGG19(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
for layer in vgg_conv.layers[:]:
    layer.trainable = False

model_with_vgg = Sequential()
model_with_vgg.add(vgg_conv)

# DECODER: three transposed convolutions whose strides (4, 4, 2) multiply to
# 32, with dropout between consecutive ones.
model_with_vgg.add(Deconvolution2D(N_CLASSES, (16, 16), strides=4, padding='same') )
model_with_vgg.add(Dropout(0.2))
model_with_vgg.add(Deconvolution2D(N_CLASSES, (32, 32), strides=4, padding='same') )
model_with_vgg.add(Dropout(0.2))
model_with_vgg.add(Deconvolution2D(N_CLASSES, (64, 64), strides=2, padding='same') )

# Softmax over the class axis turns the decoder output into per-pixel scores.
model_with_vgg.add(Activation('softmax'))
model_with_vgg.summary()


# NOTE(review): despite its name this is an activation layer, not an
# optimizer, and nothing in this cell uses it.
opt = tf.keras.layers.LeakyReLU()


class _OneHotMeanIoU(tf.keras.metrics.MeanIoU):
    """MeanIoU for one-hot targets and per-class score predictions.

    The stock metric builds its confusion matrix from integer class ids, so
    feeding it one-hot targets and softmax outputs yields meaningless values.
    Both tensors are reduced to class ids with argmax first.
    """

    def update_state(self, y_true, y_pred, sample_weight=None):
        return super().update_state(
            tf.argmax(y_true, axis=-1),
            tf.argmax(y_pred, axis=-1),
            sample_weight,
        )


# The explicit name keeps the metric key identical to the stock MeanIoU's.
# NOTE(review): reloading a model compiled with this custom metric may need
# `custom_objects` or `compile=False` -- confirm for the Keras version in use.
m = _OneHotMeanIoU(num_classes=N_CLASSES, name='mean_io_u')

def dice(y_true, y_pred):
    """Return the negated Sorensen-Dice index of y_true and y_pred.

    The sums run over every element of both tensors (all samples, pixels and
    channels), so the result is a single scalar per call.
    """
    # Symbolically compute the intersection
    y_int = y_true*y_pred
    # Technically this is the negative of the Sorensen-Dice index. This is done for
    # minimization purposes
    return -(2*K.sum(y_int) / (K.sum(y_true) + K.sum(y_pred)))

model_with_vgg.compile(loss="binary_crossentropy",
              optimizer="Adam",
              metrics=['accuracy', m, dice])



In [None]:
# Train the VGG19-based segmenter. Keras holds out the trailing 10% of the
# arrays for validation. The epoch count now comes from N_epochs (it was a
# hard-coded 1000, contradicting the configured value and the other fits).
model_with_vgg.fit(
    x=X_train,
    y=new_Y,
    validation_split=0.1,
    batch_size=batch_size,
    epochs=N_epochs,
)

In [None]:
import os 
# Create the output directory; exist_ok keeps the cell re-runnable instead of
# raising FileExistsError on the second execution.
os.makedirs('../output1', exist_ok=True)

In [None]:
# Save the VGG19-based model under ./vgg_100 in the current directory.
model_with_vgg.save('./vgg_100')

In [None]:
from tqdm import tqdm 
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
import cv2
N_CHANNELS = 3

# Build the test tensors from the pneumothorax-positive test rows only:
# X_test (images), Y_test (binary masks) and labels_test (has_pneumo flag).
# The row count gets its own name so the training size is not overwritten.
test_size = len(test_df_with_pneumo)

X_test = np.zeros((test_size, IMG_SIZE, IMG_SIZE, N_CHANNELS))
# Builtin `bool` replaces the `np.bool` alias, which NumPy 1.24 removed.
Y_test = np.zeros((test_size, IMG_SIZE, IMG_SIZE, 1), dtype=bool)
labels_test = np.zeros(test_size, dtype=np.uint8)

# One dict per test row, keyed by column name.
img_data = test_df_with_pneumo.to_dict('records')


for i, data_row in tqdm(enumerate(img_data), total=len(img_data)):

    patientImage = data_row['new_filename']
    imageLabel  = data_row['has_pneumo']

    # Read the image, resize it to IMG_SIZE x IMG_SIZE and add a channel axis.
    lungImage = imread(os.path.join(img_path, patientImage))
    lungImage = np.expand_dims(resize(lungImage, (IMG_SIZE, IMG_SIZE), mode='constant', preserve_range=True), axis=-1)

    # Min-max stretch the intensities to [0, 255], replicate the result into
    # three identical channels, then scale to [0, 1].
    norm_img = np.zeros((IMG_SIZE,IMG_SIZE))
    norm_img = cv2.normalize(lungImage,  norm_img, 0, 255, cv2.NORM_MINMAX)
    img2 = cv2.merge((norm_img,norm_img,norm_img))

    X_test[i] = img2/255

    labels_test[i] = imageLabel

    # The mask is stored under the same file name in the mask directory.
    maskImage = imread(os.path.join(mask_path, patientImage))
    maskImage = np.expand_dims(resize(maskImage, (IMG_SIZE, IMG_SIZE), mode='constant', preserve_range=True), axis=-1)
    maskImage = maskImage/255
    # Assigning floats into the boolean array marks every non-zero pixel True.
    Y_test[i] = maskImage

In [None]:
# Run the VGG19-based model over the whole test tensor and report the shape
# of its output.
res = model_with_vgg.predict(X_test)
print(res.shape)

In [None]:
# One-hot encode the test masks: one output channel per value_dict entry.
new_Y_test = rgb_to_onehot(Y_test, value_dict)
# Round-trip through bool and multiply by 1 to obtain an integer 0/1 array.
new_Y_test = 1*np.array(new_Y_test, dtype="bool")
print(new_Y_test.shape)

In [None]:
# Predict the first six training images and decode the first prediction into
# a single-channel 0/1 image. The display shape follows IMG_SIZE instead of a
# hard-coded 128, so it keeps working if the configured size changes.
res = onehot_to_rgb(model_with_vgg.predict(X_train[0:6])[0] )
plt.imshow(res.reshape( (IMG_SIZE, IMG_SIZE) ))

In [None]:
# Show the first training mask inverted (255 - mask). The display shape
# follows IMG_SIZE instead of a hard-coded 128.
res = 255 - Y_train[0]
plt.imshow(res.reshape( (IMG_SIZE, IMG_SIZE) ))

In [None]:
# Show the first training image as stored in X_train.
res = X_train[0]
plt.imshow(res)

In [None]:
# Hyper-parameters for the ResNet50 experiment
stride = 32
learning_rate = 0.1
N_CLASSES = 2
batch_size = 5
N_epochs = 100

# ENCODER: ImageNet-pretrained ResNet50 without its classification head,
# with every layer frozen.
resnet_conv = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
for layer in resnet_conv.layers:
    layer.trainable = False

model_with_resnet = Sequential()
model_with_resnet.add(resnet_conv)

# DECODER: five stride-2 transposed convolutions, with dropout after each one
# except the last.
decoder_kernels = [(4, 4), (16, 16), (32, 32), (32, 32), (64, 64)]
for position, kernel in enumerate(decoder_kernels, start=1):
    model_with_resnet.add(Deconvolution2D(N_CLASSES, kernel, strides=2, padding='same'))
    if position < len(decoder_kernels):
        model_with_resnet.add(Dropout(0.2))

# Softmax over the class axis turns the decoder output into per-pixel scores.
model_with_resnet.add(Activation('softmax'))
model_with_resnet.summary()

# Created but not passed to compile() below.
m = tf.keras.metrics.MeanIoU(num_classes=2)

model_with_resnet.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Train the ResNet50-based segmenter for N_epochs epochs; Keras holds out the
# trailing 10% of the arrays for validation.
model_with_resnet.fit(
    x=X_train,
    y=new_Y,
    batch_size=batch_size,
    epochs=N_epochs,
    validation_split=0.1,
)

In [None]:
from tqdm import tqdm 
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
import cv2
N_CHANNELS = 3

# Rebuild the test tensors from the pneumothorax-positive test rows only:
# X_test (images), Y_test (binary masks) and labels_test (has_pneumo flag).
# The row count gets its own name so the training size is not overwritten.
test_size = len(test_df_with_pneumo)

X_test = np.zeros((test_size, IMG_SIZE, IMG_SIZE, N_CHANNELS))
# Builtin `bool` replaces the `np.bool` alias, which NumPy 1.24 removed.
Y_test = np.zeros((test_size, IMG_SIZE, IMG_SIZE, 1), dtype=bool)
labels_test = np.zeros(test_size, dtype=np.uint8)

# One dict per test row, keyed by column name.
img_data = test_df_with_pneumo.to_dict('records')


for i, data_row in tqdm(enumerate(img_data), total=len(img_data)):

    patientImage = data_row['new_filename']
    imageLabel  = data_row['has_pneumo']

    # Read the image, resize it to IMG_SIZE x IMG_SIZE and add a channel axis.
    lungImage = imread(os.path.join(img_path, patientImage))
    lungImage = np.expand_dims(resize(lungImage, (IMG_SIZE, IMG_SIZE), mode='constant', preserve_range=True), axis=-1)

    # Min-max stretch the intensities to [0, 255], replicate the result into
    # three identical channels, then scale to [0, 1].
    norm_img = np.zeros((IMG_SIZE,IMG_SIZE))
    norm_img = cv2.normalize(lungImage,  norm_img, 0, 255, cv2.NORM_MINMAX)
    img2 = cv2.merge((norm_img,norm_img,norm_img))

    X_test[i] = img2/255

    labels_test[i] = imageLabel

    # The mask is stored under the same file name in the mask directory.
    maskImage = imread(os.path.join(mask_path, patientImage))
    maskImage = np.expand_dims(resize(maskImage, (IMG_SIZE, IMG_SIZE), mode='constant', preserve_range=True), axis=-1)
    maskImage = maskImage/255
    # Assigning floats into the boolean array marks every non-zero pixel True.
    Y_test[i] = maskImage



In [None]:
# Inverted ground-truth mask of the second test image: 1.0 where the mask is
# False and 0.0 where it is True. This is a vectorised replacement for the
# per-pixel loop, which also assigned one-element arrays to scalar slots
# (deprecated since NumPy 1.25).
new_image = np.logical_not(Y_test[1, :, :, 0]).astype(float)

plt.imshow(new_image)

In [None]:
# Predict the first six test images and decode the second prediction into a
# single-channel 0/1 image. The display shape follows IMG_SIZE instead of a
# hard-coded 128, so it keeps working if the configured size changes.
res = onehot_to_rgb(model_with_resnet.predict(X_test[0:6])[1] )
plt.imshow(res.reshape( (IMG_SIZE, IMG_SIZE) ))

In [None]:
# List the names exposed by tf.keras.applications in the installed version.
print(dir(tf.keras.applications))

In [None]:
import tensorflow as tf
# Display the installed TensorFlow version string.
tf.__version__

In [None]:
from tensorflow.keras.applications import InceptionV3

# Hyper-parameters for the InceptionV3 experiment
stride = 32
learning_rate = 0.1
N_CLASSES = 2
batch_size = 2
N_epochs = 30

# ENCODER: ImageNet-pretrained InceptionV3 without its classification head,
# with every layer frozen. (The variables are named "efnet", but the backbone
# instantiated here is InceptionV3.)
efnet_conv = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
for layer in efnet_conv.layers:
    layer.trainable = False

model_with_efnet = Sequential()
model_with_efnet.add(efnet_conv)

# DECODER: four transposed convolutions given as (kernel size, strides), with
# dropout after each one except the last.
decoder_spec = [
    ((16, 16), (4, 4)),
    ((32, 32), (4, 4)),
    ((32, 32), (2, 2)),
    ((64, 64), (2, 2)),
]
for position, (kernel, step) in enumerate(decoder_spec, start=1):
    model_with_efnet.add(Deconvolution2D(N_CLASSES, kernel, strides=step, padding='same'))
    if position < len(decoder_spec):
        model_with_efnet.add(Dropout(0.2))

# Softmax over the class axis turns the decoder output into per-pixel scores.
model_with_efnet.add(Activation('softmax'))
model_with_efnet.summary()

# Created but not passed to compile() below.
m = tf.keras.metrics.MeanIoU(num_classes=2)

model_with_efnet.compile(
    optimizer="SGD",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Train the InceptionV3-based segmenter for N_epochs epochs; Keras holds out
# the trailing 10% of the arrays for validation.
model_with_efnet.fit(
    x=X_train,
    y=new_Y,
    batch_size=batch_size,
    epochs=N_epochs,
    validation_split=0.1,
)