<a href="https://colab.research.google.com/github/PeterJackson61/data_science_bowl_2018/blob/main/Data_science_bowl_2018_model_increase.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q kaggle

In [None]:
# No download here: the Kaggle CLI needs ~/.kaggle/kaggle.json, which is only written
# by the credentials cell below. The dataset download runs right after that cell.


In [None]:
import getpass
import json
import os

# SECURITY: never commit an API key to a notebook. A key that has been published
# must be revoked on kaggle.com (Account -> API -> Expire API Token).
# Credentials are taken from the environment when available, otherwise prompted for.
kaggle_dir = os.path.expanduser('~/.kaggle')
os.makedirs(kaggle_dir, exist_ok=True)  # the CLI expects this directory to exist
kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')

api_token = {
    "username": os.environ.get("KAGGLE_USERNAME") or input("Kaggle username: "),
    "key": os.environ.get("KAGGLE_KEY") or getpass.getpass("Kaggle API key: "),
}

with open(kaggle_json_path, 'w') as file:
    json.dump(api_token, file)

# Restrict the token file to the current user (equivalent to `chmod 600`).
os.chmod(kaggle_json_path, 0o600)

In [None]:
# Download the competition archive (data-science-bowl-2018.zip) into the current directory.
!kaggle competitions download -c data-science-bowl-2018

In [None]:
!unzip data-science-bowl-2018.zip -d /content/my_data

In [None]:
!mkdir /content/data_train/

In [None]:
!unzip /content/my_data/stage1_train.zip -d /content/data_train/

In [None]:
!unzip /content/my_data/stage1_test.zip -d /content/data_test

In [None]:
import os

In [None]:
# Absolute locations matching the unzip targets above, so the notebook does not
# depend on the current working directory. The trailing slash is required because
# sample paths are built by string concatenation (TRAIN_PATH + id_).
TRAIN_PATH = '/content/data_train/'
TEST_PATH = '/content/data_test/'

In [None]:
# Sanity check: the sub-directories reported by os.walk must equal the raw directory
# listing, i.e. TRAIN_PATH holds nothing but sample directories.
walked_dirs = next(os.walk(TRAIN_PATH))[1]
listed_entries = os.listdir(TRAIN_PATH)
print(walked_dirs == listed_entries)

In [None]:
# Model input geometry: every image is resized to IMG_HEIGHT x IMG_WIDTH and only
# the first IMG_CHANNEL colour channels are kept.
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL = 512, 512, 3

In [None]:
import numpy as np
from tqdm import tqdm
import sys
from skimage.io import imread, imshow
from skimage.transform import resize


In [None]:
def getting_X_Y_train():
    """Load all training images and their merged masks at the model input size.

    Every entry of TRAIN_PATH is treated as a sample id containing
    ``images/<id>.png`` and a ``masks/`` directory of per-object mask files.

    Returns:
        X_train: uint8 array of shape (n_samples, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL).
            Each image is rescaled so that its brightest value becomes 255.
        Y_train: bool array of shape (n_samples, IMG_HEIGHT, IMG_WIDTH, 1), True
            wherever at least one of the sample's resized masks is non-zero.
    """
    train_ids = os.listdir(TRAIN_PATH)

    X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL), dtype=np.uint8)
    Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_)

    print("Getting and resizing the training images and masks")

    sys.stdout.flush()
    for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
        path = os.path.join(TRAIN_PATH, id_)
        img = imread(os.path.join(path, 'images', id_ + '.png'))[:, :, :IMG_CHANNEL]
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)

        # Stretch to the full 0..255 range. An all-black image has peak 0 and is left
        # untouched instead of being divided by zero (which would produce NaNs).
        peak = np.amax(img)
        if peak > 0:
            img = img / peak * 255
        X_train[n] = img.astype(np.uint8)

        # Union of all per-object masks; kept boolean throughout.
        mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_)
        masks_dir = os.path.join(path, 'masks')
        for mask_file in next(os.walk(masks_dir))[2]:
            mask_ = imread(os.path.join(masks_dir, mask_file))
            mask_ = np.expand_dims(resize(mask_, (IMG_HEIGHT, IMG_WIDTH), mode='constant',
                                          preserve_range=True), axis=-1)
            mask = np.logical_or(mask, mask_ != 0)
        Y_train[n] = mask
    return X_train, Y_train

In [None]:
import matplotlib.pyplot as plt

# Preview one randomly chosen training image after the same resize and
# max-normalisation that getting_X_Y_train applies.
train_ids = os.listdir(TRAIN_PATH)
# Draw from the whole dataset instead of the previously hard-coded range [1, 199).
idx = np.random.randint(0, len(train_ids))
path = TRAIN_PATH + train_ids[idx]
img = imread(path + '/images/' + train_ids[idx] + '.png')[:,:,:IMG_CHANNEL]
img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
# Guard against an all-black image (peak 0) to avoid a division by zero.
peak = np.amax(img)
if peak > 0:
    img = img / peak * 255
img = img.astype(np.uint8)
print(type(img))
imshow(img)

In [None]:
# Load the whole training set (resized images and merged masks) into memory.
X_train, Y_train = getting_X_Y_train()

In [None]:
# Allocated in getting_X_Y_train as (n_samples, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL).
X_train.shape

In [None]:
# Allocated in getting_X_Y_train as (n_samples, IMG_HEIGHT, IMG_WIDTH, 1).
Y_train.shape

In [None]:
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input
from tensorflow.keras.models import Model

In [None]:
def conv_block(inputs, num_filters):
    """Apply two successive (3x3 same-padded Conv2D -> BatchNormalization -> ReLU) stages.

    Args:
        inputs: tensor fed to the first convolution.
        num_filters: number of filters used by both convolutions.

    Returns:
        The output tensor of the second ReLU activation.
    """
    features = inputs
    for _ in range(2):
        features = Conv2D(num_filters, 3, padding="same")(features)
        features = BatchNormalization()(features)
        features = Activation("relu")(features)
    return features

def encoder_block(inputs, num_filters):
    """Run a conv_block and 2x2 max-pool its output.

    Returns:
        (skip, pooled): the conv_block output (used later as a skip connection)
        and the max-pooled tensor passed on to the next stage.
    """
    skip = conv_block(inputs, num_filters)
    pooled = MaxPool2D((2, 2))(skip)
    return skip, pooled

def decoder_block(inputs, skip_features, num_filters):
    """Upsample with a stride-2 transposed convolution, concatenate the skip tensor, then conv_block.

    Args:
        inputs: tensor coming from the previous (deeper) stage.
        skip_features: encoder tensor concatenated after upsampling.
        num_filters: filters for the transposed convolution and the conv_block.

    Returns:
        The conv_block output for this decoder stage.
    """
    upsampled = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(inputs)
    merged = Concatenate()([upsampled, skip_features])
    return conv_block(merged, num_filters)

def build_unet(input_shape):
    """Assemble the U-Net: four encoder stages, a bottleneck and four decoder stages.

    Args:
        input_shape: shape passed to the Keras Input layer (without the batch dimension).

    Returns:
        A Keras Model named "UNET" whose output is a single-channel sigmoid map.
    """
    inputs = Input(input_shape)

    # Encoder: keep each stage's pre-pooling tensor for the skip connections.
    skips = []
    x = inputs
    for filters in (64, 128, 256, 512):
        skip, x = encoder_block(x, filters)
        skips.append(skip)

    # Bottleneck
    x = conv_block(x, 1024)

    # Decoder: consume the skip tensors deepest-first.
    for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
        x = decoder_block(x, skip, filters)

    # Output layer: 1x1 convolution with sigmoid activation.
    outputs = Conv2D(1, 1, padding="same", activation="sigmoid")(x)

    return Model(inputs, outputs, name="UNET")
# Build the network at the configured input size (instead of a hard-coded 512x512x3)
# so the summary always reflects the IMG_* constants used for the data arrays.
model = build_unet((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL))
model.summary()


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

def iou(y_true, y_pred):
    """Intersection-over-union metric evaluated with NumPy through tf.numpy_function.

    The intersection is the sum of the element-wise product and the union is
    sum(y_true) + sum(y_pred) - intersection; 1e-15 is added to both terms of the
    ratio. The result is returned as a float32 tensor.
    """
    def _numpy_iou(truth, pred):
        overlap = (truth * pred).sum()
        combined = truth.sum() + pred.sum() - overlap
        ratio = (overlap + 1e-15) / (combined + 1e-15)
        return ratio.astype(np.float32)

    return tf.numpy_function(_numpy_iou, [y_true, y_pred], tf.float32)

# Small constant added to the numerator and denominator of the Dice ratio.
smooth = 1e-15
def dice_coef(y_true, y_pred):
    """Dice coefficient: (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)."""
    flat_true = tf.keras.layers.Flatten()(y_true)
    flat_pred = tf.keras.layers.Flatten()(y_pred)
    overlap = tf.reduce_sum(flat_true * flat_pred)
    total = tf.reduce_sum(flat_true) + tf.reduce_sum(flat_pred)
    return (2. * overlap + smooth) / (total + smooth)

def dice_loss(y_true, y_pred):
    """Dice loss, defined as one minus the Dice coefficient."""
    score = dice_coef(y_true, y_pred)
    return 1.0 - score


In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping

In [None]:
!mkdir files

In [None]:
import pandas as pd
# Write an empty CSV at the path that the training cell later hands to CSVLogger (csv_path).
data = pd.DataFrame()
data.to_csv('./files/data.csv')

In [None]:
# Save the model built above to the same file the training cell uses as checkpoint_path.
model.save('files/test_cp.h5')

In [None]:
# Hyperparameters
batch_size = 4
lr = 1e-4
num_epochs = 10
validation_split = 0.1  # fraction of the training arrays held out so that val_loss exists
checkpoint_path = "files/test_cp.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
csv_path = "files/data.csv"

# Make sure the checkpoint/log directory exists even if the mkdir cell was skipped.
os.makedirs(checkpoint_dir, exist_ok=True)

model = build_unet((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL))
metrics = [dice_coef, iou, Recall(), Precision()]
model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=lr), metrics=metrics)

# Every callback below monitors val_loss, so validation data must be supplied to fit().
# The checkpoint is evaluated once per epoch (the default save_freq): a batch-level
# save_freq has no validation metrics available, so save_best_only would never save.
callbacks = [
        ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),
        CSVLogger(csv_path),
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=False)
    ]

# batch_size is passed explicitly: without it Keras falls back to 32 for array inputs,
# which contradicts a steps_per_epoch computed for batches of 4 and exhausts the data.
# With batch_size set, Keras derives the number of steps per epoch on its own.
model.fit(
        X_train, Y_train,
        batch_size=batch_size,
        epochs=num_epochs,
        validation_split=validation_split,
        callbacks=callbacks
    )
model.save('model_data_science_bowl_2018.h5')

In [None]:
def getting_X_test():
    """Load all test images at the model input size and remember their original sizes.

    Images receive the same preprocessing as the training images (resize, then
    rescale so the brightest value becomes 255) so the model sees the same
    intensity distribution at inference time as during training.

    Returns:
        X_test: uint8 array of shape (n_samples, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL).
        sizes_test: list of [height, width] of each image before resizing, in the
            same order as X_test.
    """
    test_ids = next(os.walk(TEST_PATH))[1]
    X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL), dtype=np.uint8)
    sizes_test = []
    print('Getting and resizing test images ... ')
    sys.stdout.flush()
    for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
        path = os.path.join(TEST_PATH, id_)
        img = imread(os.path.join(path, 'images', id_ + '.png'))[:, :, :IMG_CHANNEL]
        sizes_test.append([img.shape[0], img.shape[1]])
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
        # Same max-normalisation as the training loader; an all-black image (peak 0)
        # is left untouched to avoid a division by zero.
        peak = np.amax(img)
        if peak > 0:
            img = img / peak * 255
        X_test[n] = img.astype(np.uint8)
    return X_test, sizes_test
# Seed the NumPy and TensorFlow random number generators.
# NOTE(review): placed here, seeding only affects random draws made after this cell
# (e.g. the np.random.randint sample picker in the prediction cell); on a top-to-bottom
# run the model has already been trained. Move these two lines before the model is
# built if reproducible training is wanted.
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Load the resized test images together with their original [height, width] sizes.
X_test, sizes_test = getting_X_test()

In [None]:
# Reload the model saved by the training cell. dice_coef and iou are user-defined
# metric functions, so they are handed to Keras through custom_objects.
model_2 = tf.keras.models.load_model('model_data_science_bowl_2018.h5',
                                     custom_objects = {'dice_coef':dice_coef, 'iou':iou})

In [None]:
# Per-pixel predictions for every test image.
preds_test = model_2.predict(X_test, verbose = 1)

# Binary masks obtained by thresholding the predictions at 0.5.
preds_test_t = (preds_test > 0.5).astype(np.uint8)

# Resize each (un-thresholded) prediction back to its image's original size.
preds_test_upsampled = []
for i, pred in enumerate(preds_test):
    original_size = (sizes_test[i][0], sizes_test[i][1])
    upsampled = resize(np.squeeze(pred), original_size, mode='constant', preserve_range=True)
    preds_test_upsampled.append(upsampled)

# Show one random test image followed by its thresholded prediction.
ix = np.random.randint(0, len(preds_test_t))
for picture in (X_test[ix], np.squeeze(preds_test_t[ix])):
    imshow(picture)
    plt.show()