In [2]:
# Execution-environment switch: set to True when running on Google Colab,
# leave as False for a local run. Later cells branch on this flag.
COLAB = False

In [3]:
# Resolve DATAPATH (root folder of the dataset) for the current environment.
# `Path` is imported here because this cell runs before the main imports cell;
# without it the Colab branch fails with a NameError on a fresh kernel.
from pathlib import Path

if COLAB:
    from google.colab import drive

    # Mount Google Drive so the dataset stored there becomes visible.
    drive.mount("/content/drive", force_remount=True)
    DATAPATH = Path("/content/drive/MyDrive/DataSets/data")

else:
    import sys

    # Add directory above current directory to path (only once, so that
    # re-running this cell does not keep growing sys.path).
    if ".." not in sys.path:
        sys.path.insert(0, "..")
    # Use a Path in both branches so DATAPATH always has the same type.
    DATAPATH = Path("data")

In [4]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras
from keras.models import Model
from keras.layers import (
    Input,
    Conv2D,
    MaxPooling2D,
    Conv2DTranspose,
    GaussianNoise,
    Lambda,
    Dropout,
    UpSampling2D,
)
from keras import backend as K
from keras.optimizer_v2 import adam
from keras.metrics import RootMeanSquaredError
from sklearn.model_selection import train_test_split
from pathlib import Path
from PIL import Image
from skimage import io
import math
from scipy.sparse import coo_matrix
import pickle
import tensorflow as tf
from tensorflow.keras.utils import Sequence

2021-12-28 19:58:48.397178: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/jamesholcombe/git/personal/cell-instance-segmentation/venv/lib/python3.8/site-packages/cv2/../../lib64:
2021-12-28 19:58:48.397213: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [5]:
def rle_decode(mask_rle, shape=(520, 704), color=1):
    """Convert a run-length encoded mask string into a 2-D image array.

    mask_rle: whitespace-separated string of alternating "start length" values,
        where start is a 1-based position in the flattened image
    shape: (height, width) of the array to return
    color: value written into every pixel covered by a run
    Returns a float32 numpy array of the given shape (0 outside the runs)
    """
    tokens = mask_rle.split()

    # Even positions hold the 1-based run starts, odd positions the run lengths.
    run_starts = [int(token) - 1 for token in tokens[0::2]]
    run_lengths = [int(token) for token in tokens[1::2]]

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.float32)
    for begin, length in zip(run_starts, run_lengths):
        flat_mask[begin : begin + length] = color

    return flat_mask.reshape(shape)

In [100]:
class DataGenerator(Sequence):
    """Generates data for Keras
    Sequence based data generator. Suitable for building data generator for training and prediction.

    Every row of the annotation CSV describes one run-length encoded instance
    mask. For each row of a batch the generator decodes the mask, takes the
    median mask pixel as the instance centre and cuts a ``chip_size`` x
    ``chip_size`` window around it out of both the mask (target) and the image
    (input). Rows whose window does not fit inside the image are replaced by
    randomly re-drawn usable rows of the same batch.
    """

    def __init__(
        self,
        image_directory,
        data_path,
        to_fit=True,
        batch_size=32,
        chip_size=180,
        dim=(520, 704),
    ):
        """Initialization
        :param image_directory: directory containing the ``<id>.png`` images
        :param data_path: CSV file providing at least the columns ``id`` and ``annotation``
        :param to_fit: True to return (X, y) batches, False to return X only
        :param batch_size: number of chips per batch
        :param chip_size: side length in pixels of the square chips
        :param dim: (height, width) used when decoding the run-length masks
        """
        # Harmless with the base class used here; kept so the base class can
        # set up any state of its own.
        super().__init__()
        self.data = pd.read_csv(data_path)
        self.image_directory = Path(image_directory)

        self.indexes = list(self.data.index)
        self.image_ids = self.data["id"].unique()
        self.image_indexes = dict(enumerate(self.image_ids))
        self.to_fit = to_fit
        self.batch_size = batch_size
        self.chip_size = chip_size
        self.dim = dim

    def __len__(self):
        """Denotes the number of batches per epoch
        :return: number of batches per epoch
        """
        # Floor division: a trailing partial batch is never served.
        return len(self.indexes) // self.batch_size

    def __getitem__(self, index):
        """Generate one batch of data
        :param index: index of the batch
        :return: X and y when fitting. X only when predicting
        """
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size : (index + 1) * self.batch_size]

        # Generate data
        X, y = self._generate_X_and_Y(indexes)
        if self.to_fit:
            return X, y
        return X

    def _chip_origin(self, mask):
        """Locate the top-left corner of the chip centred on the mask.
        :param mask: decoded 2-D instance mask
        :return: (x1, y1) as (row, column) of the chip corner, or None when the
            mask is empty or the chip would not lie fully inside the mask
        """
        rows, cols = np.where(mask == 1)
        if rows.size == 0:
            return None

        half = self.chip_size / 2
        x1 = int(np.rint(np.median(rows) - half))
        y1 = int(np.rint(np.median(cols) - half))

        # A negative start would wrap around when slicing and an end beyond the
        # border would be truncated; both give a wrongly sized chip.
        fits_rows = x1 >= 0 and x1 + self.chip_size <= mask.shape[0]
        fits_cols = y1 >= 0 and y1 + self.chip_size <= mask.shape[1]
        if not (fits_rows and fits_cols):
            return None
        return x1, y1

    def _load_image(self, image_id):
        """Read ``<image_id>.png`` and replicate its single band into 3 channels.
        :param image_id: value of the ``id`` column identifying the image
        :return: array with a trailing channel axis of length 3
        """
        # assumes the PNG is single-band (2-D) — TODO confirm for other datasets
        band = np.asarray(io.imread(self.image_directory / f"{image_id}.png"))
        return np.repeat(band[:, :, np.newaxis], repeats=3, axis=2)

    def _generate_X_and_Y(self, indexes):
        """Generates data containing one chip per requested annotation row
        :param indexes: index labels of the rows of ``self.data`` to load
        :return: X of shape (n, chip_size, chip_size, 3) and
            Y of shape (n, chip_size, chip_size, 1), with n = len(indexes)
        :raises ValueError: when no row of the batch yields a usable chip
        """
        size = self.chip_size

        # One (image id, x1, y1, mask chip) entry per usable annotation.
        chips = []
        for _, row in self.data.loc[indexes].iterrows():
            mask = rle_decode(row["annotation"], shape=self.dim)
            origin = self._chip_origin(mask)
            if origin is None:
                continue
            x1, y1 = origin
            chips.append((row["id"], x1, y1, mask[x1 : x1 + size, y1 : y1 + size]))

        if not chips:
            raise ValueError(
                "No annotation in this batch yields a full "
                f"{size}x{size} chip inside the image"
            )

        # Keep the batch size constant: stand-ins for the unusable rows are
        # drawn with replacement from the usable ones.
        missing = len(indexes) - len(chips)
        if missing:
            picks = np.random.randint(len(chips), size=missing)
            chips = chips + [chips[pick] for pick in picks]

        X = np.empty((len(chips), size, size, 3))
        Y = np.empty((len(chips), size, size, 1))

        # Annotations of one image are often neighbours in the CSV, so every
        # image is read at most once per batch.
        images = {}
        for slot, (image_id, x1, y1, mask_chip) in enumerate(chips):
            if image_id not in images:
                images[image_id] = self._load_image(image_id)
            img = images[image_id][x1 : x1 + size, y1 : y1 + size]

            # reshape doubles as a size check: it raises ValueError when the
            # image does not fully cover the chip window.
            X[slot] = np.reshape(img, (size, size, 3))
            Y[slot] = mask_chip[:, :, np.newaxis]

        return X, Y

   

Building the model

In [108]:
'''Parameters of the Vec2Instance parametrization'''

# Side length (pixels) of the square input chip and of the decoded mask.
img_dim = 180
# Number of output bands (a single band for the mask).
out_var = 1
# Width of the hidden layer of the vanilla neural network (MLP) of Vec2Instance.
hidden_size = 180
# Total number of MLP parameters:
#   hidden layer: 2 input weights + 1 bias per hidden unit -> 3 * hidden_size
#   output layer: hidden_size weights + 1 bias per band    -> (hidden_size + 1) * out_var
num_param = 3 * hidden_size + (hidden_size + 1) * out_var

'''Encoder Section'''

# Input: (height, width, 3 bands of RGB); the spatial size is left unspecified.
x_in = Input(shape=(None, None, 3))

# (filters, dilation) of each encoder stage. The first stage uses ordinary
# convolutions (dilation 1), the remaining three use dilated convolutions.
encoder_stages = ((32, 1), (32, 2), (64, 2), (64, 2))

x_temp = x_in
for stage_idx, (n_filters, dilation) in enumerate(encoder_stages):
    if stage_idx > 0:
        # 2x2 max pooling between consecutive stages (none after the last one).
        x_temp = MaxPooling2D((2, 2))(x_temp)
    conv_kwargs = dict(
        dilation_rate=(dilation, dilation), activation='relu', padding='same'
    )
    # Each stage: 3x3 convolution -> dropout -> 3x3 convolution.
    x_temp = Conv2D(n_filters, (3, 3), **conv_kwargs)(x_temp)
    x_temp = Dropout(0.25)(x_temp)
    x_temp = Conv2D(n_filters, (3, 3), **conv_kwargs)(x_temp)

# Two 1x1 convolutions followed by a linear 1x1 projection.
for _ in range(2):
    x_temp = Conv2D(64, (1, 1), activation='relu', padding='same')(x_temp)
# Output bands correspond to the number of parameters in the vanilla neural
# network (MLP) of Vec2Instance.
x_cnn = Conv2D(num_param, (1, 1), activation='linear', padding='same')(x_temp)

'''Extracting the vector at the middle of the encoder output. This vector parametrizes the shape of an instance.'''

def get_middle_param(arg_in):
    """Return the feature vector at the spatial centre of a feature map.

    arg_in: 4-D tensor indexed as (batch, rows, columns, channels)
    Returns the 2-D tensor (batch, channels) found at the middle row and
    middle column of every sample.
    """
    dims = tf.shape(arg_in)
    # Integer floor division keeps the indices int32, so no float round trip
    # and cast is needed. Rows and columns get their own midpoint so that
    # non-square feature maps are also sampled at their centre.
    mid_row = dims[1] // 2
    mid_col = dims[2] // 2
    dec_para = arg_in[:, mid_row, mid_col, :]
    return dec_para

'''Rearranging the middle vector into the vanilla neural network (MLP) of Vec2Instance'''

def decorder2D(arg_in):
    """Decode per-sample parameter vectors into masks with a tiny MLP.

    Each row of ``arg_in`` is split into the weights and biases of a
    two-layer network (2 inputs -> hidden_size tanh units -> out_var sigmoid
    outputs). That network is evaluated at every (x, y) coordinate of an
    img_dim x img_dim grid and the result is reshaped into an image.

    arg_in: 2-D tensor (batch, num_param)
    Returns a tensor of shape (batch, img_dim, img_dim, out_var).
    """
    batch = tf.shape(arg_in)[0]
    n_pixels = img_dim * img_dim

    # Network input: x and y coordinates of every pixel, with the origin
    # placed at the centre of the image.
    axis_values = tf.range(-img_dim/2, img_dim/2)
    grid_x, grid_y = tf.meshgrid(axis_values, axis_values)
    flat_x = K.transpose(K.flatten(tf.cast(grid_x, tf.float32)))
    flat_y = K.transpose(K.flatten(tf.cast(grid_y, tf.float32)))
    coords = K.reshape(K.stack((flat_x, flat_y), axis=1), (1, n_pixels, 2))
    lyr_in = K.tile(coords, (batch, 1, 1))

    # Layout of the parameter vector:
    # [x weights | y weights | hidden biases | output weights | output biases]
    hidden_bias_start = hidden_size * 2
    out_wgt_start = hidden_size * 3
    out_bias_start = hidden_size * (3 + out_var)
    wgt_x = arg_in[:, 0:hidden_size]
    wgt_y = arg_in[:, hidden_size:hidden_bias_start]
    hidden_bias = arg_in[:, hidden_bias_start:out_wgt_start]
    out_wgt = arg_in[:, out_wgt_start:out_bias_start]
    out_bias = arg_in[:, out_bias_start:out_bias_start + out_var]

    # Multiplying a column of ones with a bias row repeats that bias for
    # every pixel.
    ones_column = tf.ones((batch, n_pixels, 1))

    # Hidden layer (tanh).
    lyr_hidden_wgt = K.stack((wgt_x, wgt_y), axis=1)
    lyr_hidden_b = tf.matmul(ones_column, K.reshape(hidden_bias, (batch, 1, hidden_size)))
    lyr_hidden_out = K.tanh(tf.matmul(lyr_in, lyr_hidden_wgt) + lyr_hidden_b)

    # Output layer (sigmoid).
    lyr_out_wgt = K.reshape(out_wgt, (batch, hidden_size, out_var))
    lyr_out_b = tf.matmul(ones_column, K.reshape(out_bias, (batch, 1, out_var)))
    lyr_out = K.sigmoid(tf.matmul(lyr_hidden_out, lyr_out_wgt) + lyr_out_b)

    return K.reshape(lyr_out, (batch, img_dim, img_dim, out_var))

# Decoder head: take the parameter vector at the centre of the encoder output
# and let the Vec2Instance MLP decode it into a mask.
x_temp = Lambda(get_middle_param)(x_cnn)
x_out = Lambda(decorder2D)(x_temp)

# End-to-end model: image chip in, decoded mask out.
model = Model(inputs=x_in, outputs=x_out)

# Adam optimizer with mean squared error as both the loss and the reported metric.
model.compile(optimizer='adam', loss='mse', metrics=['mean_squared_error'])

# Print the model summary.
model.summary()

Model: "model_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_14 (InputLayer)       [(None, None, None, 3)]   0         
                                                                 
 conv2d_143 (Conv2D)         (None, None, None, 32)    896       
                                                                 
 dropout_52 (Dropout)        (None, None, None, 32)    0         
                                                                 
 conv2d_144 (Conv2D)         (None, None, None, 32)    9248      
                                                                 
 max_pooling2d_39 (MaxPoolin  (None, None, None, 32)   0         
 g2D)                                                            
                                                                 
 conv2d_145 (Conv2D)         (None, None, None, 32)    9248      
                                                          

In [110]:
# Build the training generator: images live in <DATAPATH>/train and the
# annotation table is read from the CSV inside that folder.
train_image_dir = DATAPATH / Path("train")
train_annotations_csv = DATAPATH / Path("train/.csv")
data_gen = DataGenerator(train_image_dir, train_annotations_csv)

# Short 4-epoch run; increase `epochs` for a longer training run.
model.fit(data_gen, epochs=4)

0
Epoch 1/4
4768
5344
   1/2299 [..............................] - ETA: 1:41:20 - loss: 0.4754 - mean_squared_error: 0.475466720
   2/2299 [..............................] - ETA: 1:22:07 - loss: 0.4608 - mean_squared_error: 0.460873280
   3/2299 [..............................] - ETA: 1:23:18 - loss: 0.4526 - mean_squared_error: 0.452645344
   4/2299 [..............................] - ETA: 1:26:56 - loss: 0.4378 - mean_squared_error: 0.437831456
   5/2299 [..............................] - ETA: 1:26:39 - loss: 0.4264 - mean_squared_error: 0.426413280
   6/2299 [..............................] - ETA: 1:29:47 - loss: 0.4164 - mean_squared_error: 0.416440928
   7/2299 [..............................] - ETA: 1:29:22 - loss: 0.4031 - mean_squared_error: 0.403133760
   8/2299 [..............................] - ETA: 1:31:35 - loss: 0.3895 - mean_squared_error: 0.389510016
   9/2299 [..............................] - ETA: 1:31:12 - loss: 0.3797 - mean_squared_error: 0.379721120
  10/2299 [....

KeyboardInterrupt: 