# Configure Python environment

## load required modules

In [2]:
import os
import re
import sys
import numpy as np 
import pandas as pd 
import cv2
import random
import skimage
from glob import glob
from PIL import Image
from PIL.ExifTags import TAGS
from google.colab import drive
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
!pip install awscli

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting awscli
  Downloading awscli-1.27.119-py3-none-any.whl (4.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.1/4.1 MB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting PyYAML<5.5,>=3.10
  Downloading PyYAML-5.4.1-cp39-cp39-manylinux1_x86_64.whl (630 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m630.1/630.1 kB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rsa<4.8,>=3.1.2
  Downloading rsa-4.7.2-py3-none-any.whl (34 kB)
Collecting s3transfer<0.7.0,>=0.6.0
  Downloading s3transfer-0.6.0-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.6/79.6 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore==1.29.119
  Downloading botocore-1.29.119-py3-none-any.whl (10.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m63.5 MB/s

## mount google drive

In [4]:
GD_PATH = '/content/drive'

# Mount Google Drive unless the mount point is already present
if os.path.exists(GD_PATH) :
  print ('Google drive has mounted.')
else : 
  drive.mount(GD_PATH)

# if cellimages data is unavailable
CELLS_PATH = '/content/drive/MyDrive/cellimages'
AWSCONFIG_PATH = '/content/drive/MyDrive/config/awscli.ini'
if not os.path.exists(CELLS_PATH) : 

  import subprocess

  os.mkdir(CELLS_PATH)
  os.environ['AWS_SHARED_CREDENTIALS_FILE'] = AWSCONFIG_PATH
  # Argument list instead of a shell string: no quoting issues and no shell involved
  cmd_sync = ['aws', 's3', 'sync', 's3://jingqicomputervisiontest/cellimages', CELLS_PATH]
  try :
    # Keep stderr so that a failure can be diagnosed
    sync_run = subprocess.run(cmd_sync, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
    sync_ok, sync_err = sync_run.returncode == 0, sync_run.stderr.strip()
  except FileNotFoundError as err :
    # The aws executable itself is missing
    sync_ok, sync_err = False, str(err)
  if not sync_ok : 
    print ('Data sync failed.')
    if sync_err :
      print (sync_err)
    # Remove the directory again if nothing was downloaded; otherwise the next run
    # would find it and wrongly report the data as already available.
    if not os.listdir(CELLS_PATH) :
      os.rmdir(CELLS_PATH)
else : 
  print ('Cellimages is already available.')

Mounted at /content/drive
Cellimages is already available.


# Some functions

In [5]:
def image_loader(path, suffix) : 
    '''image_loader walks the directory tree rooted at path and returns a list with the full path of every file whose name ends with suffix.'''
    import os

    return [
        os.path.join(folder, filename)
        for folder, _, filenames in os.walk(path)
        for filename in filenames
        if filename.endswith(suffix)
    ]

def get_imageEXIF(fn) : 
    '''get_imageEXIF retrieve the image metadata.

    Params :
        fn: path of the image file.

    Returns :
        meta_data: dict mapping the tag name (from PIL.ExifTags.TAGS) to its value;
                   tags whose numeric key is not in TAGS are left out.
    '''
    from PIL import Image 
    from PIL.ExifTags import TAGS
    
    # The context manager closes the underlying file handle, which Image.open
    # would otherwise keep open.
    with Image.open(fn) as image : 
        meta_data = {
            TAGS[key]: value
            for key, value in image.getexif().items()
            if key in TAGS
        }
    
    return meta_data

def resize_with_padding(image, new_dimension) : 
    '''resize_with_padding resize the image while maintains aspect ratio.
    
    The image is scaled by a single factor so that it fits inside new_dimension,
    then centred on a black canvas of exactly new_dimension.
    
    Params : 
        image: Image (numpy.array) to be resized;
        new_dimension: a tuple (width, height) of new image.
        
    Returns : 
        image: Resized image with padding
    '''
    import cv2
  
    ori_dimension = (image.shape[1], image.shape[0])
    # Use the tighter of the two per-axis ratios so that neither side can exceed the
    # target (for a square target this equals max(new)/max(original)).
    ratio = min(new_dimension[0] / ori_dimension[0], new_dimension[1] / ori_dimension[1])
    # Resize with maintained aspect ratio; never let a side collapse to 0 pixels
    new_size = tuple([max(1, int(x*ratio)) for x in ori_dimension])
    image = cv2.resize(image, new_size)
    # Padding: split the leftover evenly, the odd pixel goes to bottom/right
    delta_w = new_dimension[0] - new_size[0]
    delta_h = new_dimension[1] - new_size[1]
    top, bottom = delta_h//2, delta_h-(delta_h//2)
    left, right = delta_w//2, delta_w-(delta_w//2)
    image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0,0,0))
    
    return image

def find_ConvexHull(contours) : 
    '''find_ConvexHull pools the points of all given contours and returns their convex hull.
    
    Params :
        contours: contours (array of numpy.array) to be processed.
        
    Returns : 
        the result of cv2.convexHull on the pooled points.
    '''
    import cv2
    
    # Flatten "list of contours" into one array of points
    all_points = np.array([point for contour in contours for point in contour])
    hull = cv2.convexHull(all_points)
    return hull
    
def generate_mask(image, thresholding, threshold=127, min_area=50) : 
    '''generate_mask generates one filled mask per selected contour of the input image.
    
    Params : 
        image: Image (numpy.array) with contours.
        thresholding: threshold type passed to cv2.threshold (e.g. cv2.THRESH_BINARY).
        threshold: grey level used for binarisation (default 127).
        min_area: contours whose signed area is below this value are skipped (default 50).
        
    Returns
        [images]: A list of image (numpy.array), each image contains a single cell mask.
                  The list is empty when no contour is found.'''
    import cv2
    
    # Apply threshold to input image
    _, thresh = cv2.threshold(image, threshold, 255, thresholding)
    # Extract the contours from the binary image
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

    # No contour at all: hierarchy is None and cannot be indexed
    if hierarchy is None :
        return []

    masks = []

    # Each hierarchy entry ends with (first_child, parent).
    # Keep the contours without children that either have no parent (-1)
    # or have a parent with index > 0.
    for idx, h in enumerate(hierarchy[0]) :
        first_child, parent = h[2], h[3]
        if first_child != -1 or not (parent == -1 or parent > 0) : 
            continue
        # Skip the contour of area < min_area, which is unlikely a cell.
        # oriented=True makes the area signed, so negative areas are skipped too.
        if cv2.contourArea(contours[idx], oriented=True) < min_area :
            continue
        # Generate a black image
        mask = np.zeros(image.shape, dtype=np.uint8)
        # Draw contour and fill it with white
        cv2.drawContours(mask, contours, idx, 255, -1)
        masks.append(mask)
    return masks

def iou_coef(y_true, y_pred, smooth=1) : 
    '''iou_coef computes the smoothed Intersection-Over-Union (IoU, Jaccard Index) of prediction and ground truth.

    Sums are taken over axes 1, 2 and 3 of the inputs and the resulting ratios are averaged over axis 0.'''
    from keras import backend as K
    
    reduce_axes = [1,2,3]
    overlap = K.sum(K.abs(y_true * y_pred), axis=reduce_axes)
    combined = K.sum(y_true, reduce_axes) + K.sum(y_pred, reduce_axes) - overlap
    
    return K.mean((overlap + smooth) / (combined + smooth), axis=0)

def image_plot_1by2(image, label) : 
    '''image_plot_1by2 draws an image and its label (both numpy arrays) next to each other in a 1X2 figure.'''
    _, axes = plt.subplots(1,2)
    panels = (('Image', image), ('Label', label))
    # Title every panel first, then draw the pictures
    for ax, (title, _) in zip(axes, panels) : 
        ax.set_title(title)
    for ax, (_, picture) in zip(axes, panels) : 
        ax.imshow(picture)

def image_label_display(fn1, fn2) : 
    '''image_label_display shows the image and its labels side-by-side.
    Param
        fn1: the path to the image file;
        fn2: an iterable of paths to the label files; they are merged with a pixel-wise maximum.
    Returns
        None. But show the images.
    Raises
        FileNotFoundError: if the image or one of the label files cannot be read.'''

    import cv2 
    import matplotlib.pyplot as plt

    image = cv2.imread(fn1, cv2.IMREAD_COLOR)
    if image is None : 
        raise FileNotFoundError(f'Cannot read image file: {fn1}')
    # OpenCV loads colour images as BGR while matplotlib expects RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # uint8 keeps the 0..255 label range in a dtype that imshow renders correctly
    label = np.zeros(image.shape, dtype=np.uint8)
    for fn in fn2 : 
        label_ = cv2.imread(fn, cv2.IMREAD_GRAYSCALE)
        if label_ is None : 
            raise FileNotFoundError(f'Cannot read label file: {fn}')
        # Add a channel axis so that the grey label broadcasts over the colour channels
        label_ = np.expand_dims(label_, axis=-1)
        label = np.maximum(label, label_)

    # Plot the figures
    fig, (ax1, ax2) = plt.subplots(1,2)
    ax1.set_title('Image')
    ax2.set_title('Label')
    ax1.imshow(image)
    ax2.imshow(label)
    
def RLE_encoding(image, binary=True) : 
    '''RLE_encoding generates the run length encoding of input binary image.

    The image is flattened and every maximal run of pixels equal to 1 is reported
    as a (start, length) pair, start being the 0-based index in the flattened image.
    Runs that begin at the first pixel or end at the last pixel are included.
    Any pixel value other than 1 is treated as background.

    Params 
        image: numpy array holding 0 (background) and 1 (signal);
        binary: True in default; currently unused, kept for compatibility.
    
    Returns
        rle: 1-D integer numpy array [start_0, length_0, start_1, length_1, ...];
             empty if the image has no signal pixel.'''
    signal = np.asarray(image).flatten() == 1

    # Pad with background on both sides so that runs touching either end of the
    # image still produce a rising and a falling edge.
    padded = np.concatenate(([False], signal, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])

    # Edges alternate: rising (run start), falling (one past the run end)
    starts = edges[0::2]
    lengths = edges[1::2] - starts

    rle = np.empty(2 * starts.size, dtype=np.int64)
    rle[0::2] = starts
    rle[1::2] = lengths
    return rle
  
def rle_decode(mask_rle, shape, color=1):
    '''
    Rebuild a mask from its run-length encoding.

    mask_rle: whitespace-separated string "start length start length ..."
              (starts are 0-based positions in the flattened mask)
    shape: (height,width) of array to return 
    color: value written into the mask pixels (1 in default)
    Returns float32 numpy array of the given shape, color - mask, 0 - background
    '''
    numbers = np.asarray(mask_rle.split(), dtype=int)
    # Even positions hold the run starts, odd positions the run lengths
    run_starts = numbers[0::2]
    run_stops = run_starts + numbers[1::2]

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.float32)
    for begin, stop in zip(run_starts, run_stops):
        flat_mask[begin:stop] = color
    return flat_mask.reshape(shape)

def image_label_gen(df) : 
    '''A generator return image and label (ground truth) from a csvfile, which records one label for one image per row.

    Params
        df: dataframe with (at least) the columns 'name', 'label', 'width', 'height',
            'image_path' and 'bit_depth'; all rows sharing a 'name' belong to one image.

    Yields
        image: the image as float32, divided by 255 (8 bit) or 65535 (16 bit);
        label: float32 array of shape (height, width), pixel-wise maximum of all decoded masks.

    Raises
        FileNotFoundError: if an image file cannot be read.'''
       
    # sort=False keeps the images in order of first appearance; each image's rows
    # are selected once instead of re-filtering the frame for every field.
    for name, rows in df.groupby('name', sort=False) :
        first = rows.iloc[0]
        # Columns are addressed by name so that an extra or reordered column
        # (e.g. a saved index) cannot shift the fields.
        path = first['image_path']
        width = int(first['width'])
        height = int(first['height'])
        annotations = rows['label']
        # bit_depth may look like '8', '16' or '(8, 8, 8)'; the first number decides
        bit = re.search(r'\d+', str(first['bit_depth'])).group()   
        # image
        image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if image is None : 
            raise FileNotFoundError(f'Cannot read image file: {path}')
        image = image.astype(np.float32)
        if bit == '8' : 
          image = image / 255
        elif bit == '16' : 
          image = image / (257 * 255)
        # ground truth
        label = np.zeros((height, width), dtype=np.float32)   
        for rle in annotations : 
            # rle[1:-1] strips the surrounding brackets of the stored string
            mask = rle_decode(rle[1:-1], (height, width), color=1)
            label = np.maximum(label, mask)
            
        yield image, label

# Image Preprocessing
The image preprocessing step extracts the following information from each image and records it in a pandas dataframe, one label of an image per row. 
1. name: image name
2. label: run length encoded label, [start, length] * Num_of_labels
3. width: image width, i.e. image.shape[1]
4. height: image height, i.e. image.shape[0]
5. cell_type: biological information of the cell imaged
6. image_path: file path
7. bit_depth: image bit depth, 8 bit or 16 bit
8. image_format: image format, for example, 'tiff', 'tif', 'png'...
9. channels: the number of channels

In [None]:
# Accumulator list (starts empty)
grand_list = list()
# Column order shared by every per-dataset table
column_names = [
    'name',
    'label',
    'width',
    'height',
    'cell_type',
    'image_path',
    'bit_depth',
    'image_format',
    'channels',
]

## 1. BBBC007
The dataset contains Drosophila melanogaster Kc167 cells stained for DNA and actin. Images were acquired using a motorized Zeiss Axioplan2 and an Axiocam MRm camera. Each image, in 8-bit gray-scale TIFF format, is roughly 512 X 512 pixels, with cells roughly 25 pixels in diameter and about 80 cells per image on average.

In [None]:
# Parameters for BBBC007 dataset
thresholding = cv2.THRESH_BINARY_INV
bbbc007 = []

# Name pattern for actin-stained images
# (raw strings: \d and \w are regex classes, not Python string escapes)
patterns = [r'A9 p\d+f.tif$', r'\w+d1.tif$', r'\w+F_2UL.tif$']

# Load images
image_address = '/media/jingqi/JJStore2/datasets/BBBC007_v1_images'
image_suffix = 'tif'
label_address = '/media/jingqi/JJStore2/datasets/BBBC007_v1_outlines'
label_suffix = 'tif'
image_fns = image_loader(image_address, image_suffix)
label_fns = image_loader(label_address, label_suffix)

# Loop through image filenames
for fn in image_fns : 
    
    # Select actin-stained image; any() handles a file once even if several patterns match
    if not any(re.search(p, fn) for p in patterns) : 
        continue

    # Collect info from image (the EXIF data is read from disk only once)
    exif = get_imageEXIF(fn)
    info = dict.fromkeys(column_names, '')
    info['image_path'] = fn
    info['name'] = os.path.basename(fn).split('.')[0]
    info['image_format'] = os.path.basename(fn).split('.')[1]
    info['cell_type'] = 'drosophila melanogaster Kc167 cells'
    info['width'] = exif['ImageWidth'] 
    info['height'] = exif['ImageLength']
    info['channels'] = exif['SamplesPerPixel']
    info['bit_depth'] = exif['BitsPerSample']
    
    # Collect RLE from label
    # The name is escaped so that it is matched literally, not as a regex
    name_pattern = re.escape(info['name'])
    candidates = [label_fn for label_fn in label_fns if re.search(name_pattern, label_fn)]
    if not candidates : 
        # Same policy as the datasets that look labels up by path: skip unlabeled images
        print (f'No outline found for {fn}; skipped.')
        continue
    lfn = candidates[0]
    # Generate label masks. It should be safe to read in the image in gray-scale.
    label_image = cv2.imread(lfn, cv2.IMREAD_GRAYSCALE)
    label_masks = generate_mask(label_image, thresholding)
    for mask in label_masks : 
        _info = info.copy()
        # Scale the mask to 0/1 before encoding
        mask = mask / np.amax(mask)
        rle = RLE_encoding(mask)
        _info['label'] = rle
        bbbc007.append(_info)

In [None]:
# Write bbbc007 to a pandas dataframe
# Build the BBBC007 table from the collected records; columns follow column_names.
bbbc007_df = pd.DataFrame(bbbc007, columns=column_names)
# Show the first rows as the cell output.
bbbc007_df.head()

Unnamed: 0,name,label,width,height,cell_type,image_path,bit_depth,image_format,channels
0,A9 p10f,"[185014, 2, 185456, 11, 185905, 13, 186354, 16...",450,450,drosophila melanogaster Kc167 cells,/media/jingqi/JJStore2/datasets/BBBC007_v1_ima...,"(8, 8, 8)",tif,3
1,A9 p10f,"[183689, 4, 184136, 8, 184584, 11, 185033, 12,...",450,450,drosophila melanogaster Kc167 cells,/media/jingqi/JJStore2/datasets/BBBC007_v1_ima...,"(8, 8, 8)",tif,3
2,A9 p10f,"[174174, 4, 174623, 8, 175072, 11, 175523, 11,...",450,450,drosophila melanogaster Kc167 cells,/media/jingqi/JJStore2/datasets/BBBC007_v1_ima...,"(8, 8, 8)",tif,3
3,A9 p10f,"[167446, 8, 167894, 12, 168343, 16, 168792, 18...",450,450,drosophila melanogaster Kc167 cells,/media/jingqi/JJStore2/datasets/BBBC007_v1_ima...,"(8, 8, 8)",tif,3
4,A9 p10f,"[163887, 1, 164333, 12, 164781, 15, 165229, 18...",450,450,drosophila melanogaster Kc167 cells,/media/jingqi/JJStore2/datasets/BBBC007_v1_ima...,"(8, 8, 8)",tif,3


In [None]:
# Save the BBBC007 table as CSV.
# NOTE(review): the dataframe index is written as well (no index=False here) — confirm that is intended.
bbbc007_df.to_csv('/home/jingqi/ucsd_bootcamp/Cell_Instance_segmentation/bbbc007_cells.csv')

## 2. BBBC009 
This image set consists of five differential interference contrast (DIC) images of red blood cells.

In [None]:
# Parameters for BBBC009 dataset
thresholding = cv2.THRESH_BINARY_INV
bbbc009 = []

# Load images
image_address = '/media/jingqi/JJStore2/datasets/BBBC009_v1_images/human_rbc_dic_images'
image_suffix = 'tif'
label_address = '/media/jingqi/JJStore2/datasets/BBBC009_v1_outlines/human_rbc_dic_outlines'
label_suffix = 'tif'
image_fns = image_loader(image_address, image_suffix)
label_fns = image_loader(label_address, label_suffix)

# Loop through image filenames
for fn in image_fns : 
           
    # Collect info from image (the EXIF data is read from disk only once)
    exif = get_imageEXIF(fn)
    info = dict.fromkeys(column_names, '')
    info['image_path'] = fn
    info['name'] = os.path.basename(fn).split('.')[0]
    info['image_format'] = os.path.basename(fn).split('.')[1]
    info['cell_type'] = 'human red blood cells'
    info['width'] = exif['ImageWidth'] 
    info['height'] = exif['ImageLength']
    info['channels'] = exif['SamplesPerPixel']
    info['bit_depth'] = exif['BitsPerSample']
    
    # Collect RLE from label
    # The name is escaped so that it is matched literally, not as a regex
    name_pattern = re.escape(info['name'])
    candidates = [label_fn for label_fn in label_fns if re.search(name_pattern, label_fn)]
    if not candidates : 
        # Same policy as the datasets that look labels up by path: skip unlabeled images
        print (f'No outline found for {fn}; skipped.')
        continue
    lfn = candidates[0]
    # Generate label masks. It should be safe to read in the image in gray-scale.
    label_image = cv2.imread(lfn, cv2.IMREAD_GRAYSCALE)
    label_masks = generate_mask(label_image, thresholding)
    for mask in label_masks : 
        _info = info.copy()
        # Scale the mask to 0/1 before encoding
        mask = mask / np.amax(mask)
        rle = RLE_encoding(mask)
        _info['label'] = rle
        bbbc009.append(_info)

In [None]:
# Write bbbc009 to a pandas dataframe
# Build the BBBC009 table from the collected records; columns follow column_names.
bbbc009_df = pd.DataFrame(bbbc009, columns=column_names)
# Show the first rows as the cell output.
bbbc009_df.head()

Unnamed: 0,name,label,width,height,cell_type,image_path,bit_depth,image_format,channels
0,48hr-001-DIC,"[401774, 6, 402572, 10, 403370, 14, 404169, 17...",800,600,human red blood cells,/media/jingqi/JJStore2/datasets/BBBC009_v1_ima...,8,tif,1
1,48hr-001-DIC,"[397853, 11, 398651, 15, 399449, 19, 400248, 2...",800,600,human red blood cells,/media/jingqi/JJStore2/datasets/BBBC009_v1_ima...,8,tif,1
2,48hr-001-DIC,"[383040, 8, 383836, 16, 384634, 20, 385433, 22...",800,600,human red blood cells,/media/jingqi/JJStore2/datasets/BBBC009_v1_ima...,8,tif,1
3,48hr-001-DIC,"[371780, 10, 372576, 18, 373374, 22, 374173, 2...",800,600,human red blood cells,/media/jingqi/JJStore2/datasets/BBBC009_v1_ima...,8,tif,1
4,48hr-001-DIC,"[356500, 10, 357296, 18, 358092, 25, 358889, 3...",800,600,human red blood cells,/media/jingqi/JJStore2/datasets/BBBC009_v1_ima...,8,tif,1


In [None]:
# Save the BBBC009 table as CSV.
# NOTE(review): the dataframe index is written as well (no index=False here) — confirm that is intended.
bbbc009_df.to_csv('/home/jingqi/ucsd_bootcamp/Cell_Instance_segmentation/bbbc009_cells.csv')

## 3. BBBC018
The dataset contains images of human colon-cancer cells. The samples were stained with Hoechst33342 for nucleus, phospho-histone H3 indicating mitosis, and phalloidin for actin. Each image is 512 X 512 pixels.

In [None]:
# Parameters for BBBC018 dataset
thresholding = cv2.THRESH_BINARY
bbbc018 = []

# Name pattern for actin-stained images
# (raw string: \w is a regex class, not a Python string escape)
patterns = [r'\w+-actin_0.tif$']

# Load images
image_address = '/media/jingqi/JJStore2/datasets/BBBC018_v1_images'
image_suffix = 'tif'
label_address = '/media/jingqi/JJStore2/datasets/BBBC018_v1_outlines'
label_suffix = 'png'
image_fns = image_loader(image_address, image_suffix)

# Loop through image filenames
for fn in image_fns : 
    
    # Select actin-stained image; any() handles a file once even if several patterns match
    if not any(re.search(p, fn) for p in patterns) : 
        continue

    name = os.path.basename(fn).split('.')[0]

    # Locate the label first: images without an outline file are skipped
    label_name = name.replace('actin_0', 'cells')
    lfn = os.path.join(label_address, label_name + '.' + label_suffix)
    if not os.path.isfile(lfn) : 
        continue 

    # Collect info from image (the EXIF data is read from disk only once)
    exif = get_imageEXIF(fn)
    info = dict.fromkeys(column_names, '')
    info['image_path'] = fn
    info['name'] = name
    info['image_format'] = os.path.basename(fn).split('.')[1]
    info['cell_type'] = 'human colon-cancer cell line HT29'
    info['width'] = exif['ImageWidth'] 
    info['height'] = exif['ImageLength']
    info['channels'] = exif['SamplesPerPixel']
    info['bit_depth'] = exif['BitsPerSample']
    
    # Collect RLE from label
    # Generate label masks. It should be safe to read in the image in gray-scale.
    label_image = cv2.imread(lfn, cv2.IMREAD_GRAYSCALE)
    label_masks = generate_mask(label_image, thresholding)
    for mask in label_masks : 
        _info = info.copy()
        # Scale the mask to 0/1 before encoding
        mask = mask / np.amax(mask)
        rle = RLE_encoding(mask)
        _info['label'] = rle
        bbbc018.append(_info)

In [None]:
# Write bbbc018 to a pandas dataframe
# Build the BBBC018 table from the collected records; columns follow column_names.
bbbc018_df = pd.DataFrame(bbbc018, columns=column_names)
# Show the first rows as the cell output.
bbbc018_df.head()

Unnamed: 0,name,label,width,height,cell_type,image_path,bit_depth,image_format,channels
0,00733-actin_0,"[65253, 1, 65762, 7, 66273, 10, 66769, 4, 6677...",512,512,human colon-cancer cell line HT29,/media/jingqi/JJStore2/datasets/BBBC018_v1_ima...,16,tif,1
1,00733-actin_0,"[49374, 3, 49883, 9, 50392, 13, 50901, 18, 514...",512,512,human colon-cancer cell line HT29,/media/jingqi/JJStore2/datasets/BBBC018_v1_ima...,16,tif,1
2,00733-actin_0,"[248424, 1, 248935, 3, 249446, 9, 249958, 10, ...",512,512,human colon-cancer cell line HT29,/media/jingqi/JJStore2/datasets/BBBC018_v1_ima...,16,tif,1
3,00733-actin_0,"[246952, 4, 247462, 11, 247972, 16, 248483, 20...",512,512,human colon-cancer cell line HT29,/media/jingqi/JJStore2/datasets/BBBC018_v1_ima...,16,tif,1
4,00733-actin_0,"[243334, 9, 243347, 2, 243838, 24, 244349, 26,...",512,512,human colon-cancer cell line HT29,/media/jingqi/JJStore2/datasets/BBBC018_v1_ima...,16,tif,1


In [None]:
# Save the BBBC018 table as CSV.
# NOTE(review): the dataframe index is written as well (no index=False here) — confirm that is intended.
bbbc018_df.to_csv('/home/jingqi/ucsd_bootcamp/Cell_Instance_segmentation/bbbc018_cells.csv')

## 4. Human CHO cells

In [None]:
# Parameters for CHO dataset
thresholding = cv2.THRESH_BINARY
cho = []

# Load images
image_address = '/media/jingqi/JJStore2/datasets/images/images'
image_suffix = 'png'
label_address = '/media/jingqi/JJStore2/datasets/ground_truth/ground_truth'
label_suffix = 'png'
image_fns = image_loader(image_address, image_suffix)

# Loop through image filenames
for fn in image_fns : 

    name = os.path.basename(fn).split('.')[0]

    # Locate the label first, so that images without ground truth are skipped
    # before the (comparatively expensive) image decode.
    lfn = os.path.join(label_address, name + '.' + label_suffix)
    if not os.path.isfile(lfn) : 
        continue 

    image = cv2.imread(fn, cv2.IMREAD_ANYDEPTH)
    if image is None : 
        # cv2.imread signals an unreadable file by returning None
        print (f'Cannot read image {fn}; skipped.')
        continue

    # Collect info from image
    info = dict.fromkeys(column_names, '')
    info['image_path'] = fn
    info['name'] = name
    info['image_format'] = os.path.basename(fn).split('.')[1]
    info['cell_type'] = 'human CHO cells'
    info['width'] = image.shape[1]
    info['height'] = image.shape[0]
    info['channels'] = 1
    # Derive the bit depth from the dtype of the decoded array
    if image.dtype == 'uint8' : 
        bit_depth = 8
    elif image.dtype == 'uint16' : 
        bit_depth = 16
    else : 
        bit_depth = 'unknown'
    info['bit_depth'] = bit_depth
            
    # Collect RLE from label
    # Generate label masks. It should be safe to read in the image in gray-scale.
    label_image = cv2.imread(lfn, cv2.IMREAD_GRAYSCALE)
    label_masks = generate_mask(label_image, thresholding)
    for mask in label_masks : 
        _info = info.copy()
        # Scale the mask to 0/1 before encoding
        mask = mask / np.amax(mask)
        rle = RLE_encoding(mask)
        _info['label'] = rle
        cho.append(_info)

In [None]:
# Write cho to a pandas dataframe; columns follow column_names.
cho_df = pd.DataFrame(cho, columns=column_names)
# Show the first rows as the cell output.
cho_df.head()

Unnamed: 0,name,label,width,height,cell_type,image_path,bit_depth,image_format,channels
0,cho26,"[1271848, 6, 1273217, 18, 1274592, 21, 1275948...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1
1,cho26,"[1154943, 9, 1156316, 15, 1157687, 24, 1159062...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1
2,cho26,"[977304, 2, 978677, 6, 980050, 11, 981423, 15,...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1
3,cho26,"[716399, 10, 717774, 13, 719147, 18, 720522, 2...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1
4,cho26,"[712857, 6, 714229, 15, 715603, 20, 716977, 24...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1


In [None]:
# Save the CHO table as CSV.
# NOTE(review): the dataframe index is written as well (no index=False here) — confirm that is intended.
cho_df.to_csv('/home/jingqi/ucsd_bootcamp/Cell_Instance_segmentation/cho_cells.csv')

In [None]:
# Combine all dataframes
# Stack the four per-dataset tables.
# ignore_index=True already gives the combined frame a fresh 0..n-1 index,
# so no separate re-indexing step is needed.
comb_df = pd.concat([bbbc007_df, bbbc009_df, bbbc018_df, cho_df], axis=0, ignore_index=True)
# Show the last rows as the cell output.
comb_df.tail()

Unnamed: 0,name,label,width,height,cell_type,image_path,bit_depth,image_format,channels
8591,cho60,"[505630, 4, 507004, 10, 508378, 14, 509753, 17...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1
8592,cho60,"[409241, 3, 410615, 7, 411989, 10, 413357, 20,...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1
8593,cho60,"[399578, 9, 400952, 13, 402326, 16, 403701, 18...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1
8594,cho60,"[193317, 7, 194682, 21, 196056, 24, 197429, 29...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1
8595,cho60,"[59769, 10, 61143, 20, 62517, 36, 63892, 40, 6...",1376,1032,human CHO cells,/media/jingqi/JJStore2/datasets/images/images/...,8,png,1


In [None]:
# Save the combined table without the dataframe index.
comb_df.to_csv('/home/jingqi/ucsd_bootcamp/Cell_Instance_segmentation/all_cells.csv', index=False)

In [None]:
# Copy files in 'Cell_Instance_segmentation' directory
# Directory that receives a copy of every image file
dir_path = '/home/jingqi/ucsd_bootcamp/Cell_Instance_segmentation/cell_images'
# exist_ok=True lets the cell be re-run without failing on the existing directory
os.makedirs(dir_path, exist_ok=True)

In [None]:
import shutil

# Copy every distinct image referenced in comb_df into dir_path.
# The destination keeps only the file's basename, so two source paths with the
# same basename would be written to the same destination file.
for fn in comb_df.loc[:, 'image_path'].unique() : 
    name = os.path.basename(fn)
    shutil.copyfile(fn, os.path.join(dir_path, name))

In [None]:
def fpath(x, y) : 
    '''fpath joins directory x and file name y into one path.'''
    return os.path.join(x, y)

# Project-relative directory that holds the copied image files
rel_image_dir = 'Cell_Instance_segmentation/cell_images'

test_df = comb_df.copy()
# Series.apply hands over one value per call, so the directory is bound here and
# only the file name of the original absolute path is kept.
test_df['image_path'] = test_df['image_path'].apply(lambda p : fpath(rel_image_dir, os.path.basename(p)))
test_df.head()

Unnamed: 0,name,label,width,height,cell_type,image_path,bit_depth,image_format,channels
0,A9 p10f,"[185014, 2, 185456, 11, 185905, 13, 186354, 16...",450,450,drosophila melanogaster Kc167 cells,Cell_Instance_segmentation/cell_images/A9 p10f...,"(8, 8, 8)",tif,3
1,A9 p10f,"[183689, 4, 184136, 8, 184584, 11, 185033, 12,...",450,450,drosophila melanogaster Kc167 cells,Cell_Instance_segmentation/cell_images/A9 p10f...,"(8, 8, 8)",tif,3
2,A9 p10f,"[174174, 4, 174623, 8, 175072, 11, 175523, 11,...",450,450,drosophila melanogaster Kc167 cells,Cell_Instance_segmentation/cell_images/A9 p10f...,"(8, 8, 8)",tif,3
3,A9 p10f,"[167446, 8, 167894, 12, 168343, 16, 168792, 18...",450,450,drosophila melanogaster Kc167 cells,Cell_Instance_segmentation/cell_images/A9 p10f...,"(8, 8, 8)",tif,3
4,A9 p10f,"[163887, 1, 164333, 12, 164781, 15, 165229, 18...",450,450,drosophila melanogaster Kc167 cells,Cell_Instance_segmentation/cell_images/A9 p10f...,"(8, 8, 8)",tif,3


In [None]:
# Save test_df (no index) to the same all_cells.csv path that comb_df was saved to, replacing that file.
test_df.to_csv('/home/jingqi/ucsd_bootcamp/Cell_Instance_segmentation/all_cells.csv', index=False)

# Build Tensorflow Dataset pipeline

In [6]:
import tensorflow as tf

In [7]:
# Load the combined metadata table from the synced data directory.
df = pd.read_csv(os.path.join(CELLS_PATH, 'all_cells.csv'))
# Re-root every image path under CELLS_PATH, keeping only its last two components
# (parent directory and file name).
df['image_path'] = df['image_path'].apply(lambda x: os.path.join(CELLS_PATH, x.split('/')[-2], x.split('/')[-1]))

In [9]:
# resize image and label
HEIGHT = 256
WIDTH = 256
dim = (WIDTH, HEIGHT)
images = []
labels = [] 
for image, label in image_label_gen(df) : 
  images.append(cv2.resize(image, dim, interpolation=cv2.INTER_AREA))
  labels.append(cv2.resize(label, dim, interpolation=cv2.INTER_AREA))

In [None]:
# Print the shape of every resized image (one line per image).
for i in images : 
  print (i.shape)

In [None]:
# Dataset with one element per resized image.
# NOTE(review): the metadata lists both 1- and 3-channel images, so the arrays in
# `images` presumably differ in rank — confirm they can be stacked into one tensor.
dataset1 = tf.data.Dataset.from_tensor_slices(images)

In [None]:
# Dataset with one element per resized label.
dataset2 = tf.data.Dataset.from_tensor_slices(labels)

In [None]:
# Stream (image, label) pairs straight from the generator.
# from_generator converts `args` to tensors and passes them on as NumPy arrays,
# which cannot carry a DataFrame; the frame is therefore bound with a closure.
cells_dataset = tf.data.Dataset.from_generator(lambda: image_label_gen(df),
                                               output_types=(tf.float32, tf.float32)
                                              )

In [8]:
# Show the first ten rows of the loaded metadata table.
df.head(10)

Unnamed: 0,name,label,width,height,cell_type,image_path,bit_depth,image_format,channels
0,A9 p10f,[185014 2 185456 11 185905 13 186...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
1,A9 p10f,[183689 4 184136 8 184584 11 185...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
2,A9 p10f,[174174 4 174623 8 175072 11 175...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
3,A9 p10f,[167446 8 167894 12 168343 16 168...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
4,A9 p10f,[163887 1 164333 12 164781 15 165...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
5,A9 p10f,[172231 3 172679 6 173128 8 173...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
6,A9 p10f,[162758 13 163206 16 163653 20 164...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
7,A9 p10f,[172083 1 172088 7 172531 15 172...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
8,A9 p10f,[167105 9 167554 13 168003 16 168...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
9,A9 p10f,[161281 9 161730 12 162174 3 162...,450,450,drosophila melanogaster Kc167 cells,/content/drive/MyDrive/cellimages/cell_images/...,"(8, 8, 8)",tif,3
