# This notebook processes the raw_data folder by:
- Converting all masks to 8-bit and segmenting them into black and white (for semantic segmentation)
- Converting the Fluo-N2DH-SIM+ images to 8-bit (this is the only image dataset stored in 16-bit)

In [2]:
import os
import sys
import random
import warnings
from skimage import io
import numpy as np
import pandas as pd
import cv2

import matplotlib.pyplot as plt

from tqdm import tqdm
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

In [5]:
# Walk everything below the parent directory and print each directory whose
# path contains the substring '.ini'.
# NOTE(review): the test is applied to the directory path, not to the file
# names inside it -- confirm that this is the intended target.
data_path = '../'
ini_dirs = (root for root, _subdirs, _files in os.walk(data_path) if '.ini' in root)
for ini_dir in ini_dirs:
    print(ini_dir)

# Change the filenames (CTC Full data)

In [None]:
# Rename every '.tif' entry below raw_data to '<prefix><NNN>.tif'.
# The prefix is picked from the directory path ('01' -> seq01_, else
# '02' -> seq02_); NNN is the run of up to three characters that sits
# directly in front of '.tif' in the current name.
raw_data_path = '../raw_data/'
for (dirpath, dirnames, filenames) in os.walk(raw_data_path):
    # The substring test runs on the whole path, and '01' wins when both
    # markers are present.
    if '01' in dirpath:
        prefix = 'seq01_'
    elif '02' in dirpath:
        prefix = 'seq02_'
    else:
        continue

    for filename in os.listdir(dirpath):
        ext_at = filename.find('.tif')
        if ext_at == -1:
            continue

        # A forward slice clamped at 0. The previous reversed slice
        # ([ext_at-1:ext_at-4:-1]) came back empty whenever exactly three
        # characters preceded '.tif', because its stop index became -1.
        img_no = filename[max(ext_at - 3, 0):ext_at]
        if not img_no:
            # Nothing in front of the extension, so there is no number to keep.
            continue

        src = os.path.join(dirpath, filename)
        dst = os.path.join(dirpath, prefix + img_no + '.tif')
        if src == dst:
            # Already renamed by an earlier run.
            continue
        if os.path.exists(dst):
            # os.rename can silently replace an existing file; two names that
            # share their last three characters must not overwrite each other.
            print('skipping ' + src + ': ' + dst + ' already exists')
            continue
        os.rename(src, dst)

# Change the filenames (Group Project data)

In [16]:
# Rename every '.tif' entry inside the 'Sequence N' directories below output
# to '<prefix><NNN>.tif', where the prefix encodes the sequence number and NNN
# is the run of up to three characters directly in front of '.tif'.
output_data_path = '../output/'

# Checked in this order; the first marker found in the path decides.
sequence_prefixes = (
    (' 1', 'seq01_'),
    (' 2', 'seq02_'),
    (' 3', 'seq03_'),
    (' 4', 'seq04_'),
)

for (dirpath, dirnames, filenames) in os.walk(output_data_path):
    if 'Sequence' not in dirpath:
        continue

    prefix = next(
        (name for marker, name in sequence_prefixes if marker in dirpath),
        None,
    )
    if prefix is None:
        # No known sequence number in the path. Skip the directory instead of
        # reusing a prefix left over from a previously visited directory (or
        # failing with a NameError when none was ever set).
        continue

    for filename in os.listdir(dirpath):
        ext_at = filename.find('.tif')
        if ext_at == -1:
            continue

        # A forward slice clamped at 0. The previous reversed slice
        # ([ext_at-1:ext_at-4:-1]) came back empty whenever exactly three
        # characters preceded '.tif', because its stop index became -1.
        img_no = filename[max(ext_at - 3, 0):ext_at]
        if not img_no:
            # Nothing in front of the extension, so there is no number to keep.
            continue

        src = os.path.join(dirpath, filename)
        dst = os.path.join(dirpath, prefix + img_no + '.tif')
        if src == dst:
            # Already renamed by an earlier run.
            continue
        if os.path.exists(dst):
            # os.rename can silently replace an existing file; two names that
            # share their last three characters must not overwrite each other.
            print('skipping ' + src + ': ' + dst + ' already exists')
            continue
        os.rename(src, dst)

# Convert masks to 8-bit and Black-White segmentation

In [2]:
def convert_masks_and_write(filepath):
    """Rewrite the mask stored at ``filepath`` as an 8-bit black/white image.

    The file is read unchanged (flag ``-1``), rescaled so that its maximum
    maps to 255, passed through a Gaussian adaptive threshold (3x3 block,
    constant 2) and written back to the same path. Pixels that are 0 in the
    image as read are forced to 0 in the result.

    Args:
        filepath: Path of the mask image; it is overwritten in place.

    Raises:
        ValueError: If OpenCV cannot read ``filepath`` as an image.
    """
    img16 = cv2.imread(filepath, -1)
    if img16 is None:
        # cv2.imread signals an unreadable file by returning None.
        raise ValueError('could not read image: ' + filepath)

    peak = np.amax(img16)
    if peak > 0:
        # Scale to 0..255. Dividing by peak/256 sent the maximum to 256,
        # which does not fit in uint8, so the highest-valued object could
        # end up as 0. Multiplying first keeps peak -> 255 exact.
        img8 = (img16.astype(np.float64) * 255.0 / peak).astype('uint8')
    else:
        # All-zero mask: nothing to scale, and no division by zero.
        img8 = np.zeros(img16.shape, dtype='uint8')

    thres = cv2.adaptiveThreshold(
        img8, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 3, 2
    )

    # Take the background from the image as read, not from the rescaled copy:
    # small non-zero values can truncate to 0 after scaling and would
    # otherwise be erased together with the real background.
    background = img16 == 0
    thres[background] = 0
    cv2.imwrite(filepath, thres)

In [3]:
# Run the mask conversion on every entry of every directory below raw_data
# whose path contains 'SEG'.
raw_data_path = '../raw_data/'
for folder, _subdirs, _files in os.walk(raw_data_path):
    if 'SEG' not in folder:
        continue
    for entry in os.listdir(folder):
        mask_path = os.path.join(folder, entry)
        convert_masks_and_write(mask_path)

  after removing the cwd from sys.path.


# Convert Fluo-N2DH-SIM+ images to 8-bit

In [4]:
def convert_imgs_and_write(filepath):
    """Rewrite the image stored at ``filepath`` as an 8-bit image.

    The file is read unchanged (flag ``-1``), rescaled so that its maximum
    maps to 255, truncated to ``uint8`` and written back to the same path.

    Args:
        filepath: Path of the image; it is overwritten in place.

    Raises:
        ValueError: If OpenCV cannot read ``filepath`` as an image.
    """
    img16 = cv2.imread(filepath, -1)
    if img16 is None:
        # cv2.imread signals an unreadable file by returning None.
        raise ValueError('could not read image: ' + filepath)

    peak = np.amax(img16)
    if peak > 0:
        # Scale to 0..255. Dividing by peak/256 sent the maximum to 256,
        # which does not fit in uint8, so the brightest pixels could end up
        # black. Multiplying first keeps peak -> 255 exact.
        img8 = (img16.astype(np.float64) * 255.0 / peak).astype('uint8')
    else:
        # All-zero image: nothing to scale, and no division by zero.
        img8 = np.zeros(img16.shape, dtype='uint8')

    cv2.imwrite(filepath, img8)

In [8]:
# Run the 8-bit conversion on every entry of the Fluo-N2DH-SIM+ sequence
# directories: the path must name the dataset, must not contain '_GT', and
# must contain '01' or '02'.
raw_data_path = '../raw_data/'
for folder, _subdirs, _files in os.walk(raw_data_path):
    is_sim_dataset = 'Fluo-N2DH-SIM+' in folder
    is_ground_truth = '_GT' in folder
    is_sequence_dir = '01' in folder or '02' in folder
    if not is_sim_dataset or is_ground_truth or not is_sequence_dir:
        continue
    for entry in os.listdir(folder):
        image_path = os.path.join(folder, entry)
        convert_imgs_and_write(image_path)

# Apply CLAHE

In [14]:
def apply_CLAHE_and_save(img_path):
    """Equalise the image at ``img_path`` with CLAHE and overwrite the file.

    The image is loaded as grayscale, run through a CLAHE object created
    with OpenCV's default settings, and saved back to the same path.

    Args:
        img_path: Path of the image; it is overwritten in place.
    """
    grayscale = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    equalized = cv2.createCLAHE().apply(grayscale)
    cv2.imwrite(img_path, equalized)


In [16]:
# Root folders whose files (at any depth) are equalised in place with CLAHE.
imgs_path = [
    r'../data/DIC-C2DH-HeLa/train/images/',
    r'../data/DIC-C2DH-HeLa/val/images/',
    r'../output/DIC-C2DH-HeLa/images/',
]

# Lazily enumerate every file below each root, in walk order.
image_files = (
    os.path.join(folder, name)
    for root in imgs_path
    for folder, _subdirs, names in os.walk(root)
    for name in names
)
for img_path in image_files:
    apply_CLAHE_and_save(img_path)