References:

For Data Augmentation

LA Tran, 13 August 2021, Data Augmentation Compilation with Python and OpenCV, Towards Data Science

https://towardsdatascience.com/data-augmentation-compilation-with-python-and-opencv-b76b1cd500e0

In [1]:
import pandas as pd
import os
import random
from skimage import io
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style = "dark", 
        color_codes = True,
        font_scale = 1.5)

In [3]:
# load all images into a pandas dataframe
def read_image_data(data_path, subset=False):

    folders = sorted(os.listdir(data_path))
    images, labels = [], []
    label_dict = {}

    for label_no in range(len(folders)):
        folder = folders[label_no]
        label_dict[label_no] = folder
        oslistdir = sorted(os.listdir(os.path.join(os.getcwd(),data_path,folder))) 
        oslistdir = oslistdir[:10] if subset else oslistdir
        
        for image_name in oslistdir:
            image_path = f"{data_path}/{folders[label_no]}/{image_name}"
            image = io.imread(image_path)  
            images.append(image)
            labels.append(label_no)
        
        print(f"\t [{((label_no + 1)/len(folders)) * 100:.1f} %] Fetching label '{folder}'           ", end='\r')
            
    print()
    df = pd.DataFrame()
    df['Image_Id'] = [i for i in range(len(images))]
    df['Image'] = images
    df['Label'] = labels
    return df, label_dict

# loading the folder image data and convert into nd array
training_data, label_dict_Train = read_image_data("Training")
print(label_dict_Train)

testing_data, label_dict_Test = read_image_data("Testing")
print(label_dict_Test)

if label_dict_Train==label_dict_Test:
    full_image_df = pd.concat([training_data,testing_data],ignore_index=True)

	 [100.0 %] Fetching label 'pituitary'           
{0: 'glioma', 1: 'meningioma', 2: 'notumor', 3: 'pituitary'}
	 [100.0 %] Fetching label 'pituitary'           
{0: 'glioma', 1: 'meningioma', 2: 'notumor', 3: 'pituitary'}


In [4]:
from skimage.color import rgb2gray

full_image_df["Dim"] = full_image_df["Image"].apply(np.shape)
full_image_df['width'], full_image_df['height'], _ = zip(*full_image_df.Dim) # * <- unpacks the array's elements
full_image_df['greyscale_image'] = full_image_df["Image"].apply(rgb2gray)
full_image_df

ValueError: not enough values to unpack (expected 3, got 2)

# Data Augmentation

- increase the sample to approximately 3000 instances per class
- color jitter/noise/filter

In [3]:
full_image_df['Label'].value_counts()

1    937
0    926
3    901
2    500
Name: Label, dtype: int64

In [5]:
# gaussian noise
image = cv2.imread(r'D:\Users\dxlee\DX_degree\Y3S2\ODL\working-dir\brain-tumor\Testing\no_tumor\image(90).jpg')
mean=0
st=0.7
gauss = np.random.normal(mean,st,image.shape)
gauss = gauss.astype('uint8')
image = cv2.add(image,gauss)
cv2.imwrite("gaussian_noise.jpg", image)

True

In [6]:
# salt/pepper noise
image = cv2.imread(r'D:\Users\dxlee\DX_degree\Y3S2\ODL\working-dir\brain-tumor\Testing\no_tumor\image(90).jpg')
prob = 0.05
if len(image.shape) == 2:
    black = 0
    white = 255            
else:
    colorspace = image.shape[2]
    if colorspace == 3:  # RGB
        black = np.array([0, 0, 0], dtype='uint8')
        white = np.array([255, 255, 255], dtype='uint8')
    else:  # RGBA
        black = np.array([0, 0, 0, 255], dtype='uint8')
        white = np.array([255, 255, 255, 255], dtype='uint8')
probs = np.random.random(image.shape[:2])
image[probs < (prob / 2)] = black
image[probs > 1 - (prob / 2)] = white
cv2.imwrite("salt_pepper_noise.jpg", image)

True

In [7]:
#mean blur
image = cv2.imread(r'D:\Users\dxlee\DX_degree\Y3S2\ODL\working-dir\brain-tumor\Testing\no_tumor\image(90).jpg')
fsize = 9
blur = cv2.blur(image,(fsize,fsize))
cv2.imwrite("mean_blur.jpg", blur)

True

In [9]:
#gaussian blur
image = cv2.imread(r'D:\Users\dxlee\DX_degree\Y3S2\ODL\working-dir\brain-tumor\Testing\no_tumor\image(90).jpg')
fsize = 9
blur = cv2.GaussianBlur(image,(fsize,fsize),0)
cv2.imwrite("gaussian_blur.jpg", blur)

True

In [10]:
#median blur
image = cv2.imread(r'D:\Users\dxlee\DX_degree\Y3S2\ODL\working-dir\brain-tumor\Testing\no_tumor\image(90).jpg')
fsize = 9
blur = cv2.medianBlur(image,fsize)
cv2.imwrite("median_blur.jpg", blur)

True

In [None]:
# need to generate 400 more instances for the no-tumor class
# random filter (blur/gaussian/median blur)
# random noise (gaussian/salt_pepper)

def colorjitter(img, cj_type="b"):
    '''
    ### Different Color Jitter ###
    img: image
    cj_type: {b: brightness, s: saturation, c: constast}
    '''
    if cj_type == "b":
        # value = random.randint(-50, 50)
        value = np.random.choice(np.array([-50, -40, -30, 30, 40, 50]))
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
        if value >= 0:
            lim = 255 - value
            v[v > lim] = 255
            v[v <= lim] += value
        else:
            lim = np.absolute(value)
            v[v < lim] = 0
            v[v >= lim] -= np.absolute(value)

        final_hsv = cv2.merge((h, s, v))
        img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
        return img
    
    elif cj_type == "s":
        # value = random.randint(-50, 50)
        value = np.random.choice(np.array([-50, -40, -30, 30, 40, 50]))
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
        if value >= 0:
            lim = 255 - value
            s[s > lim] = 255
            s[s <= lim] += value
        else:
            lim = np.absolute(value)
            s[s < lim] = 0
            s[s >= lim] -= np.absolute(value)

        final_hsv = cv2.merge((h, s, v))
        img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
        return img
    
    elif cj_type == "c":
        brightness = 10
        contrast = random.randint(40, 100)
        dummy = np.int16(img)
        dummy = dummy * (contrast/127+1) - contrast + brightness
        dummy = np.clip(dummy, 0, 255)
        img = np.uint8(dummy)
        return img

def noisy(img, noise_type="gauss"):
    '''
    ### Adding Noise ###
    img: image
    cj_type: {gauss: gaussian, sp: salt & pepper}
    '''
    if noise_type == "gauss":
        image=img.copy() 
        mean=0
        st=0.7
        gauss = np.random.normal(mean,st,image.shape)
        gauss = gauss.astype('uint8')
        image = cv2.add(image,gauss)
        return image
    
    elif noise_type == "sp":
        image=img.copy() 
        prob = 0.05
        if len(image.shape) == 2:
            black = 0
            white = 255            
        else:
            colorspace = image.shape[2]
            if colorspace == 3:  # RGB
                black = np.array([0, 0, 0], dtype='uint8')
                white = np.array([255, 255, 255], dtype='uint8')
            else:  # RGBA
                black = np.array([0, 0, 0, 255], dtype='uint8')
                white = np.array([255, 255, 255, 255], dtype='uint8')
        probs = np.random.random(image.shape[:2])
        image[probs < (prob / 2)] = black
        image[probs > 1 - (prob / 2)] = white
        return image

def filters(img, f_type = "blur"):
    '''
    ### Filtering ###
    img: image
    f_type: {blur: blur, gaussian: gaussian, median: median}
    '''
    if f_type == "blur":
        image=img.copy()
        fsize = 9
        return cv2.blur(image,(fsize,fsize))
    
    elif f_type == "gaussian":
        image=img.copy()
        fsize = 9
        return cv2.GaussianBlur(image, (fsize, fsize), 0)
    
    elif f_type == "median":
        image=img.copy()
        fsize = 9
        return cv2.medianBlur(image, fsize)

# Image Preprocessing

- Convert BGR TO GRAY
- GaussianBlur
- Threshold
- Erode
- Dilate
- Find Contours

Filtering: remove gaussian noise, salt-and-pepper noise, speckle noise; by median/mean/gaussian filter
- Gaussian smoothing, median filtering, and wavelet denoising

Enhancement: adjust brightness/contrast
- histogram equalization, adaptive histogram equalization, and contrast stretching

Restoration: deconvolution/denoising

Image resizing
- smaller/bigger/alter ratio
- nearest neighbor interpolation, bilinear interpolation, and bicubic interpolation

Colour correction
- gray world assumption, white balance, and color transfer

Segmentation
- thresholding, edge detection, and region growing


In [None]:
# Convert BGR TO GRAY: reduce dimension
# GaussianBlur: reduce noise
# Threshold: only become white if the color intensity exceed the threshold, otherwise black

# Erode: remove border of foreground object, reduce image features (can be iteratively)
# Dilate: increase object area, emphasise features

# Uses of Erosion and Dilation: 

# Erosion: 
# It is useful for removing small white noises.
# Used to detach two connected objects etc.
# Dilation:
# In cases like noise removal, erosion is followed by dilation. Because, erosion removes white noises, but it also shrinks our object. So we dilate it. Since noise is gone, they won’t come back, but our object area increases.
# It is also useful in joining broken parts of an object.

# Find Contours, to perform cropping

# image resize for to 256/512?

# normalise to 0-1

# contrast: equalizeHist () method enhances the contrast of images.

In [6]:
import numpy as np
import cv2

# im = cv2.imread(r'D:\Users\dxlee\DX_degree\Y3S2\ODL\working-dir\brain-tumor\Testing\glioma_tumor\image.jpg')
im = cv2.imread(r'Testing\glioma_tumor\image.jpg')

# To Grayscale
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

print(np.shape(im))
data = np.asarray(im)
# for row in data:
#     for cell in row:
#         if cell !=0:
#             print(cell)
# print(data[np.any(data!=0, axis=1)])

cv2.imwrite("grayscale.jpg", im)

(619, 495)


True