# Skin Lesion Classification

## Dataset Preparation

### Data Placement in Class Directories

Download the training dataset from the link below:
- [Training Data](https://isic-challenge-data.s3.amazonaws.com/2018/ISIC2018_Task3_Training_Input.zip). Place all the downloaded training images in a folder named `orig` in the same directory as this notebook.
##### OR
- Download orig folder from here [OneDrive](https://langara-my.sharepoint.com/:f:/r/personal/a196_mylangara_ca/Documents/Skin%20Cancer%20Detection%20Capstone/ISIC%202018?csf=1&web=1&e=R9AE8h)


In [None]:
# Importing the Libraries
# For missing libraries
# pip install -U package_name --user
import pandas as pd
import os
import shutil
from glob import glob
import cv2
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from sklearn.utils import shuffle
import numpy as np
import tensorflow as tf
from skimage import io
import albumentations
from albumentations import CenterCrop, ShiftScaleRotate, HorizontalFlip, VerticalFlip, RandomBrightnessContrast

We need the following directory structure to run ImageDataGenerator for model training.
- Data :
     - |-train
          - |-mel
          - |-nv
          - |-bcc
          - |-akiec
          - |-bkl
          - |-df
          - |-vasc

In [2]:
# Creating Train Directories
# exist_ok=True lets this cell be re-run without raising FileExistsError
# once the folders are already in place.
for clas in ['mel', 'nv', 'bcc', 'akiec', 'bkl', 'df', 'vasc']:
    os.makedirs(f'Data/train/{clas}', exist_ok=True)

The above code creates Training dataset directory as follows:
- Data :
     - |-train
          - |-mel
          - |-nv
          - |-bcc
          - |-akiec
          - |-bkl
          - |-df
          - |-vasc

In [3]:
# Ground-truth CSV layout as used below: column 0 is the image id and
# columns 1-7 are per-class flags, read in this order.
label_columns = ['mel', 'nv', 'bcc', 'akiec', 'bkl', 'df', 'vasc']
meta = pd.read_csv('labels/ISIC2018_Task3_Training_GroundTruth.csv')
for row in meta.values:
    image_id = row[0]
    # The first flagged class wins. A row with none of the first six flags
    # set falls through to 'vasc', exactly like the original else-branch.
    label = 'vasc'
    for column, name in enumerate(label_columns[:-1], start=1):
        if row[column] == 1:
            label = name
            break
    source = f"orig/{image_id}.jpg"
    destination = f"Data/train/{label}/{image_id}.jpg"
    # os.rename MOVES the file out of orig/. If an earlier (possibly
    # interrupted) run already moved it, skip it so the cell can be re-run.
    if not os.path.isfile(source) and os.path.isfile(destination):
        continue
    os.rename(source, destination)

# Placing Manually Selected Hair Removal Images into directories
hair_df = pd.read_csv('labels/Hair_Removal_2018_Image_Class_Map.csv')
for row in hair_df.values:
    # row[0] is used as the class folder, row[1] as the image file name.
    os.makedirs(f'Data/hair removal/{row[0]}', exist_ok=True)
    shutil.copyfile(f'Data/train/{row[0]}/{row[1]}', f'Data/hair removal/{row[0]}/{row[1]}')

# Creating Directories for Processed Data
for clas in ['mel', 'nv', 'bcc', 'akiec', 'bkl', 'df', 'vasc']:
    os.makedirs(f'Processed_Data/train/{clas}', exist_ok=True)

The above code moves the training images from the 'orig' folder into their individual class folders in the Data/train directory.

Also, the manually selected hair removal images listed in labels/Hair_Removal_2018_Image_Class_Map.csv are copied from the Data/train directory to the Data/hair removal directory.

The code also creates class directories for processed data. 

### Hair Removal

In [4]:
# Dullrazor Algorithm Implementation
def dullrazor(img, lowbound=10, showimgs=True, filterstruc=17, inpaintmat=1):
    """Inpaint the thin dark structures (hair) detected by a black-hat filter.

    Args:
        img: colour image array to clean.
        lowbound: threshold applied to the black-hat response; pixels above
            it are marked as hair (255), the rest as skin (0).
        showimgs: unused; kept so existing calls keep working.
        filterstruc: side length of the square structuring element.
        inpaintmat: inpainting radius handed to cv2.inpaint.

    Returns:
        The inpainted image.

    NOTE(review): the grayscale conversion uses COLOR_RGB2GRAY while the
    callers in this notebook load images with cv2.imread - confirm this is
    the intended conversion for that channel order.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Black-hat highlights dark details smaller than the structuring element.
    structuring_element = cv2.getStructuringElement(
        cv2.MORPH_RECT, (filterstruc, filterstruc)
    )
    blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, structuring_element)

    # Binary mask: 0 = skin, 255 = hair.
    hair_mask = cv2.threshold(blackhat, lowbound, 255, cv2.THRESH_BINARY)[1]

    # Fill the masked pixels from their surroundings (Telea method).
    return cv2.inpaint(img, hair_mask, inpaintmat, cv2.INPAINT_TELEA)

In [5]:
# Folders containing the training images chosen for hair removal, one per class
folders = glob('Data/hair removal/*')
# Looping over each class folder
for sk_class in folders:
    # os.path.basename copes with both '/' and the platform separator that
    # glob may return, unlike a hard-coded split('/').
    skcls = os.path.basename(sk_class)
    # Looping over each image in a class folder
    for image in glob(sk_class+'/*'):
        filename = os.path.basename(image)
        img = cv2.imread(image)
        img_clean = dullrazor(img, lowbound=6, filterstruc=9, inpaintmat=1)
        # Saving processed image to new folder under specific classes.
        cv2.imwrite(f'Processed_Data/train/{skcls}/{filename}', img_clean)

In [6]:
folders = glob('Data/train/*')
# Copy every training image that was NOT hair-processed into Processed_Data
# so that Processed_Data/train ends up holding the complete data set.
for sk_class in folders:
    # basename instead of split('/') so platform-specific separators work.
    skcls = os.path.basename(sk_class)
    for file in glob(sk_class+'/*'):
        filename = os.path.basename(file)
        # Never overwrite a file that is already in the processed folder.
        if not os.path.isfile(f'Processed_Data/train/{skcls}/{filename}'):
            shutil.copyfile(file, f'Processed_Data/train/{skcls}/{filename}')

### Center Cropping

In [7]:
# Copy the manually selected images that need cropping into Data/cropping/<class>
import os
import pandas as pd
# The name hair_df is kept as-is; here it holds the cropping image/class map.
hair_df = pd.read_csv('labels/Cropping_2018_Image_Class_Map.csv')
for row in hair_df.values:
    # row[0] is used as the class folder, row[1] as the image file name.
    crop_dir = f'Data/cropping/{row[0]}'
    os.makedirs(crop_dir, exist_ok=True)
    shutil.copyfile(f'Processed_Data/train/{row[0]}/{row[1]}', f'{crop_dir}/{row[1]}')

In [8]:
def center_crop(images, save_path, augment = True):
    """Center-crop each image to 256x256, resize it to 128x128 and save it.

    Args:
        images: paths of the images to process.
        save_path: directory that receives the output files; each file keeps
            the name of its source image.
        augment: when truthy the 256x256 center crop is applied before the
            resize; otherwise the image is only resized.
    """
    H = 128
    W = 128

    for path in tqdm(images, total=len(images)):
        # splitext/basename keep names that contain extra dots intact and do
        # not depend on '/' being the path separator.
        image_name, image_extn = os.path.splitext(os.path.basename(path))

        img = cv2.imread(path, cv2.IMREAD_COLOR)

        if augment:
            aug = CenterCrop(always_apply=True, height=256, width=256, p=1.0)
            img = aug(image=img)["image"]

        img = cv2.resize(img, (W, H))
        cv2.imwrite(os.path.join(save_path, f"{image_name}{image_extn}"), img)

In [9]:
dataset_path = glob('Data/cropping/*')
# Looping over each class folder
for sk_class in dataset_path:
    # basename instead of split("/") so platform-specific separators work.
    skcls = os.path.basename(sk_class)
    # Looping over each image in a class folder
    for image in glob(sk_class+'/*'):
        filename = os.path.basename(image)
        # Drop the uncropped copy first; center_crop then writes the cropped
        # image under the same file name.
        if os.path.isfile(f'Processed_Data/train/{skcls}/{filename}'):
            os.remove(f'Processed_Data/train/{skcls}/{filename}')
        center_crop([image], save_path=f'Processed_Data/train/{skcls}', augment=True)

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 84.93it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 135.26it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 263.94it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 314.34it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 145.22it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 155.81it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 152.84it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 178.79it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 195.58it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 161.66it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 377.36it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 393.54it/s]
100%|███████████████████████

100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 138.80it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 153.68it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 189.00it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 208.72it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 208.82it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 221.49it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 162.31it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 163.29it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 164.39it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 302.95it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 176.99it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 165.40it/s]
100%|███████████████████████

100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 177.14it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 185.98it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 205.25it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 194.06it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 178.88it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 184.57it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 194.33it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 174.49it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 176.93it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 205.00it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 196.00it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 183.09it/s]
100%|███████████████████████

100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 353.35it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 175.69it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 398.40it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 202.80it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 182.25it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 364.56it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 224.23it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 179.54it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 353.86it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 181.99it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 378.51it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 181.25it/s]
100%|███████████████████████

100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 265.24it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 426.64it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 202.76it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 399.99it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 187.25it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 210.04it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 185.84it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 407.21it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 427.03it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 213.36it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 392.47it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 191.94it/s]
100%|███████████████████████

## Deleting Excessive Hair Images

In [None]:
bcc_files = ['ISIC_0025584.jpg', 'ISIC_0026343.jpg','ISIC_0030446.jpg','ISIC_0030687.jpg',
             'ISIC_0031527.jpg','ISIC_0031640.jpg','ISIC_0034047.jpg']
bkl_file = ['ISIC_0024832.jpg']
mel_file = ['ISIC_0032214.jpg']

# Drive every deletion from the lists above instead of repeating file names.
for class_dir, image_files in (('bcc', bcc_files), ('bkl', bkl_file), ('mel', mel_file)):
    for image_file in image_files:
        image = f'Processed_Data/train/{class_dir}/{image_file}'
        # Skip files that are already gone so the cell can be re-run.
        if os.path.isfile(image):
            os.remove(image)

## Train Test Split

In [12]:
TRAIN_DIR = 'Processed_Data/train'
# Fixed seed, taken from the 100-149 range the seed used to be drawn from at
# random. With a new seed on every run, re-running this cell re-split the data
# while PData still held the previous split, so the same image could land in
# both train and test.
SPLIT_SEED = 123
def shuffling(X, rs):
    """Return X shuffled by sklearn.utils.shuffle with random_state=rs."""
    X= shuffle(X, random_state=rs)
    return X
classes = ['akiec','bcc','bkl','df','nv','mel','vasc']
for cl in classes:
    rs = SPLIT_SEED
    # glob returns files in arbitrary order; sort so the seed alone decides
    # the split.
    r_images = sorted(glob(f'{TRAIN_DIR}/{cl}/*'))
    r_images_shuf = shuffling(r_images,rs)
    # 70 % of each class goes to train, the remainder to test.
    train = int(0.7*len(r_images))
    r_train = r_images_shuf[0:train]
    r_test = r_images_shuf[train:]
    os.makedirs(f'PData/train/{cl}', exist_ok=True)
    os.makedirs(f'PData/test/{cl}', exist_ok=True)
    for image in r_train:
        shutil.copyfile(image, f'PData/train/{cl}/{os.path.basename(image)}')
    for image in r_test:
        shutil.copyfile(image, f'PData/test/{cl}/{os.path.basename(image)}')

In [None]:
# Skin Lesion Classification: Augmentation

In [13]:
# Shifting, Scaling & Rotations
def shiftscalerotate(images, save_path,
                     augment = True,
                     resize = (256,256),
                     random=1.0,
                     rotation=0,
                     shift=0,
                     scale=1,
                     interpolation=cv2.INTER_NEAREST,
                     always=True,
                     call=1):
    """Save each image resized, plus one ShiftScaleRotate-augmented copy.

    Args:
        images: paths of the images to process.
        save_path: directory that receives the output files.
        augment: when truthy an augmented copy is written next to the resized
            source image; otherwise only the resized source image is written.
        resize: (height, width) every saved image is resized to.
        random: passed to ShiftScaleRotate as ``p``.
        rotation, shift, scale: NOTE(review): accepted but never forwarded to
            ShiftScaleRotate - rotate_limit is fixed at (-30, 30) and no
            shift/scale limits are passed. Confirm whether they were meant to
            be wired in before relying on them.
        interpolation: passed to ShiftScaleRotate as ``interpolation``.
        always: passed to ShiftScaleRotate as ``always_apply``.
        call: how many times '_ssr' is repeated in the suffix appended to the
            augmented file name.

    The source image is always written to save_path under its own name; when
    the inputs already live in save_path they are overwritten by the resized
    copy.
    """
    H, W = resize
    aug_ext = '_ssr'*call
    for path in tqdm(images, total=len(images)):
        # splitext/basename keep names with extra dots intact and do not
        # depend on '/' being the path separator.
        image_name, image_extn = os.path.splitext(os.path.basename(path))

        source = cv2.imread(path, cv2.IMREAD_COLOR)
        outputs = [(f"{image_name}{image_extn}", source)]

        if augment:
            aug = ShiftScaleRotate(p=random, rotate_limit=(-30,30), interpolation=interpolation,
                                   always_apply=always)
            outputs.append((f"{image_name}{aug_ext}{image_extn}", aug(image=source)["image"]))

        for file_name, img in outputs:
            cv2.imwrite(os.path.join(save_path, file_name), cv2.resize(img, (W, H)))

In [14]:
# Horizontal Flipping
def horizontalflip(images, save_path,
                   augment = True,
                   resize = (256,256),
                   random=1.0,
                   always=True):
    """Save each image resized, plus one horizontally flipped copy ('_hf').

    Args:
        images: paths of the images to process.
        save_path: directory that receives the output files.
        augment: when truthy a flipped copy named '<name>_hf<ext>' is written
            next to the resized source image; otherwise only the resized
            source image is written.
        resize: (height, width) every saved image is resized to.
        random: passed to HorizontalFlip as ``p``.
        always: passed to HorizontalFlip as ``always_apply``.

    The source image is always written to save_path under its own name; when
    the inputs already live in save_path they are overwritten by the resized
    copy.
    """
    H, W = resize
    for path in tqdm(images, total=len(images)):
        # splitext/basename keep names with extra dots intact and do not
        # depend on '/' being the path separator.
        image_name, image_extn = os.path.splitext(os.path.basename(path))

        source = cv2.imread(path, cv2.IMREAD_COLOR)
        outputs = [(f"{image_name}{image_extn}", source)]

        if augment:
            aug = HorizontalFlip(always_apply=always, p=random)
            outputs.append((f"{image_name}_hf{image_extn}", aug(image=source)["image"]))

        for file_name, img in outputs:
            cv2.imwrite(os.path.join(save_path, file_name), cv2.resize(img, (W, H)))

In [15]:
# Vertical Flipping
def verticalflip(images,
                 save_path,
                 augment = True,
                 resize=(256,256),
                 random=1.0,
                 always=True):
    """Save each image resized, plus one vertically flipped copy ('_vf').

    Args:
        images: paths of the images to process.
        save_path: directory that receives the output files.
        augment: when truthy a flipped copy named '<name>_vf<ext>' is written
            next to the resized source image; otherwise only the resized
            source image is written.
        resize: (height, width) every saved image is resized to.
        random: passed to VerticalFlip as ``p``.
        always: passed to VerticalFlip as ``always_apply``.

    The source image is always written to save_path under its own name; when
    the inputs already live in save_path they are overwritten by the resized
    copy.
    """
    H, W = resize
    for path in tqdm(images, total=len(images)):
        # splitext/basename keep names with extra dots intact and do not
        # depend on '/' being the path separator.
        image_name, image_extn = os.path.splitext(os.path.basename(path))

        source = cv2.imread(path, cv2.IMREAD_COLOR)
        outputs = [(f"{image_name}{image_extn}", source)]

        if augment:
            aug = VerticalFlip(always_apply=always, p=random)
            outputs.append((f"{image_name}_vf{image_extn}", aug(image=source)["image"]))

        for file_name, img in outputs:
            cv2.imwrite(os.path.join(save_path, file_name), cv2.resize(img, (W, H)))

In [16]:
def brightness_contrast(images,
                        save_path,
                        augment = True,
                        resize=(256,256),
                        random=1.0,
                        brightness=0.2,
                        contrast=0.2,
                        by_max=False,
                        always=True):
    """Save each image resized, plus one brightness/contrast-jittered copy ('_bc').

    Args:
        images: paths of the images to process.
        save_path: directory that receives the output files.
        augment: when truthy a jittered copy named '<name>_bc<ext>' is written
            next to the resized source image; otherwise only the resized
            source image is written.
        resize: (height, width) every saved image is resized to.
        random: passed to RandomBrightnessContrast as ``p``.
        brightness: passed as ``brightness_limit``.
        contrast: passed as ``contrast_limit``.
        by_max: passed as ``brightness_by_max``.
        always: passed as ``always_apply``.

    The source image is always written to save_path under its own name; when
    the inputs already live in save_path they are overwritten by the resized
    copy.
    """
    H, W = resize
    for path in tqdm(images, total=len(images)):
        # splitext/basename keep names with extra dots intact and do not
        # depend on '/' being the path separator.
        image_name, image_extn = os.path.splitext(os.path.basename(path))

        source = cv2.imread(path, cv2.IMREAD_COLOR)
        outputs = [(f"{image_name}{image_extn}", source)]

        if augment:
            aug = RandomBrightnessContrast(brightness_limit=brightness,
                                           contrast_limit=contrast,
                                           brightness_by_max=by_max,
                                           always_apply=always,
                                           p=random)
            outputs.append((f"{image_name}_bc{image_extn}", aug(image=source)["image"]))

        for file_name, img in outputs:
            cv2.imwrite(os.path.join(save_path, file_name), cv2.resize(img, (W, H)))

In [21]:
# For Creating Directory Structure
# exist_ok=True lets this cell be re-run without raising FileExistsError.
for clas in ['mel', 'nv', 'bcc', 'akiec', 'bkl', 'df', 'vasc']:
    os.makedirs(f'augmented/{clas}', exist_ok=True)

In [22]:
# Performing Augmentations Class-Wise
# AKIEC
# Each stage re-globs the output folder, so it processes the originals plus
# everything written by the stages before it.
image_data = glob('PData/train/akiec/*')
out_path = 'augmented/'
out_data = out_path+'akiec'
shiftscalerotate(image_data,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=40,
                 shift=0.0625,
                 scale=0.2,
                 interpolation=cv2.INTER_NEAREST)
ssr_images = glob('augmented/akiec/*')
horizontalflip(ssr_images,
               out_data,
               augment=True,
               resize=[128,128],
               random=1.0)
hf_images = glob('augmented/akiec/*')
verticalflip(hf_images,
             out_data,
             augment=True,
             resize=[128,128],
             random=1.0)
df_images = glob('augmented/akiec/*')
brightness_contrast(df_images,
                    out_data,
                    augment=True,
                    resize=[128,128],
                    random=1.0,
                    brightness=0.2,
                    contrast=0.2,
                    by_max=False)
# Selecting remaining images by shuffling all and selecting required
# glob order is arbitrary; sorting makes the seeded shuffle reproducible.
bc_images = sorted(glob('augmented/akiec/*'))
from sklearn.utils import shuffle
# Single-argument version of shuffling; the class cells below call this one.
def shuffling(X):
    """Return X shuffled by sklearn.utils.shuffle with random_state=42."""
    X= shuffle(X, random_state=42)
    return X
# shuffle and take only required number of images to perform next augmentation:
bc_images_shuf = shuffling(bc_images)
target = 4693
available = len(bc_images)
# Clamp at 0: a negative slice end would select almost every image instead of
# none when the class already holds at least `target` files.
bc_selected = bc_images_shuf[0:max(target-available, 0)]
shiftscalerotate(bc_selected,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=0,
                 shift=0,
                 scale=0.3,
                 interpolation=cv2.INTER_NEAREST,
                 call=2)

100%|████████████████████████████████████████| 228/228 [00:01<00:00, 203.16it/s]
100%|███████████████████████████████████████| 456/456 [00:00<00:00, 1592.83it/s]
100%|███████████████████████████████████████| 912/912 [00:00<00:00, 1447.47it/s]
100%|█████████████████████████████████████| 1824/1824 [00:01<00:00, 1377.57it/s]
100%|█████████████████████████████████████| 1045/1045 [00:00<00:00, 1422.97it/s]


In [23]:
# DF
# Each stage re-globs the output folder, so it processes the originals plus
# everything written by the stages before it.
image_data = glob('PData/train/df/*')
out_data = out_path+'df'
shiftscalerotate(image_data,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=40,
                 shift=0.0625,
                 scale=0.2,
                 interpolation=cv2.INTER_NEAREST,
                 call=1)
ssr_images = glob('augmented/df/*')
horizontalflip(ssr_images,
               out_data,
               augment=True,
               resize=[128,128],
               random=1.0)
hf_images = glob('augmented/df/*')
verticalflip(hf_images,
             out_data,
             augment=True,
             resize=[128,128],
             random=1.0)
df_images = glob('augmented/df/*')
brightness_contrast(df_images,
                    out_data,
                    augment=True,
                    resize=[128,128],
                    random=1.0,
                    brightness=0.2,
                    contrast=0.2,
                    by_max=False)
bc_images = glob('augmented/df/*')
shiftscalerotate(bc_images,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=40,
                 shift=0,
                 scale=1,
                 interpolation=cv2.INTER_NEAREST,
                 call=2)
# glob order is arbitrary; sorting makes the seeded shuffle reproducible.
r_images = sorted(glob('augmented/df/*'))
# shuffle and take only required number of images to perform next augmentation:
r_images_shuf = shuffling(r_images)
target = 4693
available = len(r_images)
# Clamp at 0: a negative slice end would select almost every image instead of
# none when the class already holds at least `target` files.
r_selected = r_images_shuf[0:max(target-available, 0)]
shiftscalerotate(r_selected,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=80,
                 shift=0.2,
                 scale=1,
                 interpolation=cv2.INTER_NEAREST,
                 call=3)

100%|██████████████████████████████████████████| 80/80 [00:00<00:00, 210.07it/s]
100%|███████████████████████████████████████| 160/160 [00:00<00:00, 1591.92it/s]
100%|███████████████████████████████████████| 320/320 [00:00<00:00, 1579.02it/s]
100%|███████████████████████████████████████| 640/640 [00:00<00:00, 1507.47it/s]
100%|█████████████████████████████████████| 1280/1280 [00:00<00:00, 1449.41it/s]
100%|█████████████████████████████████████| 2133/2133 [00:01<00:00, 1532.85it/s]


In [24]:
# VASC
# Each stage re-globs the output folder, so it processes the originals plus
# everything written by the stages before it.
image_data = glob('PData/train/vasc/*')
out_data = out_path+'vasc'
shiftscalerotate(image_data,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=40,
                 shift=0.0625,
                 scale=0.2,
                 interpolation=cv2.INTER_NEAREST,
                 call=1)
ssr_images = glob('augmented/vasc/*')
horizontalflip(ssr_images,
               out_data,
               augment=True,
               resize=[128,128],
               random=1.0)
hf_images = glob('augmented/vasc/*')
verticalflip(hf_images,
             out_data,
             augment=True,
             resize=[128,128],
             random=1.0)
df_images = glob('augmented/vasc/*')
brightness_contrast(df_images,
                    out_data,
                    augment=True,
                    resize=[128,128],
                    random=1.0,
                    brightness=0.2,
                    contrast=0.2,
                    by_max=False)
bc_images = glob('augmented/vasc/*')
shiftscalerotate(bc_images,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=40,
                 shift=0,
                 scale=1,
                 interpolation=cv2.INTER_NEAREST,
                 call=2)
# glob order is arbitrary; sorting makes the seeded shuffle reproducible.
r_images = sorted(glob('augmented/vasc/*'))
# shuffle and take only required number of images to perform next augmentation:
r_images_shuf = shuffling(r_images)
target = 4693
available = len(r_images)
# Clamp at 0: a negative slice end would select almost every image instead of
# none when the class already holds at least `target` files.
r_selected = r_images_shuf[0:max(target-available, 0)]
shiftscalerotate(r_selected,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=80,
                 shift=0.2,
                 scale=1,
                 interpolation=cv2.INTER_NEAREST,
                 call=3)

100%|██████████████████████████████████████████| 99/99 [00:00<00:00, 576.52it/s]
100%|███████████████████████████████████████| 198/198 [00:00<00:00, 1682.53it/s]
100%|███████████████████████████████████████| 396/396 [00:00<00:00, 1709.23it/s]
100%|███████████████████████████████████████| 792/792 [00:00<00:00, 1553.23it/s]
100%|█████████████████████████████████████| 1584/1584 [00:01<00:00, 1524.60it/s]
100%|█████████████████████████████████████| 1525/1525 [00:00<00:00, 1563.34it/s]


In [25]:
# BCC
# Each stage re-globs the output folder, so it processes the originals plus
# everything written by the stages before it.
image_data = glob('PData/train/bcc/*')
out_data = out_path+'bcc'
shiftscalerotate(image_data,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=40,
                 shift=0.0625,
                 scale=0.2,
                 interpolation=cv2.INTER_NEAREST,
                 call=1)
ssr_images = glob('augmented/bcc/*')
horizontalflip(ssr_images,
               out_data,
               augment=True,
               resize=[128,128],
               random=1.0)
hf_images = glob('augmented/bcc/*')
verticalflip(hf_images,
             out_data,
             augment=True,
             resize=[128,128],
             random=1.0)
# glob order is arbitrary; sorting makes the seeded shuffle reproducible.
r_images = sorted(glob('augmented/bcc/*'))
# shuffle and take only required number of images to perform next augmentation:
r_images_shuf = shuffling(r_images)
target = 4693
available = len(r_images)
# Clamp at 0: a negative slice end would select almost every image instead of
# none when the class already holds at least `target` files.
r_selected = r_images_shuf[0:max(target-available, 0)]
shiftscalerotate(r_selected,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=80,
                 shift=0.2,
                 scale=1,
                 interpolation=cv2.INTER_NEAREST,
                 call=2)

100%|████████████████████████████████████████| 359/359 [00:01<00:00, 209.06it/s]
100%|███████████████████████████████████████| 718/718 [00:00<00:00, 1629.14it/s]
100%|█████████████████████████████████████| 1436/1436 [00:00<00:00, 1647.87it/s]
100%|█████████████████████████████████████| 1821/1821 [00:01<00:00, 1488.63it/s]


In [26]:
# BKL
# Each stage re-globs the output folder, so it processes the originals plus
# everything written by the stages before it.
image_data = glob('PData/train/bkl/*')
out_data = out_path+'bkl'
shiftscalerotate(image_data,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=40,
                 shift=0.0625,
                 scale=0.2,
                 interpolation=cv2.INTER_NEAREST,
                 call=1)
ssr_images = glob('augmented/bkl/*')
verticalflip(ssr_images,
             out_data,
             augment=True,
             resize=[128,128],
             random=1.0)
# glob order is arbitrary; sorting makes the seeded shuffle reproducible.
r_images = sorted(glob('augmented/bkl/*'))
# shuffle and take only required number of images to perform next augmentation:
r_images_shuf = shuffling(r_images)
target = 4693
available = len(r_images)
# Clamp at 0: a negative slice end would select almost every image instead of
# none when the class already holds at least `target` files.
r_selected = r_images_shuf[0:max(target-available, 0)]
shiftscalerotate(r_selected,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=80,
                 shift=0.2,
                 scale=1,
                 interpolation=cv2.INTER_NEAREST,
                 call=2)

100%|████████████████████████████████████████| 769/769 [00:03<00:00, 206.97it/s]
100%|█████████████████████████████████████| 1538/1538 [00:00<00:00, 1617.09it/s]
100%|█████████████████████████████████████| 1617/1617 [00:01<00:00, 1268.43it/s]


In [27]:
# MEL
# Each stage re-globs the output folder, so it processes the originals plus
# everything written by the stages before it.
image_data = glob('PData/train/mel/*')
out_data = out_path+'mel'
verticalflip(image_data,
             out_data,
             augment=True,
             resize=[128,128],
             random=1.0)
vf_images = glob('augmented/mel/*')
brightness_contrast(vf_images,
                    out_data,
                    augment=True,
                    resize=[128,128],
                    random=1.0,
                    brightness=0.2,
                    contrast=0.2,
                    by_max=False)
# glob order is arbitrary; sorting makes the seeded shuffle reproducible.
r_images = sorted(glob('augmented/mel/*'))
# shuffle and take only required number of images to perform next augmentation:
r_images_shuf = shuffling(r_images)
target = 4693
available = len(r_images)
# Clamp at 0: a negative slice end would select almost every image instead of
# none when the class already holds at least `target` files.
r_selected = r_images_shuf[0:max(target-available, 0)]
shiftscalerotate(r_selected,
                 out_data,
                 augment=True,
                 resize=[128,128],
                 random=1.0,
                 rotation=80,
                 shift=0.2,
                 scale=1,
                 interpolation=cv2.INTER_NEAREST,
                 call=1)

100%|████████████████████████████████████████| 779/779 [00:03<00:00, 246.57it/s]
100%|█████████████████████████████████████| 1558/1558 [00:01<00:00, 1481.92it/s]
100%|█████████████████████████████████████| 1577/1577 [00:01<00:00, 1521.57it/s]


In [28]:
# NV
# No augmentation for this class: the training images are copied over as-is.
nv_images = glob('PData/train/nv/*')
for image in nv_images:
    # basename instead of split("/") so platform-specific separators work.
    shutil.copyfile(image, f'augmented/nv/{os.path.basename(image)}')

## Deleting Non-Recoverable Files
