In [1]:
import Augmentor as aug
import glob
import os
import numpy as np
import cv2 
import PIL
from Augmentor.Operations import Operation

## Custom Lightning Class for the Data Augmentation using Augmentor Python's lib 

In [2]:
class Lightning(Operation):
    def __init__(self, probability, intensity_low=0.7, intensity_high=1.2):
        Operation.__init__(self, probability)
        # Init classes variables with default values 
        # Default values treshold intent to create a optimal range
        # Imagens cant be too dark or too brigher
        self.intensity_low = intensity_low
        self.intensity_high = intensity_high

    def perform_operation(self, images):
        for i, image in enumerate(images):
            image = np.array(image.convert('RGB'))
            row, col, _ = image.shape
            light_intensity = np.random.randint(int(self.intensity_low * 100),
                                          int(self.intensity_high * 100))
    
            light_intensity /= 100

            gaussian = 100 * np.random.random((row, col, 1))
            gaussian = np.array(gaussian, dtype=np.uint8)
            gaussian = np.concatenate((gaussian, gaussian, gaussian), axis=2)
            image = cv2.addWeighted(image, light_intensity, gaussian, 0.25, 0)

            image = PIL.Image.fromarray(image)
            images[i] = image
  
        return images

In [3]:
# Multiplier used to set the final augmented images number
MULTIPLIER=29

In [4]:
# Default dir where we can find the train dataset
TRAIN_DIRECTORY_DATASET = '/home/helpthx/TCC-1/TCC-1-UnB/downloads/*'

## Dataset classes accounting base on the diferent project folders

Each folder containg imagens from a specific class

In [5]:
folders = []
for f in glob.glob(TRAIN_DIRECTORY_DATASET):
    if os.path.isdir(f):
        folders.append(os.path.abspath(f))

print('Classes found {}'.format([os.path.split(x)[1] for x in folders]))
print('Numb: ', len([os.path.split(x)[1] for x in folders]))

Classes found ['                     Basal cell carcinoma', '                     Dermatofibroma', '                     Squamous cell carcinoma', 'Actinic Keratosis', '                     Pyogenic granuloma', '                     Seborrheic keratosis', '                     Melanocytic nevus', '                     Wart', '                     Lentigo', '                     Bowen’s disease', '                     Intraepithelial carcinoma', '                    Hemangioma', '                     Malignant melanoma']
Numb:  13


## Creating a transformation's pipeline for each class in the train dataset

In [6]:
# Dictionari to hold the abspath and class's name
pipelines = {}

In [7]:
for folder in folders:
    pipelines[os.path.split(folder)[1]] = (aug.Pipeline(
        source_directory=folder,
        output_directory='resnet_augmented',
        save_format='jpg'))

Initialised with 124 image(s) found.
Output directory set to /home/helpthx/TCC-1/TCC-1-UnB/downloads/                     Basal cell carcinoma/resnet_augmented.Initialised with 49 image(s) found.
Output directory set to /home/helpthx/TCC-1/TCC-1-UnB/downloads/                     Dermatofibroma/resnet_augmented.Initialised with 60 image(s) found.
Output directory set to /home/helpthx/TCC-1/TCC-1-UnB/downloads/                     Squamous cell carcinoma/resnet_augmented.Initialised with 24 image(s) found.
Output directory set to /home/helpthx/TCC-1/TCC-1-UnB/downloads/Actinic Keratosis/resnet_augmented.Initialised with 42 image(s) found.
Output directory set to /home/helpthx/TCC-1/TCC-1-UnB/downloads/                     Pyogenic granuloma/resnet_augmented.Initialised with 64 image(s) found.
Output directory set to /home/helpthx/TCC-1/TCC-1-UnB/downloads/                     Seborrheic keratosis/resnet_augmented.Initialised with 52 image(s) found.
Output directory set to /home/helpthx/

## Counting elements inside each class

In [8]:
classes_count = []
for p in pipelines.values():
    print("Class '{}' has {} samples".format(p.augmentor_images[0].class_label,
                                           len(p.augmentor_images)))
  
    classes_count.append(len(p.augmentor_images))

Class '                     Basal cell carcinoma' has 124 samples
Class '                     Dermatofibroma' has 49 samples
Class '                     Squamous cell carcinoma' has 60 samples
Class 'Actinic Keratosis' has 24 samples
Class '                     Pyogenic granuloma' has 42 samples
Class '                     Seborrheic keratosis' has 64 samples
Class '                     Melanocytic nevus' has 52 samples
Class '                     Wart' has 90 samples
Class '                     Lentigo' has 57 samples
Class '                     Bowen’s disease' has 19 samples
Class '                     Intraepithelial carcinoma' has 3 samples
Class '                    Hemangioma' has 40 samples
Class '                     Malignant melanoma' has 45 samples


## Making pipeline transformations based on the following roles

Table 1 - Based values for each kid of Transformation

| Transformation     | Probability |
|--------------------|-------------|
| Rotation           | 0.5         |
| Random Zoom        | 0.4         |
| Flip Horizontally  | 0.7         |
| Flip Vertically    | 0.5         |
| Random Distortion  | 0.8         |
| Lightning Variance | 0.5         |

In [9]:
# Instantiating Lighthing Class with 50 % probability 
lightning = Lightning(probability=0.5)

Creating the pipeline based on tables 1 probability's values

In [10]:
for p in pipelines.values():
  # 50 % of rotation the imagem with max left and max right
    p.rotate(probability=0.5, max_left_rotation=10, max_right_rotation=10)
  
  # 40 % of zoom inside the imagem with a 90 % cover area
    p.zoom_random(probability=0.4, percentage_area=0.9)
  
  # 70 % of mirror vertical imagem for 50 % left or rigth 
    p.flip_left_right(probability=0.7)

  # 50 % of mirror horizontal
    p.flip_top_bottom(probability=0.5)

  # Appling some distortion in the imagem
    p.random_distortion(probability=0.8, grid_width=5, grid_height=5, magnitude=15)
  
  # Custom lightning of 50 %
    p.add_operation(lightning)

  # Rezise all the imagens size for default restnet 224x224
    p.resize(probability=1.0, width=224, height=224)

## Scaling by the multiplier factor

In [12]:
# If a equal sampling of the lesions is needed
# Mind that the final MULTIPLIER can scale many times if True
SAME_SAMPLING = False
for p in pipelines.values():
    if SAME_SAMPLING:
        try: 
            diff = max(classes_count) - len(p.augmentor_images)
            p.sample((len(p.augmentor_images) + diff)*MULTIPLIER + diff)
        except:
            print(p.sample((len(p.augmentor_images) + diff)*MULTIPLIER + diff))
            pass
    else:
        try:
            p.sample(len(p.augmentor_images)*MULTIPLIER)
        except:
            print(p.sample(len(p.augmentor_images)*MULTIPLIER))
            pass

Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x7F9E2C341DD8>: 100%|██████████| 3596/3596 [01:00<00:00, 59.30 Samples/s] 
Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x7F9E2C3A25C0>: 100%|██████████| 1421/1421 [00:21<00:00, 66.50 Samples/s] 
Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x7F9E2CC3DC50>: 100%|██████████| 1740/1740 [00:46<00:00, 37.58 Samples/s] 
Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x7F9E2D7ECCF8>: 100%|██████████| 696/696 [00:22<00:00, 30.70 Samples/s] 
Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x7F9E2D7398D0>:   2%|▏         | 29/1218 [00:05<03:25,  5.78 Samples/s] 
Processing <PIL.Image.Image image mode=RGB size=224x224 at 0x7F9E2DA749B0>:   1%|          | 14/1218 [00:02<03:35,  5.60 Samples/s]


AttributeError: 'OSError' object has no attribute 'message'