# Mask R-CNN - Data augmentation for under-represented attributes

## Importing libraries

In [None]:
# Colab-only setup: mount Google Drive at /content/drive so that the dataset
# and output paths under /content/drive/MyDrive used by later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
import cv2
from imgaug import augmenters as iaa
from imgaug.augmentables.segmaps import SegmentationMapsOnImage

## Data augmentation

Rotação: Rotacionar a imagem em diferentes ângulos para simular variações de orientação.

Espelhamento horizontal/vertical: Espelhar a imagem horizontal ou verticalmente para criar variações de posição.

Corte aleatório: Realizar cortes aleatórios na imagem para enfatizar diferentes áreas.

Alterações de brilho e contraste: Ajustar o brilho e o contraste da imagem para simular diferentes condições de iluminação.

Alterações de cor: Alterar os componentes de cor (por exemplo, matiz, saturação, valor) para criar variações de cor.

Zoom: Aplicar zoom in/out na imagem para simular diferentes distâncias de captura.

Adição de ruído: Adicionar ruído gaussiano, por exemplo, para simular imperfeições no processo de captura.

Transformações elásticas: Aplicar transformações elásticas locais para simular deformações da pele.

Ruído de fundo: Adicionar um ruído de fundo sutil para aumentar a robustez do modelo.

Inversões: Inverter a imagem horizontal ou verticalmente.

Distorções: Aplicar distorções geométricas leves para simular variações no posicionamento da câmera.

Adição de manchas: Adicionar manchas ou marcas artificiais à imagem para simular diferentes características.

In [None]:
def _write_image_or_raise(path, array):
  """Write ``array`` to ``path`` with OpenCV, raising ``OSError`` if the write is reported as failed."""
  # cv2.imwrite reports failure through its boolean return value rather than an exception.
  if not cv2.imwrite(path, array):
    raise OSError(f"Failed to write '{path}'")


def apply_data_augmentation(image_name, image_dataset_path, mask_dataset_path, output_dir, num_augmentations=2, aux=0):
  """Generate augmented copies of one image together with its five attribute masks.

  The image ``<image_dataset_path>/<stem>.jpg`` and the masks
  ``<mask_dataset_path>/<stem>_attribute_<name>.png`` are loaded, pushed through a
  random imgaug pipeline ``num_augmentations`` times, and every result is written
  below ``output_dir`` in sub-folders named after the last component of the two
  input directories.

  References:
    https://imgaug.readthedocs.io/en/latest/source/examples_segmentation_maps.html
    https://imgaug.readthedocs.io/en/latest/source/overview_of_augmenters.html

  Args:
    image_name: File name of the source image, with or without extension.
    image_dataset_path: Directory containing the ``.jpg`` images.
    mask_dataset_path: Directory containing the ``.png`` attribute masks.
    output_dir: Root directory under which the augmented files are written.
    num_augmentations: Number of augmented image/mask sets to generate.
    aux: Offset added to the running index in the output file names, so that
      repeated calls for the same image do not overwrite earlier results.

  Raises:
    FileNotFoundError: If the image or any of the five masks does not exist.
      Raised before anything is read or written.
    ValueError: If OpenCV cannot decode one of the input files.
    OSError: If OpenCV reports that an output file could not be written.
  """
  # Strip only the final extension so stems that themselves contain dots survive.
  image_name = os.path.splitext(image_name)[0]

  attributes = ["_attribute_globules", "_attribute_milia_like_cyst", "_attribute_negative_network", "_attribute_pigment_network", "_attribute_streaks"]

  image_path = os.path.join(image_dataset_path, f"{image_name}.jpg")
  mask_paths = [os.path.join(mask_dataset_path, f"{image_name}{attribute}.png") for attribute in attributes]

  # Fail early with a readable message; cv2.imread would otherwise just return None.
  missing_inputs = [path for path in [image_path] + mask_paths if not os.path.isfile(path)]
  if missing_inputs:
    raise FileNotFoundError(f"Missing input file(s) for '{image_name}': " + ", ".join(missing_inputs))

  # Load the image and its masks.
  image = cv2.imread(image_path)
  if image is None:
    raise ValueError(f"Could not decode image '{image_path}'")

  masks = []
  for mask_path in mask_paths:
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
      raise ValueError(f"Could not decode mask '{mask_path}'")
    masks.append(mask)

  # Output sub-folders mirror the input folder names; normpath makes this work
  # with or without a trailing slash on the input directories.
  output_image_path = os.path.join(output_dir, os.path.basename(os.path.normpath(image_dataset_path)))
  output_mask_path = os.path.join(output_dir, os.path.basename(os.path.normpath(mask_dataset_path)))
  os.makedirs(output_image_path, exist_ok=True)
  os.makedirs(output_mask_path, exist_ok=True)

  seq = iaa.Sequential([
      # Per the imgaug docs a 4-tuple of ints is a fixed top/right/bottom/left crop,
      # so this always removes 32 px per side (not a random amount).
      iaa.Crop(px=(32, 32, 32, 32)),
      # LinearContrast is the non-deprecated name for ContrastNormalization.
      iaa.LinearContrast((0.8, 1.2)),              # Contrast
      iaa.Multiply((0.8, 1.2), per_channel=0.2),   # Brightness
      iaa.AddToHueAndSaturation((-10, 10)),        # Colour (hue and saturation)
      # Exactly one geometric transform is picked per sample.
      iaa.OneOf(
        [
          iaa.Affine(rotate=45),
          iaa.Affine(rotate=270),
          # No transform parameters are set here, so this option appears to leave
          # the geometry unchanged.
          iaa.Affine(cval=0),
          iaa.Affine(scale=(0.8, 1.2)),
          iaa.Affine(shear=(-16, 16)),
          iaa.PerspectiveTransform(scale=(0.01, 0.15)),
          iaa.PiecewiseAffine(scale=(0.01, 0.05))
        ]
      ),
      iaa.Fliplr(0.5),
      iaa.Flipud(0.5),
      iaa.ElasticTransformation(alpha=50, sigma=5), # Elastic deformation
      # iaa.Cutout(fill_mode="constant", cval=0)
  ], random_order=True)

  # Stack the individual masks into one (H, W, n_attributes) array so that all of
  # them are handed to imgaug together with the image in a single call.
  masks_stacked = np.stack(masks, axis=-1)
  segmaps = SegmentationMapsOnImage(masks_stacked, shape=image.shape)

  # Augment and save one set at a time instead of buffering every result first,
  # which keeps peak memory at a single augmented image plus its masks.
  for idx in range(num_augmentations):
    augmented_image, augmented_segmaps = seq(image=image, segmentation_maps=segmaps)
    output_stem = f"{image_name}_aug_{idx+aux}"

    _write_image_or_raise(os.path.join(output_image_path, f"{output_stem}.jpg"), augmented_image)

    augmented_masks = augmented_segmaps.get_arr()
    for mask_idx, attribute in enumerate(attributes):
      # Slice out one attribute channel; make it contiguous before handing it to OpenCV.
      channel = np.ascontiguousarray(augmented_masks[..., mask_idx])
      _write_image_or_raise(os.path.join(output_mask_path, f"{output_stem}{attribute}.png"), channel)

  print(f"{num_augmentations} conjuntos de imagens aumentadas foram salvos em '{output_dir}'.")

### Train

In [None]:
# Root folder that receives the augmented files.
output_dir = "/content/drive/MyDrive/skin_cancer/assets/missing_dataset/"

# Training inputs: the images and their per-attribute ground-truth masks.
# The trailing "/" is kept on every path.
mask_dataset_path = "/content/drive/MyDrive/skin_cancer/assets/data_aug_dataset/ISIC2018_Task2_Training_GroundTruth_v3/"
image_dataset_path = "/content/drive/MyDrive/skin_cancer/assets/data_aug_dataset/ISIC2018_Task1-2_Training_Input/"

In [None]:
# Training images selected for the "negative_network" attribute (36 files).
# Author's sizing note: 600 - 190 = 410 num_augmentations
missing_training_attribute_negative_network = [
  'ISIC_0015189.jpg', 'ISIC_0000249.jpg', 'ISIC_0013793.jpg', 'ISIC_0013403.jpg',
  'ISIC_0000092.jpg', 'ISIC_0013518.jpg', 'ISIC_0013671.jpg', 'ISIC_0011317.jpg',
  'ISIC_0009934.jpg', 'ISIC_0013425.jpg', 'ISIC_0010364.jpg', 'ISIC_0012318.jpg',
  'ISIC_0012768.jpg', 'ISIC_0015625.jpg', 'ISIC_0014144.jpg', 'ISIC_0001128.jpg',
  'ISIC_0010241.jpg', 'ISIC_0011329.jpg', 'ISIC_0015167.jpg', 'ISIC_0015353.jpg',
  'ISIC_0014833.jpg', 'ISIC_0013842.jpg', 'ISIC_0010227.jpg', 'ISIC_0000546.jpg',
  'ISIC_0012221.jpg', 'ISIC_0010358.jpg', 'ISIC_0013572.jpg', 'ISIC_0000331.jpg',
  'ISIC_0001163.jpg', 'ISIC_0010237.jpg', 'ISIC_0001385.jpg', 'ISIC_0000045.jpg',
  'ISIC_0010361.jpg', 'ISIC_0010487.jpg', 'ISIC_0000024.jpg', 'ISIC_0010025.jpg',
]

# Training images selected for the "streaks" attribute (14 files).
# Author's sizing note: 600 - 100 = 500 num_augmentations
# NOTE(review): an earlier, commented-out version of this list also contained
# 'ISIC_0014790.jpg'; the reason it was dropped is not recorded here.
missing_training_attribute_streaks = [
  'ISIC_0014667.jpg', 'ISIC_0000098.jpg', 'ISIC_0010255.jpg', 'ISIC_0000198.jpg',
  'ISIC_0011143.jpg', 'ISIC_0010587.jpg', 'ISIC_0013026.jpg', 'ISIC_0001102.jpg',
  'ISIC_0014770.jpg', 'ISIC_0010251.jpg', 'ISIC_0000390.jpg', 'ISIC_0006114.jpg',
  'ISIC_0000210.jpg', 'ISIC_0001100.jpg',
]

# One pass of 12 augmentations per negative_network image.
for image_name in missing_training_attribute_negative_network:
  apply_data_augmentation(image_name, image_dataset_path, mask_dataset_path, output_dir, num_augmentations=12)

# Three passes of 12 augmentations per streaks image. Each pass shifts the file
# index by `aux` so the passes produce _aug_0.._aug_35 without overwriting.
for aux_offset in (0, 12, 24):
  for image_name in missing_training_attribute_streaks:
    print(f"image_name: {image_name}")
    apply_data_augmentation(image_name, image_dataset_path, mask_dataset_path, output_dir, num_augmentations=12, aux=aux_offset)

### Val

In [None]:
# Root folder that receives the augmented files (same value as for training).
output_dir = "/content/drive/MyDrive/skin_cancer/assets/missing_dataset/"

# Validation inputs. These assignments rebind the same global names the training
# cells use, so the training loops must not be re-run after this cell without
# re-running the training path cell first.
mask_dataset_path = "/content/drive/MyDrive/skin_cancer/assets/data_aug_dataset/ISIC2018_Task2_Validation_GroundTruth/"
image_dataset_path = "/content/drive/MyDrive/skin_cancer/assets/data_aug_dataset/ISIC2018_Task1-2_Validation_Input/"

In [None]:
# Validation images to augment, one list per attribute. The trailing notes are
# the author's own sizing arithmetic, kept verbatim.
missing_val_attribute_globules = ['ISIC_0036236.jpg', 'ISIC_0020233.jpg']  # 20 - 19 = 1 num_augmentations
missing_val_attribute_milia_like_cyst = ['ISIC_0015552.jpg']  # 20 - 6 = 14 num_augmentations
missing_val_attribute_negative_network = ['ISIC_0021202.jpg']  # 20 - 9 = 11 num_augmentations
missing_val_attribute_streaks = ['ISIC_0019794.jpg']  # 20 - 6 = 14 num_augmentations

# (image list, augmentations per image), processed in this order.
val_augmentation_plan = (
  (missing_val_attribute_globules, 1),
  (missing_val_attribute_milia_like_cyst, 14),
  (missing_val_attribute_negative_network, 11),
  (missing_val_attribute_streaks, 14),
)

for image_names, copies_per_image in val_augmentation_plan:
  for image_name in image_names:
    apply_data_augmentation(image_name, image_dataset_path, mask_dataset_path, output_dir, num_augmentations=copies_per_image)


/content/drive/MyDrive/skin_cancer/assets/missing_dataset/ISIC2018_Task2_Validation_GroundTruth/
1 conjuntos de imagens aumentadas foram salvos em '/content/drive/MyDrive/skin_cancer/assets/missing_dataset/'.
/content/drive/MyDrive/skin_cancer/assets/missing_dataset/ISIC2018_Task2_Validation_GroundTruth/
1 conjuntos de imagens aumentadas foram salvos em '/content/drive/MyDrive/skin_cancer/assets/missing_dataset/'.
/content/drive/MyDrive/skin_cancer/assets/missing_dataset/ISIC2018_Task2_Validation_GroundTruth/
/content/drive/MyDrive/skin_cancer/assets/missing_dataset/ISIC2018_Task2_Validation_GroundTruth/
/content/drive/MyDrive/skin_cancer/assets/missing_dataset/ISIC2018_Task2_Validation_GroundTruth/
/content/drive/MyDrive/skin_cancer/assets/missing_dataset/ISIC2018_Task2_Validation_GroundTruth/
/content/drive/MyDrive/skin_cancer/assets/missing_dataset/ISIC2018_Task2_Validation_GroundTruth/
/content/drive/MyDrive/skin_cancer/assets/missing_dataset/ISIC2018_Task2_Validation_GroundTruth/
