# Image Augmentations for Model Training Data

## 1. Imports and GPU Check

In [None]:
# Hide output of this cell
%%capture

# Install packages
!pip install fastcore fastai --upgrade
!pip install rasterio
!pip install geopandas

# Import packages
from fastai.vision.all import *
from PIL import Image
from torchvision.transforms import ToTensor
import albumentations as A
from sklearn.model_selection import train_test_split
from skimage.io import imread, imsave
import imageio
from skimage.transform import rotate
from skimage.util import random_noise
import cv2
import rasterio.features
import shapely.geometry
import geopandas as gpd
import numpy as np
import random
import os
import shutil
from tqdm import tqdm
from matplotlib import pyplot as plt
import glob
import pytz
from datetime import datetime
from google.colab import drive

In [None]:
# Check which GPU was allocated
!nvidia-smi

Mon Mar 14 09:21:23 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    23W / 300W |      2MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## 2. Define Required Functions and Set Parameters

In [None]:
def timestamp():
  """Return the current time in the Europe/Berlin zone as 'YYYY-MM-DD_HH:MM'.

  Seconds, microseconds and the UTC offset are dropped.
  """
  berlin = pytz.timezone('Europe/Berlin')
  now = datetime.now(berlin)
  return now.strftime('%Y-%m-%d_%H:%M')

# Mount Google Drive at /content/drive; the data paths used further down
# all live under this mount point.
drive.mount('/content/drive')

def informal_pixels_from_file(file):
  """Return the fraction of values in a raster file that are equal to 255.

  Args:
    file: Path of the raster to open with rasterio.

  Returns:
    Number of values equal to 255 divided by the total number of values
    read from the file (all bands are counted).
  """
  with rasterio.open(file, 'r') as dataset:
    pixel_values = dataset.read()
    marked = np.count_nonzero(pixel_values == 255)
    return marked / pixel_values.size

Mounted at /content/drive


## 2. Create Image Augmentations

### 2.1. Create undersampled training data for further augmentations

In [None]:
imagery_type = "aerial"
tile_type = "512_512 stride"
mask_type = "buildings"
mask_subfolder = "2019_10cm_RGB_BE_67"
path = Path(f"/content/drive/MyDrive/Segmentation Data/{imagery_type}")

dir_img = f'{path}/image_tiles/{mask_subfolder}/{tile_type}'
dir_msk = f'{path}/{mask_type}_mask_tiles/{mask_subfolder}/{tile_type}'

# Image and mask tiles are paired by list position below, so both listings are
# sorted to make the pairing independent of the directory listing order.
# NOTE(review): assumes an image tile and its mask sort to the same position
# in their folders (e.g. identical file names) - confirm against the tiling step.
fnames = get_image_files(dir_img).sorted()
lbl_names = get_image_files(dir_msk).sorted()
assert len(fnames) == len(lbl_names), f"{len(fnames)} image tiles vs. {len(lbl_names)} mask tiles"

# Drop tiles without building pixels and store the rest in new folder '512_512 undersampled'
building_share = [informal_pixels_from_file(fn) for fn in lbl_names]
indices = [index for index, share in enumerate(building_share) if share > 0]

dir_img_augm = f'{path}/image_tiles/{mask_subfolder}/512_512 undersampled'
dir_msk_augm = f'{path}/{mask_type}_mask_tiles/{mask_subfolder}/512_512 undersampled'

# shutil.copyfile does not create missing parent folders
os.makedirs(dir_img_augm, exist_ok = True)
os.makedirs(dir_msk_augm, exist_ok = True)

for index in indices:
  shutil.copyfile(fnames[index], f"{dir_img_augm}/{Path(fnames[index]).name}")
  shutil.copyfile(lbl_names[index], f"{dir_msk_augm}/{Path(lbl_names[index]).name}")

### 2.2. Create Augmentations

In [None]:
## Create new folders for different train/test splits and augmentations
# Images and masks are paired by list position, so both listings are sorted.
# NOTE(review): assumes an image tile and its mask sort to the same position
# in their folders (e.g. identical file names) - confirm.
fnames = get_image_files(dir_img_augm).sorted()
lbl_names = get_image_files(dir_msk_augm).sorted()
assert len(fnames) == len(lbl_names), f"{len(fnames)} images vs. {len(lbl_names)} masks"

root = '/content/drive/MyDrive/Segmentation Data/aerial/augmented/8'


def augment_pair(img, msk):
  """Create the augmented variants of one image tile and its mask.

  Args:
    img: Image tile as read by `imread`.
    msk: Mask tile belonging to `img`.

  Returns:
    Tuple `(images, masks)` of two lists with seven entries each; entry k of
    `masks` is the mask that belongs to entry k of `images`. Order: original,
    rotated by 90 degrees, left-right flipped, up-down flipped, random noise,
    rotated + left-right flipped, rotated + up-down flipped.
  """
  # np.rot90 only re-orders pixels, so the rotated tiles keep the dtype and the
  # exact values of their source (no interpolation, no rescaling to floats).
  # NOTE(review): a 90 degree rotation swaps height and width - fine for the
  # square tiles this notebook appears to use; confirm if tile shapes change.
  img_rot = np.rot90(img)
  msk_rot = np.rot90(msk)

  # random_noise returns a float image (range [0, 1] for unsigned input), so it
  # has to be scaled back before it can be stored next to the other variants.
  # NOTE(review): assumes the tiles are uint8 - confirm.
  noisy = random_noise(img, var = 0.2**2)
  noisy = np.clip(np.rint(noisy * 255), 0, 255).astype(np.uint8)

  images = [
      img,                 # original image
      img_rot,             # rotated by 90 degrees
      np.fliplr(img),      # left-right flipped
      np.flipud(img),      # up-down flipped
      noisy,               # random noise added
      np.fliplr(img_rot),  # rotated and left-right flipped
      np.flipud(img_rot),  # rotated and up-down flipped
  ]
  masks = [
      msk,                 # original mask
      msk_rot,             # rotated by 90 degrees
      np.fliplr(msk),      # left-right flipped
      np.flipud(msk),      # up-down flipped
      msk,                 # use original mask since position of buildings has not changed
      np.fliplr(msk_rot),  # rotated and left-right flipped
      np.flipud(msk_rot),  # rotated and up-down flipped
  ]
  return images, masks


# One output tree per validation share: {root}/{test_size}/{img|lbl}/{train|valid}
for test_size in [0.05, 0.2]:
  split_dir = f'{root}/{test_size}'
  for tile in ['img', 'lbl']:
    for folder in ["train", "valid"]:
      os.makedirs(f'{split_dir}/{tile}/{folder}', exist_ok = True)

  # Split images and masks (same random_state, so pairs stay together)
  img_train, img_test, msk_train, msk_test = train_test_split(fnames, lbl_names, test_size = test_size, random_state = 42)

  # Copy validation images and masks (no augmentation)
  for file in img_test:
    shutil.copyfile(file, f"{split_dir}/img/valid/{Path(file).name}")
  for file in msk_test:
    shutil.copyfile(file, f"{split_dir}/lbl/valid/{Path(file).name}")

  ## Augment training images and masks
  # Tiles are processed one pair at a time and written immediately, so only one
  # pair and its augmentations are held in memory. An image and its mask share
  # the same running file number.
  index = 0
  for img_path, msk_path in tqdm(zip(img_train, msk_train), total = len(img_train)):
    images, masks = augment_pair(imread(img_path), imread(msk_path))
    for img, msk in zip(images, masks):
      imageio.imwrite(f'{split_dir}/img/train/{index}.png', img)
      imageio.imwrite(f'{split_dir}/lbl/train/{index}.png', msk)
      index += 1

100%|██████████| 207/207 [00:43<00:00,  4.72it/s]
