# Lab 1: Image Augmentation Lab

In this lab we will walk through a typical Amazon SageMaker Data Wrangler flow for creating augmented images.

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import sagemaker
import matplotlib.pyplot as plt
from PIL import Image
# Default S3 bucket of the current SageMaker session; every S3 upload, download
# and delete in this notebook targets this bucket.
bucket=sagemaker.Session().default_bucket()
# S3 key prefix under which the raw (un-augmented) images are uploaded.
prefix = 'computer_vision-unicorn_gym/image_augmentation_data'

In [None]:
!mkdir image_augmentation_data

Unpack our dataset

In [None]:
# Extract the gzip-compressed archive into ./image_augmentation_data, listing each file;
# --no-same-owner makes the extracted files belong to the current user.
!tar -xzvf 130_tree_sparrow.tar.gz -C ./image_augmentation_data --no-same-owner

## Upload dataset to S3

In [None]:
# Recursively copy the extracted images to s3://<bucket>/<prefix>.
!aws s3 cp --recursive  image_augmentation_data s3://{bucket}/{prefix}

In [None]:
# Optional: remove the extracted images to free local disk space.
# Left disabled because the preview section later opens the original sample
# from image_augmentation_data/130_tree_sparrow.
# !rm -rf image_augmentation_data

In [None]:
# Print the bucket name; it replaces the BUCKET_NAME placeholder in the
# Data Wrangler flow file in the next section.
print("Your S3 Bucket Name: ", bucket)

## Image Processing in Data Wrangler

1. Right-click the `Image-DataWrangler_Capabilities.flow` file and open it with `Editor`
2. Replace **all** instances of the string `BUCKET_NAME` with your bucket name (it appears in the `s3Uri` and `output_path` values)

![Update Bucket](img/update_bucket.png)

3. Save the file, then open `Image-DataWrangler_Capabilities.flow` with Amazon SageMaker Data Wrangler
4. Create a Data Wrangler job from the flow and wait for it to complete before continuing

## Download and preview augmented images

In [None]:
import os

# Local root folder that receives the augmented images downloaded from S3.
augmented_images_base = 'augmented_data'

# One name per augmentation. Each is used both as a local sub-folder name and
# as the S3 key suffix when downloading and cleaning up the output.
color_suffix = 'color'
blur_suffix = 'blur'
corrupt_suffix = 'corrupt'
enhanced_suffix = 'enhanced'

color_dir = os.path.join(augmented_images_base, color_suffix)
blur_dir = os.path.join(augmented_images_base, blur_suffix)
corrupt_dir = os.path.join(augmented_images_base, corrupt_suffix)
enhanced_dir = os.path.join(augmented_images_base, enhanced_suffix)

# makedirs creates the missing parent (augmented_images_base) on demand, and
# exist_ok=True turns an already-existing folder into a no-op, so re-running
# this cell is safe.
for augmented_dir in (color_dir, blur_dir, corrupt_dir, enhanced_dir):
    os.makedirs(augmented_dir, exist_ok=True)

In [None]:
# NOTE: this rebinds `prefix` from the raw-image upload location to the
# Data Wrangler output location. The clean-up cell at the end relies on this
# value; re-running the upload cell after this point would copy the raw images
# under the output prefix instead.
prefix = 'computer_vision-unicorn_gym/image_augmentation_output_data_wrangler'

# Download each augmentation's output from S3 into its matching local folder.
!aws s3 cp --recursive s3://{bucket}/{prefix}/{color_suffix} {color_dir}
!aws s3 cp --recursive s3://{bucket}/{prefix}/{blur_suffix} {blur_dir}
!aws s3 cp --recursive s3://{bucket}/{prefix}/{corrupt_suffix} {corrupt_dir}
!aws s3 cp --recursive s3://{bucket}/{prefix}/{enhanced_suffix} {enhanced_dir}

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# File name of the image that is compared side by side: the original and each
# of its augmented versions are looked up under this name.
sample_filename = 'Tree_Sparrow_0005_122949.jpg'

In [None]:
def get_sample(input_dir, sample_name):
    """Return the path of the first file named ``sample_name`` under ``input_dir``.

    The directory tree rooted at ``input_dir`` is walked recursively and the
    first exact file-name match is returned.

    Args:
        input_dir: Directory to search, including all of its sub-directories.
        sample_name: Exact file name (not a path or pattern) to look for.

    Returns:
        The path of the matching file, built by joining the directory it was
        found in with ``sample_name``.

    Raises:
        FileNotFoundError: If no file with that name exists under ``input_dir``
            (including when ``input_dir`` itself does not exist).
    """
    for root, _dirs, files in os.walk(input_dir):
        if sample_name in files:
            return os.path.join(root, sample_name)
    # Fail here with a clear message instead of returning None, which would
    # only surface later as an obscure error when the caller opens the path.
    raise FileNotFoundError(
        "{!r} was not found under {!r}; check that the Data Wrangler job "
        "finished and that its output was downloaded.".format(sample_name, input_dir)
    )

In [None]:
# Sanity check: display the local path at which the sample is found in the 'color' output.
get_sample(color_dir, sample_filename)

In [None]:
# The untouched sample comes from the locally extracted dataset.
original_path = os.path.join('image_augmentation_data', '130_tree_sparrow', sample_filename)
original_img = Image.open(original_path)

# The same sample, located in each downloaded augmentation folder
# (folder order matches the names on the left-hand side).
blur_img, color_img, corrupt_img, enhanced_img = [
    Image.open(get_sample(folder, sample_filename))
    for folder in (blur_dir, color_dir, corrupt_dir, enhanced_dir)
]

In [None]:
# (title, image) pairs in left-to-right display order.
panels = [
    ("Original image", original_img),
    ("Blur image", blur_img),
    ("Colorized image", color_img),
    ("Corrupt image", corrupt_img),
    ("Enhanced image", enhanced_img),
]

# One row with one column per panel; axes are hidden so only the pictures and titles show.
plt.figure(figsize=(15,50))
for position, (caption, picture) in enumerate(panels, start=1):
    plt.subplot(1, len(panels), position)
    plt.imshow(picture)
    plt.title(caption)
    plt.axis("off")
plt.show()

## Clean up resources

In [None]:
!aws s3 rm --recursive s3://{BUCKET_NAME}/computer_vision-unicorn_gym/image_augmentation_data/
!aws s3 rm --recursive s3://{bucket}/{prefix}/{color_suffix}/
!aws s3 rm --recursive s3://{bucket}/{prefix}/{blur_suffix}/
!aws s3 rm --recursive s3://{bucket}/{prefix}/{corrupt_suffix}/
!aws s3 rm --recursive s3://{bucket}/{prefix}/{enhanced_suffix}/