In [None]:
# install dependencies
!pip3 install tqdm      #shows a smart progress meter while executing the loops in code
!pip3 install pillow --upgrade      #for image processing

In [None]:
# import libraries

%matplotlib inline

import numpy as np
import os
import tarfile    #for reading and writing tar archive files
import urllib     #for handling urls
import shutil     #for high level operations on files and collection of files
import json       #javascript object notation
import random
import boto3      #to create, configure and manage aws services such as EC2 and S3
import sagemaker  #for training and deploying machine learning models on amazon sagemaker

from tqdm import tqdm
from sagemaker.amazon.amazon_estimator import get_image_uri
import matplotlib.pyplot as plt
from xml.etree import ElementTree as ET     #to read and manipulate xml tree structures
from PIL import Image, ImageDraw, ImageFont

In [None]:
## Download and Extract the Data

In [None]:
# archives of the oxford IIIT pets dataset that will be downloaded:
# first the images, then the annotations
urls = [
    "http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz",
    "http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz",
]

In [None]:
#function to download and extract the data

def download_and_extract(data_dir, download_dir):

    """Function to download and extract the archives listed in the module level ``urls``.

    Input args: data_dir = directory into which the archives are extracted
                download_dir = directory in which the downloaded archives are stored

    An archive whose file name is already present in download_dir is neither
    downloaded nor extracted again.
    """

    # "import urllib" alone does not load the request submodule
    import urllib.request

    for url in urls:
        target_file = url.split("/")[-1]    #access the file name from the url
        target_path = os.path.join(download_dir, target_file)

        if target_file not in os.listdir(download_dir):
            print("Downloading", url)
            urllib.request.urlretrieve(url, target_path)
            # open the archive where it was saved (not relative to the working
            # directory) and close it again once extraction is done
            with tarfile.open(target_path) as tf:
                tf.extractall(data_dir)
        else:
            print("Data already downloaded", url)

In [None]:
# ensure the "data" directory exists; nothing happens if it is already there
os.makedirs("data", exist_ok=True)

In [None]:
# download and extract the data:
# the archives are saved in the current directory (".") and unpacked into "data"
download_and_extract("data", ".")

In [None]:
## Extract the annotations from the XML Format

In [None]:
Now that the data is downloaded, we have the images for training our model. The annotations, however, are in XML format. If we look into one of the XML files, we will see that it contains the object class and also the bounding box, which marks the face of the pet that we will train our model on. So, the next step is to extract this information from the XML files.

In [None]:
xml_dir = "data/annotations/xmls"    #define the xml directory

# make a sorted list of paths of xml files: os.listdir returns names in arbitrary
# order, so sorting keeps xml_files[0] and every later step the same from run to run
xml_files = sorted(os.path.join(xml_dir, x) for x in os.listdir(xml_dir) if x.endswith(".xml"))
xml_files[0]   #show the first element of the list

In [None]:
len(xml_files)

In [None]:
# the two object classes; the position of a name in this list is its class id
classes = ["cat", "dog"]

# one {"class_id", "name"} entry per class, in the same order as the list above
categories = [{"class_id": class_id, "name": name} for class_id, name in enumerate(classes)]

In [None]:
In the next step we read the files listed in xml_files and convert them into JSON format. This is because when we use MXNet in AWS SageMaker, it expects the annotations to be structured in JSON format. So, now we convert the XML annotations to JSON annotations.

In [None]:
def convert_annotations_xml_to_json(xml_file_path):

    """Function to convert the xml annotations into json annotations.

    Input args: xml_file_path = path of the xml annotation file of one image

    Returns a dict with the keys
        "file"        = text of the <filename> element
        "categories"  = the module level ``categories`` list
        "image_size"  = one-element list holding width, height and depth as ints
        "annotations" = one dict per bounding box with "class_id", "left", "top",
                        "width" and "height"

    A <bndbox> that lacks any of xmin, ymin, xmax or ymax is skipped.
    Raises ValueError if an object name is not listed in ``classes``.
    """

    def read_int(parent, tag):
        """Return the integer text of child `tag`, or None if it is missing or empty."""
        text = parent.findtext(tag)
        if text is None or not text.strip():
            return None
        return int(text)

    root = ET.parse(xml_file_path).getroot()
    size = root.find("size")

    annotation = {
        "file": root.find("filename").text,
        "categories": categories,
        "image_size": [{
            "width": int(size.find("width").text),
            "height": int(size.find("height").text),
            "depth": int(size.find("depth").text)
        }],
        "annotations": []
    }

    for item in root.iter("object"):
        class_id = classes.index(item.find("name").text)

        for box in item.findall("bndbox"):
            xmin = read_int(box, "xmin")
            ymin = read_int(box, "ymin")
            xmax = read_int(box, "xmax")
            ymax = read_int(box, "ymax")

            # compare against None explicitly: 0 is a valid coordinate, and a
            # test of the form "all([...]) is not None" is always true
            if None in (xmin, ymin, xmax, ymax):
                continue

            annotation["annotations"].append({
                "class_id":class_id,
                "left":xmin,
                "top":ymin,
                "width":xmax - xmin,
                "height":ymax - ymin
            })

    return annotation

In [None]:
convert_annotations_xml_to_json(xml_files[0])

In [None]:
## Data Visualization

In [None]:
def plot_image(plt, annot, image_file_path, rows, cols, index):

    """Function to plot image from the data with its annotated boxes drawn on it.

    Input args: plt = object providing subplot, xticks, yticks and imshow
                      (the callers pass matplotlib.pyplot)
                annot = annotation dict; its "annotations" entries must hold
                        "left", "top", "width", "height" and "class_id"
                image_file_path = path of the image file to open
                rows, cols = shape of the subplot grid
                index = zero-based position of this image in the grid

    Returns the plt object that was passed in.
    """

    img = Image.open(image_file_path)
    draw = ImageDraw.Draw(img)

    try:
        font = ImageFont.truetype("/usr/share/fonts/dejavu/DejaVuSerif-Bold.ttf", 20)
    except OSError:
        # the font file is not present at this path on every machine;
        # fall back to Pillow's built-in font instead of failing
        font = ImageFont.load_default()

    for a in annot["annotations"]:
        # box corners as [left, top, right, bottom]
        box = [
            int(a["left"]), int(a["top"]),
            int(a["left"]) + int(a["width"]),
            int(a["top"]) + int(a["height"])
        ]
        draw.rectangle(box, outline = "yellow", width = 4)
        # write the class name at the top-left corner of the box
        draw.text((box[0], box[1]), classes[int(a["class_id"])], font = font)

    plt.subplot(rows, cols, index + 1)   # subplot positions are one-based
    plt.xticks([])
    plt.yticks([])
    plt.imshow(img)

    return plt

In [None]:
def show_random_annotations(plt):

    """Function to draw nine randomly picked annotated images in a 3 x 3 grid.

    Input args: plt = plotting object handed on to plot_image
                      (expected to be matplotlib.pyplot)
    """

    grid_rows, grid_cols = 3, 3
    plt.figure(figsize=(16,16))

    for position in range(grid_rows * grid_cols):
        # pick one annotation file at random; the same file may be picked twice
        chosen_xml = xml_files[random.randint(0, len(xml_files)-1)]
        annot = convert_annotations_xml_to_json(chosen_xml)
        image_file_path = os.path.join("data/images/", annot["file"])

        plt = plot_image(plt, annot, image_file_path, grid_rows, grid_cols, position)

    plt.show()

In [None]:
We see from the random image plots that all the faces are annotated. The bounding boxes mark only the faces, and each box is also labelled with the class of the animal, i.e., cat or dog.

In [None]:
## Setting up SageMaker

In [None]:
# set up an execution role
role = sagemaker.get_execution_role()

# bucket to store all the training data
bucket = "intekhabpetsdata"

# training image is a docker image where all our code will be executed during training;
# it is looked up for the region of the current boto3 session
region = boto3.Session().region_name
training_image = get_image_uri(region, "object-detection", repo_version = "latest")
print(training_image)

In [None]:
# create blank folders according to the sagemaker object-detection algorithm. the structure of the folders is unique
# and must be the same for every case

folders = ["train", "train_annotation", "validation", "validation_annotation"]

def _recreate_empty_dir(path):
    """Delete the directory `path` if it exists, then create it again empty."""
    if os.path.isdir(path):
        shutil.rmtree(path)
    os.mkdir(path)

for folder in folders:
    _recreate_empty_dir(folder)

In [None]:
After doing this we need to create the bucket in S3 so that we can store all our data there. At this stage we go to the AWS console and create a bucket with the same name as the one we assigned to the `bucket` variable in the code.

In [None]:
## Preparation of Data for SageMaker

In [None]:
# check the length of our dataset
print("Total number of samples:", len(xml_files))

In [None]:
# copy every image into either "train" or "validation" and write its annotations as a
# json file with the same base name into the matching "*_annotation" folder

# private, seeded generator: re-running this cell over the same xml_files list gives
# the same split, so no image ends up in both train and validation
split_rng = random.Random(42)

for xml_file in tqdm(xml_files):
    # roughly 75% of the samples go to train, the rest to validation
    target_set = "train" if split_rng.randint(0,99) < 75 else "validation"
    annot = convert_annotations_xml_to_json(xml_file) # extract the annotations for each file
    image_file_path = os.path.join("data/images/", annot["file"])
    image_target_path = os.path.join(target_set, annot["file"])
    shutil.copy(image_file_path, image_target_path)
    # swap the image extension for ".json" whatever its length (".jpg", ".jpeg", ...)
    json_file_name = os.path.splitext(annot["file"])[0] + ".json"
    json_file_path = os.path.join(target_set + "_annotation", json_file_name)
    with open(json_file_path, "w") as f:
        json.dump(annot, f)

In [None]:
# file names of the training images and of the training annotations
train_images, train_annots = (os.listdir(folder_name) for folder_name in ("train", "train_annotation"))

# the two counts are printed side by side for comparison
print(len(train_images), len(train_annots))

In [None]:
# check if the image name in train and annotation folder correspondingly match with each other
train_annot_names = set(train_annots)   # a set gives constant-time membership tests

for image in train_images:
    # drop only the final extension, so names that contain other dots still match
    json_file = os.path.splitext(image)[0] + ".json"
    if json_file not in train_annot_names:
        print("Not found", json_file)

In [None]:
## Upload the Data to S3 Bucket

In [None]:
We upload the data to S3 now because, when we start our training session, SageMaker will create a virtual machine. All our computations will be carried out inside this virtual machine, and the data required for the training process will be downloaded directly from the S3 bucket to the virtual machine.

In [None]:
sess = sagemaker.Session()    #create a sagemaker session to upload the data

def _upload_folder(folder):
    """Upload the local directory `folder` to the bucket under the key prefix `folder`.

    Returns the value returned by sess.upload_data for that upload.
    """
    return sess.upload_data(path = folder, bucket = bucket, key_prefix = folder)

print("Uploading data...")
s3_train_path = _upload_folder("train")
print("Training images uploaded!")
s3_validation_path = _upload_folder("validation")
print("Validation images uploaded!")
s3_train_annot_path = _upload_folder("train_annotation")
print("Train annotations uploaded!")
s3_validation_annot_path = _upload_folder("validation_annotation")
print("Validation annotations uploaded!")

In [None]:
s3_train_path

In [None]:
s3_validation_path

In [None]:
s3_train_annot_path

In [None]:
s3_validation_annot_path

In [None]:
## SageMaker Estimator
SageMaker Estimator is a high level API which will handle the training process.

In [None]:
# the estimator describes the training job: which image to run, with which role,
# on what hardware, and where to store the result
model = sagemaker.estimator.Estimator(
    training_image,
    role = role,
    train_instance_type = "ml.p3.2xlarge",   #16 GB GPU
    train_instance_count = 1,
    train_volume_size = 100,                 #100 GB
    train_max_run = 36000,                   # After 36000 seconds force stop
    input_mode = "File",                     # When using json annotations we use file input mode
    # store the output in a folder called output in the same bucket as the data;
    # built from `bucket` so the two cannot drift apart
    output_path = "s3://{}/output".format(bucket),
    sagemaker_session = sess
)

In [None]:
## Set up the Hyperparameters
These hyperparameters are specific to the object-detection algorithm in SageMaker

In [None]:
# collect the settings in one dict first so they can be inspected before being applied
hyperparameters = dict(
    base_network = "resnet-50",    # for object detection we can use resnet-50 or vgg
    num_classes = 2,               # cat and dog
    use_pretrained_model = 1,      # 1 means we will load the pretrained weights
    mini_batch_size = 16,
    epochs = 15,
    learning_rate = 0.001,
    optimizer = "sgd",             # stochastic gradient descent
    lr_scheduler_step = "10",
    lr_scheduler_factor = 0.1,
    momentum = 0.9,
    weight_decay = 0.0005,
    overlap_threshold = 0.5,
    nms_threshold = 0.45,
    image_shape = 512,
    num_training_samples = len(train_annots)   # one annotation file per training sample
)

model.set_hyperparameters(**hyperparameters)

In [None]:
## Specifying the Data Channels
We need to specify the paths to access the data for training the model.

In [None]:
def _make_channel(s3_path):
    """Build the s3_input for `s3_path`; all four channels share the same settings."""
    return sagemaker.session.s3_input(s3_path, distribution = "FullyReplicated",
                                      content_type = "application/x-image", s3_data_type = "S3Prefix")

# one input per uploaded folder
train_data = _make_channel(s3_train_path)
validation_data = _make_channel(s3_validation_path)
train_annotation_data = _make_channel(s3_train_annot_path)
validation_annotation_data = _make_channel(s3_validation_annot_path)

In [None]:
# map each channel name to the input that was built for it
data_channels = dict(
    train = train_data,
    validation = validation_data,
    train_annotation = train_annotation_data,
    validation_annotation = validation_annotation_data,
)

In [None]:
data_channels

In [None]:
## Training the Model

In [None]:
# start training on the four channels collected in data_channels
# NOTE(review): logs=True presumably shows the training job's logs while it runs -
# confirm against the installed SageMaker SDK version
model.fit(inputs=data_channels, logs=True)