# Train, evaluate, and test


## Lab setup

### Install Custom Vision Service SDK

In [None]:
# Install the Custom Vision Service SDK in the current Jupyter kernel.
# pip is invoked through `sys.executable`, i.e. the interpreter that runs this kernel.
# NOTE(review): the package version is not pinned. The cells below import the
# `training_api` and `prediction_endpoint` modules - confirm that the installed
# release still provides them.
import sys
!{sys.executable} -m pip install azure-cognitiveservices-vision-customvision

### Get example images


In [None]:
%%sh
# Download and unpack the example aerial images.
# -nc skips the download when aerial.zip is already present, and -o lets unzip
# overwrite previously extracted files without prompting, so the cell can be
# re-run without creating aerial.zip.1 or stopping at an overwrite prompt.
wget -nv -nc https://azureailabs.blob.core.windows.net/aerialsamples/aerial.zip
unzip -o aerial.zip

### Get the training and prediction keys
To get the keys used in this example, navigate to the Custom Vision Training blade in the Azure Portal and retrieve the training key from the **Keys** page under the **Resource Management** section. Repeat for the Custom Vision Prediction service to get the prediction key.

In [None]:
from azure.cognitiveservices.vision.customvision.training import training_api

# Placeholders: replace them with the keys of your own Custom Vision Training
# and Prediction resources.
# NOTE(review): avoid saving or sharing the notebook with real keys in this cell.
training_key = '<your training key>'
prediction_key = '<your prediction key>'

# Training client used by the top-level cells below (project and tag management)
trainer = training_api.TrainingApi(training_key)

### Create a Custom Vision Service project

In [None]:
project_name = 'AerialClassifier'

# Reuse the project with this name when it already exists, otherwise create it.
# `project` and `project_id` are both left bound for the cells below.
project = next(
    (candidate for candidate in trainer.get_projects() if candidate.name == project_name),
    None,
)
if project is not None:
    print("Found existing project: {0}".format(project.id))
else:
    print("Creating a new project")
    project = trainer.create_project(project_name)
project_id = project.id


## Train

### Upload and tag images 
The `create_images_from_files` API used to upload images sends one batch of images per call. The maximum supported batch size is 64.

In [None]:
import os
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, Region

# Define a utility function to upload a list of images
def upload_images(training_key, project_id, image_list, batch_size=64):
    """Upload tagged images to a Custom Vision project in batches.

    Args:
        training_key: Training key passed to `training_api.TrainingApi`.
        project_id: ID of the project that receives the images.
        image_list: Sequence of `(tag_id, pathname, file_name)` tuples; each
            file is read from `pathname` and uploaded under `file_name`.
        batch_size: Number of images sent per `create_images_from_files` call.

    Returns:
        The result of the last `create_images_from_files` call, or `None`
        when `image_list` is empty.

    Raises:
        ValueError: If `batch_size` is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    trainer = training_api.TrainingApi(training_key)
    print("Starting upload ...")
    # Stays None when there is nothing to upload, instead of being unbound
    summary = None
    for start in range(0, len(image_list), batch_size):
        image_entry_batch = []
        for tag, pathname, file_name in image_list[start:start + batch_size]:
            with open(pathname, mode='rb') as image_contents:
                image_entry_batch.append(ImageFileCreateEntry(name=file_name, contents=image_contents.read(), tag_ids=[tag]))
        summary = trainer.create_images_from_files(project_id, images=image_entry_batch)
        # NOTE(review): `is_batch_successful` is taken from the SDK's image
        # creation summary; getattr keeps the upload working if it is absent.
        if not getattr(summary, "is_batch_successful", True):
            print("Warning: the batch starting at image {0} was not fully accepted".format(start))
    print("Done.")
    return summary


In [None]:
# Create tags. Only the tags that are missing from the project are created, so
# the cell also works when the project already contains some of them.
tag_names = ['Barren', 'Developed', 'Cultivated']
tags = list(trainer.get_tags(project_id))
existing_tag_names = {tag.name for tag in tags}
tags.extend(trainer.create_tag(project_id, name) for name in tag_names if name not in existing_tag_names)

tag_map = {tag.name: tag.id for tag in tags}

# Upload images. Each listed folder carries the name of the tag that is
# applied to the images it contains.
base_folder = 'aerial/train'
image_list = [
    (tag_map[folder], os.path.join(base_folder, folder, filename), filename)
    for folder in ['Barren', 'Cultivated', 'Developed']
    for filename in os.listdir(os.path.join(base_folder, folder))
]
summary = upload_images(training_key, project_id, image_list, batch_size = 64)


### Train the first iteration of the project


In [None]:
import time

def train(training_key, project_id, poll_interval=2):
    """Train a new iteration of a project and make it the default iteration.

    Args:
        training_key: Training key passed to `training_api.TrainingApi`.
        project_id: ID of the project to train.
        poll_interval: Seconds to wait between two status checks.

    Returns:
        The ID of the trained iteration, or `None` when the service rejected
        the training request.

    Raises:
        RuntimeError: If the iteration reports the status "Failed".
    """
    trainer = training_api.TrainingApi(training_key)
    print("Starting training...")
    try:
        # Use the `project_id` argument rather than a notebook-level global
        iteration = trainer.train_project(project_id)
    except Exception as err:
        # Presumably raised when nothing changed since the last training.
        # The error is printed so that other failures (for example an invalid
        # key) are not silently reported as "No need to retrain".
        print("No need to retrain")
        print("Training request was rejected: {0}".format(err))
        return None

    while iteration.status != "Completed":
        # NOTE(review): "Failed" is assumed to be the terminal error status -
        # confirm against the iteration status values of the service.
        if iteration.status == "Failed":
            raise RuntimeError("Training of iteration {0} failed".format(iteration.id))
        # Wait before polling so that no sleep follows the final status
        time.sleep(poll_interval)
        iteration = trainer.get_iteration(project_id, iteration.id)
        print("Training status: " + iteration.status)

    # The iteration is now trained. Make it the default project endpoint
    trainer.update_iteration(project_id, iteration.id, is_default=True)
    print("Done")
    return iteration.id

In [None]:
# Start training. `iteration_id` is None when `train` prints "No need to retrain".
iteration_id = train(training_key, project_id)

### Get iteration performance 

In [None]:
def display_iteration_performance(training_key, project_id, iteration_id):
    """Print overall and per-tag precision and recall of an iteration.

    Args:
        training_key: Training key passed to `training_api.TrainingApi`.
        project_id: ID of the project that owns the iteration.
        iteration_id: ID of the iteration to report on.
    """
    client = training_api.TrainingApi(training_key)
    metrics = client.get_iteration_performance(project_id, iteration_id)

    # Overall figures first, then one row per tag
    print("Overall Precision: {0:<10}".format(metrics.precision))
    print("Overall Recall:    {0:<10}".format(metrics.recall))
    per_tag_row = "Tag: {0:<15} Precision: {1:<10}   Recall: {2:<10}"
    for entry in metrics.per_tag_performance:
        print(per_tag_row.format(entry.name, entry.precision, entry.recall))

In [None]:
# Print precision and recall of the iteration identified by `iteration_id`
display_iteration_performance(training_key, project_id, iteration_id)

### Upload additional images

In [None]:
# Upload the images of the second batch, all tagged as 'Developed'
base_folder = 'aerial/train'
folder = 'Developed-SecondBatch'
batch_dir = os.path.join(base_folder, folder)
image_list = [
    (tag_map['Developed'], os.path.join(batch_dir, filename), filename)
    for filename in os.listdir(batch_dir)
]

summary = upload_images(training_key, project_id, image_list, batch_size = 64)

### Re-train the project


In [None]:
# Start training again; `iteration_id` is replaced by the result of this run
iteration_id = train(training_key, project_id)

In [None]:
# Print precision and recall of the iteration returned by the latest `train` call
display_iteration_performance(training_key, project_id, iteration_id)

## Test

### Download test images

In [None]:
%%sh
# Download the three test images into test_images/.
# -p keeps mkdir from failing when the folder already exists, and -nc keeps
# wget from saving duplicate copies (e.g. barren-1.png.1) on a re-run; such
# duplicates would otherwise be picked up by the cells that list this folder.
mkdir -p test_images
cd test_images || exit 1
wget -nv -nc https://github.com/jakazmie/AIDays/raw/master/DeveloperTrack/01-CustomVisionService/samples/barren-1.png
wget -nv -nc https://github.com/jakazmie/AIDays/raw/master/DeveloperTrack/01-CustomVisionService/samples/cultivated-1.png
wget -nv -nc https://github.com/jakazmie/AIDays/raw/master/DeveloperTrack/01-CustomVisionService/samples/developed-1.png

### Display test images

The images we will use for testing were downloaded to the `test_images` folder by the previous cell.

In [None]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
%matplotlib inline

images_dir = 'test_images'
# Sorted so that the display order does not depend on the file system
images = sorted(os.path.join(images_dir, file) for file in os.listdir(images_dir))

# Three images per row. The row count is rounded up so that no image is
# dropped, and is at least 1 so that the figure can always be created.
# squeeze=False keeps `axis` two-dimensional for any row count.
columns = 3
rows = max(1, -(-len(images) // columns))
figsize=(10, 8)
fig, axis = plt.subplots(rows, columns, figsize=figsize, squeeze=False)
fig.tight_layout()
cells = list(axis.flat)
for ax, image_path in zip(cells, images):
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    # Close the image file once it has been drawn
    with Image.open(image_path) as image:
        ax.imshow(image)
# Hide the grid cells that did not receive an image
for ax in cells[len(images):]:
    ax.set_visible(False)

### Test with `curl`
 

In [None]:
# Export the project ID and the prediction key as environment variables so
# that a shell cell can read them as $PROJECT_ID and $PREDICTION_KEY.
# NOTE(review): %env echoes the assigned value - clear this cell's output
# before sharing the notebook.
%env PROJECT_ID=$project_id
%env PREDICTION_KEY=$prediction_key

In [None]:
%%sh

# POST the raw bytes of one test image to the prediction REST endpoint.
# PROJECT_ID and PREDICTION_KEY are read from the environment.
# NOTE(review): the URL hard-codes the southcentralus region - confirm that it
# matches the region of your prediction resource.
curl -X POST https://southcentralus.api.cognitive.microsoft.com/customvision/v2.0/Prediction/$PROJECT_ID/image -H "Prediction-Key: $PREDICTION_KEY"  -H "Content-Type: application/octet-stream" --data-binary @test_images/developed-1.png

### Call the prediction endpoint using Python SDK

The Python SDK wraps the prediction endpoint in the `PredictionEndpoint` class of the `prediction_endpoint` module. The class exposes the `predict_image` method, which takes a Python file object as a parameter. The following code snippet defines a utility function `classify_image` that invokes the prediction endpoint and parses the results returned from the service.

In [None]:
from azure.cognitiveservices.vision.customvision.prediction import prediction_endpoint
from azure.cognitiveservices.vision.customvision.prediction.prediction_endpoint import models

def classify_image(project_id, prediction_key, image_path):
    """Classify an image file and return its most probable tag.

    Args:
        project_id: ID of the project whose prediction endpoint is called.
        prediction_key: Key passed to `prediction_endpoint.PredictionEndpoint`.
        image_path: Path of the image file to classify.

    Returns:
        A `(tag_name, probability)` tuple for the prediction with the highest
        probability.
    """
    client = prediction_endpoint.PredictionEndpoint(prediction_key)
    with open(image_path, mode='rb') as image_file:
        response = client.predict_image(project_id, image_file)

    # max() keeps the first prediction when several share the top probability
    best = max(response.predictions, key=lambda prediction: prediction.probability)
    return best.tag_name, best.probability

We will now invoke the prediction endpoint and display the results returned by the service.

In [None]:
# Three images per row. The row count is rounded up so that no image is
# dropped, and is at least 1 so that the figure can always be created.
# squeeze=False keeps `axis` two-dimensional for any row count.
columns = 3
rows = max(1, -(-len(images) // columns))
figsize=(10, 8)
fig, axis = plt.subplots(rows, columns, figsize=figsize, squeeze=False)
cells = list(axis.flat)
for ax, image_path in zip(cells, images):
    # Title each image with the predicted tag and its probability
    tag, prob = classify_image(project_id, prediction_key, image_path)
    ax.set_title(tag + ': ' + str(prob))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    # Close the image file once it has been drawn
    with Image.open(image_path) as image:
        ax.imshow(image)
# Hide the grid cells that did not receive an image
for ax in cells[len(images):]:
    ax.set_visible(False)
# Lay out the figure after the titles are set so that they are taken into account
fig.tight_layout()