<h1>Data Management</h1>

<h2>Library Imports & Configurations</h2>

In [86]:
from azure.cognitiveservices.vision.customvision.training import (
    CustomVisionTrainingClient
)
from azure.cognitiveservices.vision.customvision.prediction import (
    CustomVisionPredictionClient
)
from azure.cognitiveservices.vision.customvision.training.models import (
    ImageUrlCreateBatch,
    ImageUrlCreateEntry, 
    Region,
    Tag
)
from azure.storage.blob import ContainerClient
from msrest.authentication import ApiKeyCredentials
import os, time, uuid, json, urllib, glob, shutil
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [78]:
# Location of the data-management working directory and its settings file.
# NOTE(review): machine-specific absolute path — consider making it configurable.
base_dir = "/home/otiose/repos/epita/iazu/data-management/"
config_path = base_dir + "config.json"

# Data-management settings; later cells read "container_url", "image_prefix"
# and "tags" from this dict. The context manager closes the file handle as
# soon as the JSON has been parsed.
with open(config_path, 'r') as config_file:
    dtmgt_config = json.load(config_file)

<h2>Listing Files in Storage Account Container</h2>

In [71]:
# Client for the storage container that holds the training images.
# The settings come from `dtmgt_config` (loaded above): the `config` dict is
# only created further down the notebook, so using it here fails on a fresh
# kernel.
container = ContainerClient.from_container_url(dtmgt_config["container_url"])

# List the blobs stored under each tag's directory prefix.
# Note: `blobs` is rebound on every pass, so after the loop it only holds the
# names found for the last tag.
for tag in dtmgt_config["tags"]:
    blobs_list = container.list_blobs(
        name_starts_with=dtmgt_config["image_prefix"] + tag["dir"]
    )
    blobs = [blob.name for blob in blobs_list]

In [72]:
# Rebind config_path to the cognitive-api settings file; the next cell loads
# it into `config`.
# NOTE(review): machine-specific absolute path, and it reuses the name that
# earlier pointed at the data-management config.
config_path = "/home/otiose/repos/epita/iazu/cognitive-api/config.json"

In [73]:
# Custom Vision settings; later cells read "training_key", "prediction_key",
# "endpoint" and "prediction_resource_id" from this dict. The context manager
# closes the file handle once the JSON has been parsed.
with open(config_path, 'r') as config_file:
    config = json.load(config_file)

In [74]:
# Both clients target the same endpoint; they differ only in the API key that
# is sent as a request header.
endpoint = config["endpoint"]

# Training client: used below to create the project and tags, upload images,
# train, publish and read performance figures.
credentials = ApiKeyCredentials(
    in_headers={"Training-key": config["training_key"]}
)
trainer = CustomVisionTrainingClient(endpoint, credentials)

# Prediction client, authenticated with the separate prediction key.
prediction_credentials = ApiKeyCredentials(
    in_headers={"Prediction-key": config["prediction_key"]}
)
predictor = CustomVisionPredictionClient(endpoint, prediction_credentials)

In [75]:
print("Creating project...")

# Fetch the domains offered by the service and pick one by position.
# NOTE(review): index 8 depends on the order the service returns domains in —
# confirm which domain is intended and consider selecting it by name instead.
domains = trainer.get_domains()
selected_domain = domains[8]

# Create the multiclass classification project on that domain.
project = trainer.create_project(
    "Mushroom Classifier Final",
    domain_id=selected_domain.id,
    classification_type="Multiclass",
)

Creating project...


<h2>Uploading Images from Storage Container to Custom Vision</h2>

In [97]:
from os import listdir
from os.path import isfile, join

# Start from a clean slate: remove every tag currently attached to the project.
for existing_tag in trainer.get_tags(project.id):
    trainer.delete_tag(project.id, existing_tag.id)

# Build one upload entry per blob, labelled with the tag of its directory.
image_list = []
for tag_entry in dtmgt_config["tags"]:
    tag = trainer.create_tag(project.id, tag_entry["tag"])
    prefix = dtmgt_config["image_prefix"] + tag_entry["dir"]
    blob_names = [
        blob.name for blob in container.list_blobs(name_starts_with=prefix)
    ]
    # NOTE(review): the first listed blob is skipped — presumably a directory
    # placeholder entry; confirm, otherwise one image per tag is dropped.
    image_list.extend(
        ImageUrlCreateEntry(
            url=dtmgt_config["container_url"] + name,
            tag_ids=[tag.id]
        )
        for name in blob_names[1:]
    )
        

In [98]:
# Split the upload entries into consecutive chunks of at most `batch_size`
# items; the final chunk may be shorter.
batch_size = 64
image_batches = []
for start in range(0, len(image_list), batch_size):
    image_batches.append(image_list[start:start + batch_size])

In [103]:
print("Uploading images...")

# delete previous images
trainer.delete_images(project.id, all_images=True, all_iterations=True)

# Upload batch by batch and inspect every result, so images the service
# rejected are reported instead of being silently dropped from training.
for batch_number, batch in enumerate(image_batches, start=1):
    upload_result = trainer.create_images_from_urls(
        project.id,
        ImageUrlCreateBatch(images=batch)
    )
    if not upload_result.is_batch_successful:
        # Statuses beginning with "OK" (e.g. duplicates) are not failures.
        failed_images = [
            image for image in (upload_result.images or [])
            if not str(image.status).startswith("OK")
        ]
        if failed_images:
            print("Batch", batch_number, "-", len(failed_images), "of",
                  len(batch), "images were not uploaded:")
            for image in failed_images:
                print("   ", image.source_url, "->", image.status)

Uploading images...


In [104]:
print("Training...")

# Start a training run, then poll every 10 seconds until it completes.
iteration = trainer.train_project(project.id)
while iteration.status != "Completed":
    # Stop on a failed run instead of polling forever.
    if iteration.status == "Failed":
        raise RuntimeError(
            "Training failed for iteration " + str(iteration.id)
        )
    # Wait before asking again, so no sleep follows the final status fetch.
    time.sleep(10)
    iteration = trainer.get_iteration(project.id, iteration.id)

Training...


In [105]:
# Publish the trained iteration under a fixed name on the prediction resource
# named in the config. The trailing semicolon hides the call's return value
# from the cell output.
publish_iteration_name = "Mushroom Classifier"
trainer.publish_iteration(
    project.id,
    iteration.id,
    publish_iteration_name,
    config["prediction_resource_id"],
);

In [115]:
# Fetch the trained iteration's metrics at a 0.5 probability threshold and
# print the headline figures.
iteration_performance = trainer.get_iteration_performance(
    project.id, iteration.id, threshold=0.5
)
print("Precision:", iteration_performance.precision)
print("Recall:", iteration_performance.recall)
print("Average Precision:", iteration_performance.average_precision)

Precision: 0.8385995
Recal: 0.77261996
Average Precision: 0.86605173
