In [1]:
# Using Object detection feature of Custom Vision Services 
# In this demo we'll use json output of VoTT Tool 
# Before starting you need to install customvision api using pip install

In [4]:
!pip install azure-cognitiveservices-vision-customvision



In [26]:
#Create Custom Vision Service Project
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, Region

ENDPOINT = "https://southcentralus.api.cognitive.microsoft.com"

# My Custom Vision AI Keys
training_key = "MY_TRAINING_KEY"
prediction_key = "MY_PREDICTION_KEY"

trainer = CustomVisionTrainingClient(training_key, endpoint=ENDPOINT)

# Find the object detection domain
obj_detection_domain = next(domain for domain in trainer.get_domains() if domain.type == "ObjectDetection")

# Create a new project
print ("Creating project...")
project = trainer.create_project("OfficeDrink", domain_id=obj_detection_domain.id)

Creating project...


In [27]:
import json
import os
import sys

#FileName of your folder and jsonfile created by VoTT
FileName = "Office"

#import the json file
json_file = FileName + ".json"
with open(json_file,'r',encoding="utf-8") as f:
    data = json.load(f)

#import all tags in json file
inputTags = data["inputTags"].split(",")
print(inputTags)

['Tea', 'TurkishCoffee']


In [28]:
#Create all Tags
for tag in inputTags:
    trainer.create_tag(project.id, tag)
    print(tag)

Tea
TurkishCoffee


In [29]:
imgtags = {}

#Get all tagsIDs created on custom vision and add into a dictionary
for tag in trainer.get_tags(project.id):
    imgtags[tag.name] = tag.id

print(imgtags)

{'TurkishCoffee': '1db90cae-6c57-4bd4-a44f-fa7a7d7e3f2d', 'Tea': '986a6b45-62f0-424c-87b9-53da918076cf'}


In [30]:
#Help to sort alphanumeric
import re

def sorted_aphanumeric(data):
    convert = lambda text: int(text) if text.isdigit() else text.lower()
    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 
    return sorted(data, key=alphanum_key)

In [31]:
imgtagdic = {}

# Group all the photos by tags using their ROIs(Region of Images)
# image pixel normalization for Custom Vision
# tleft = x1/w 
# ttop = y1/h
# twidth = (x2-x1)/w
# theight = (y2-y1)/h
for a in data["visitedFrames"]:
    print(a)
    try:
        for key in data["frames"][str(a)]:
            print(key)
            x1 = key["x1"]
            y1 = key["y1"]
            x2 = key["x2"]
            y2 = key["y2"]        
            h = key["height"]
            w = key["width"]
            tleft = x1/w 
            ttop = y1/h
            twidth = (x2-x1)/w
            theight = (y2-y1)/h
            try:
                tag = key["tags"][0]
                # Defining UniqueID per tags in photo below  
                imgtagdic[str(a)+','+ str(key["name"])+ tag] = [imgtags[tag],tleft,ttop,twidth,theight]
            except:
                print(str(a)+ tag +"error")
    except:
        print("Error Occured")

#Print all 
print(imgtagdic)

0
{'width': 789, 'tags': ['Tea'], 'x2': 547, 'y1': 78, 'type': 'Rectangle', 'height': 592, 'y2': 404, 'id': 0, 'x1': 94, 'name': 1}
{'width': 789, 'tags': ['TurkishCoffee'], 'x2': 553, 'y1': 123, 'type': 'Rectangle', 'height': 592, 'y2': 498, 'id': 52, 'x1': 68, 'name': 1}
1
{'width': 789, 'tags': ['Tea'], 'x2': 564, 'y1': 131, 'type': 'Rectangle', 'height': 592, 'y2': 478, 'id': 1, 'x1': 87, 'name': 1}
2
{'width': 789, 'tags': ['Tea'], 'x2': 550, 'y1': 139, 'type': 'Rectangle', 'height': 592, 'y2': 488, 'id': 2, 'x1': 87, 'name': 1}
3
{'width': 789, 'tags': ['Tea'], 'x2': 553, 'y1': 123, 'type': 'Rectangle', 'height': 592, 'y2': 498, 'id': 3, 'x1': 68, 'name': 1}
4
{'width': 789, 'tags': ['Tea'], 'x2': 564, 'y1': 143, 'type': 'Rectangle', 'height': 592, 'y2': 408, 'id': 4, 'x1': 162, 'name': 1}
5
{'width': 789, 'tags': ['Tea'], 'x2': 517, 'y1': 178, 'type': 'Rectangle', 'height': 592, 'y2': 491, 'id': 5, 'x1': 74, 'name': 1}
6
{'width': 789, 'tags': ['Tea'], 'x2': 515, 'y1': 163, 'typ

In [32]:
len(imgtagdic)

47

In [33]:
imgtagdic

{'0,1Tea': ['986a6b45-62f0-424c-87b9-53da918076cf',
  0.11913814955640051,
  0.13175675675675674,
  0.5741444866920152,
  0.5506756756756757],
 '0,1TurkishCoffee': ['1db90cae-6c57-4bd4-a44f-fa7a7d7e3f2d',
  0.08618504435994931,
  0.20777027027027026,
  0.614702154626109,
  0.6334459459459459],
 '1,1Tea': ['986a6b45-62f0-424c-87b9-53da918076cf',
  0.11026615969581749,
  0.22128378378378377,
  0.6045627376425855,
  0.5861486486486487],
 '10,1Tea': ['986a6b45-62f0-424c-87b9-53da918076cf',
  0.21166032953105196,
  0.2922297297297297,
  0.4740177439797212,
  0.4239864864864865],
 '11,1Tea': ['986a6b45-62f0-424c-87b9-53da918076cf',
  0.2256020278833967,
  0.27533783783783783,
  0.46514575411913817,
  0.41047297297297297],
 '12,1Tea': ['986a6b45-62f0-424c-87b9-53da918076cf',
  0.26878238341968913,
  0.30569948186528495,
  0.422279792746114,
  0.37132987910189985],
 '13,1Tea': ['986a6b45-62f0-424c-87b9-53da918076cf',
  0.08365019011406843,
  0.23986486486486486,
  0.5627376425855514,
  0.52027

In [34]:
import os

allphotos = sorted_aphanumeric(os.listdir(FileName+ "/"))

# Go through the data table above and create the images
print ("images are ready")

images are ready


In [35]:
print(len(allphotos))

46


In [36]:
print(allphotos)

['IMG_0758.JPG', 'IMG_0759.JPG', 'IMG_0760.JPG', 'IMG_0761.JPG', 'IMG_0762.JPG', 'IMG_0763.JPG', 'IMG_0764.JPG', 'IMG_0765.JPG', 'IMG_0766.JPG', 'IMG_0767.JPG', 'IMG_0768.JPG', 'IMG_0769.JPG', 'IMG_0770.JPG', 'IMG_0771.JPG', 'IMG_0772.JPG', 'IMG_0773.JPG', 'IMG_0774.JPG', 'IMG_0775.JPG', 'IMG_0776.JPG', 'IMG_0777.JPG', 'IMG_0778.JPG', 'IMG_0779.JPG', 'IMG_0780.JPG', 'IMG_0781.JPG', 'IMG_0782.JPG', 'IMG_0784.JPG', 'IMG_0785.JPG', 'IMG_0786.JPG', 'IMG_0787.JPG', 'IMG_0788.JPG', 'IMG_0789.JPG', 'IMG_0790.JPG', 'IMG_0791.JPG', 'IMG_0792.JPG', 'IMG_0793.JPG', 'IMG_0794.JPG', 'IMG_0795.JPG', 'IMG_0796.JPG', 'IMG_0797.JPG', 'IMG_0798.JPG', 'IMG_0799.JPG', 'IMG_0800.JPG', 'IMG_0801.JPG', 'IMG_0802.JPG', 'IMG_0803.JPG', 'IMG_0804.JPG']


In [43]:
tagged_images_with_regions = []

for key in sorted_aphanumeric(imgtagdic):
    tagid,x,y,w,h = imgtagdic[key]
    regions = [ Region(tag_id=tagid, left=x,top=y,width=w,height=h) ]

    try:
        print(key.replace(',','-'), len(regions))
        print(key + " / " + allphotos[int(key.split(',')[0])])
        print("-----")
        with open(FileName+"/" + allphotos[int(key.split(',')[0])], mode="rb") as image_contents:
            tagged_images_with_regions.append(ImageFileCreateEntry(name=key.replace(',','-'), contents=image_contents.read(), regions=regions))
    except:
        print("Error occured")


0-1Tea 1
0,1Tea / IMG_0758.JPG
-----
0-1TurkishCoffee 1
0,1TurkishCoffee / IMG_0758.JPG
-----
1-1Tea 1
1,1Tea / IMG_0759.JPG
-----
2-1Tea 1
2,1Tea / IMG_0760.JPG
-----
3-1Tea 1
3,1Tea / IMG_0761.JPG
-----
4-1Tea 1
4,1Tea / IMG_0762.JPG
-----
5-1Tea 1
5,1Tea / IMG_0763.JPG
-----
6-1Tea 1
6,1Tea / IMG_0764.JPG
-----
7-1Tea 1
7,1Tea / IMG_0765.JPG
-----
8-1Tea 1
8,1Tea / IMG_0766.JPG
-----
9-1Tea 1
9,1Tea / IMG_0767.JPG
-----
10-1Tea 1
10,1Tea / IMG_0768.JPG
-----
11-1Tea 1
11,1Tea / IMG_0769.JPG
-----
12-1Tea 1
12,1Tea / IMG_0770.JPG
-----
13-1Tea 1
13,1Tea / IMG_0771.JPG
-----
14-1Tea 1
14,1Tea / IMG_0772.JPG
-----
15-1Tea 1
15,1Tea / IMG_0773.JPG
-----
16-1Tea 1
16,1Tea / IMG_0774.JPG
-----
17-1Tea 1
17,1Tea / IMG_0775.JPG
-----
18-1Tea 1
18,1Tea / IMG_0776.JPG
-----
19-1Tea 1
19,1Tea / IMG_0777.JPG
-----
20-1Tea 1
20,1Tea / IMG_0778.JPG
-----
21-1Tea 1
21,1Tea / IMG_0779.JPG
-----
22-1Tea 1
22,1Tea / IMG_0780.JPG
-----
23-1Tea 1
23,1Tea / IMG_0781.JPG
-----
24-1TurkishCoffee 1
24,1Tur

In [38]:
len(tagged_images_with_regions)

47

In [39]:
# For large photosets
# Upload all photos with groups of 10 photos  
for a in range(int(len(tagged_images_with_regions)/10)+1):  
        print("Progress: " + str(a) +"/"+str(int(len(tagged_images_with_regions)/10)))
        trainer.create_images_from_files(project.id, images=tagged_images_with_regions[(a*10):((a*10)+10)])
        

Progress: 0/4
Progress: 1/4
Progress: 2/4
Progress: 3/4
Progress: 4/4


In [40]:
# Training part of the project
# Just make sure everyphoto has minimum 15 photos
# If not it'll breake
import time

print ("Training...")
iteration = trainer.train_project(project.id)
while (iteration.status != "Completed"):
    iteration = trainer.get_iteration(project.id, iteration.id)
    print ("Training status: " + iteration.status)
    time.sleep(1)

# The iteration is now trained. Make it the default project endpoint
trainer.update_iteration(project.id, iteration.id, is_default=True)
print ("Done!")

Training...
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training


In [41]:
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient

# Now there is a trained endpoint that can be used to make a prediction
predictor = CustomVisionPredictionClient(prediction_key, endpoint=ENDPOINT)

# Open the sample image and get back the prediction results.
with open("test.jpeg", mode="rb") as test_data:
    results = predictor.predict_image(project.id, test_data, iteration.id)

# Display the results.
for prediction in results.predictions:
    print ("\t" + prediction.tag_name + ": {0:.2f}%".format(prediction.probability * 100), prediction.bounding_box.left, prediction.bounding_box.top, prediction.bounding_box.width, prediction.bounding_box.height)

	TurkishCoffee: 94.15% 0.4842975 0.127631515 0.378270984 0.681416631
	TurkishCoffee: 93.35% 0.02848728 0.226901934 0.337339163 0.363547981
