# Making sense of the world through vision

In [None]:
# Computer Vision
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials

# Custom Vision
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, ImageFileCreateBatch
from msrest.authentication import ApiKeyCredentials

# Face API
from azure.cognitiveservices.vision.face import FaceClient
from msrest.authentication import CognitiveServicesCredentials
from azure.cognitiveservices.vision.face.models import TrainingStatusType, Person

# Speech API
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import azure.cognitiveservices.speech as speechsdk

# Other
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random
import numpy as np
import time
import os
from PIL import Image #, ImageDraw
from mpl_toolkits.axes_grid1 import AxesGrid
from IPython.display import display
from IPython.display import Image as IPythonImage
from IPython.display import Audio as IPythonAudio

In [None]:
# Log in to the Azure subscription first (run this once in a terminal):
# az login --use-device-code

# Create the resource group that every `az cognitiveservices` command in this
# notebook targets (Azure-Friday_RG, West Europe).
! az group create -n Azure-Friday_RG -l westeurope --output table

In [None]:
# Provision the Computer Vision resource (kind ComputerVision, SKU S1).
! az cognitiveservices account create \
    --name AF-ComputerVision \
    --resource-group Azure-Friday_RG \
    --kind ComputerVision \
    --sku S1 \
    --location westeurope \
    --yes \
    --output table

# Print the primary access key of the resource (used as `subscription_key` further down).
! az cognitiveservices account keys list \
    --name AF-ComputerVision --resource-group Azure-Friday_RG \
    --query key1

# Print the endpoint URL of the resource (used as `endpoint` further down).
! az cognitiveservices account show \
    --name AF-ComputerVision --resource-group Azure-Friday_RG \
    --query properties.endpoint

# 




# Computer vision

In [None]:

# Computer Vision credentials.
# Prefer environment variables so the secret never has to be saved inside the
# notebook; the literal fallbacks keep the original "paste your key here" flow.
subscription_key = os.environ.get("VISION_KEY", "<INSERT KEY>")

endpoint = os.environ.get("VISION_ENDPOINT", "https://westeurope.api.cognitive.microsoft.com/")

# Authenticated Computer Vision client; reused by the describe, detect and read cells below.
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))

## Describe what is on an image

In [None]:
# Despite the name this is a local file path, not a URL: the cells below pass it
# to open(), plt.imread() and IPythonImage(filename=...).
image_url = "images/amsterdam-gaa24fa0bd_1280.jpg"

In [None]:
# Show the sample image inline before analysing it.
display(IPythonImage(filename=image_url))

In [None]:
# Send the local image to the service and fetch its generated captions.
with open(image_url, "rb") as image_stream:
    description_results = computervision_client.describe_image_in_stream(image_stream)

# One line per caption, confidence shown with two decimals.
for caption in description_results.captions:
    print(caption.text, "| Confidence: ", format(caption.confidence, ".2f"))

## Detect what is on the image

In [None]:
# Detect objects in the image and draw a labelled rectangle around each one.
# The file is only needed for the request, so it is closed before plotting.
with open(image_url, "rb") as image_stream:
    detect_objects_results_remote = computervision_client.detect_objects_in_stream(image_stream)

im = plt.imread(image_url)

# Create figure and axes: figure size is derived from the pixel dimensions and
# the axes fill the whole figure, so the picture is shown without margins.
fig = plt.figure(figsize = (im.shape[1]/80, im.shape[0]/80))
ax = plt.axes((0,0,1,1))

# Display the image
ax.imshow(im,origin='upper')

# Overlay the information.
# The loop variable is deliberately NOT called `object`: at cell level that
# would rebind the builtin `object` for the rest of the kernel session.
for detected_object in detect_objects_results_remote.objects:
    box = detected_object.rectangle
    color = (np.random.rand(),np.random.rand(),np.random.rand())
    rect = patches.Rectangle((box.x, box.y),
                             box.w, box.h,
                             linewidth=2, edgecolor=color, facecolor='none')
    ax.add_patch(rect)

    # The label is positioned in axes coordinates (transform=ax.transAxes),
    # hence the pixel position is scaled by the image size and y is flipped.
    ax.text(
        (1/im.shape[1]*box.x), 1-(1/im.shape[0]*box.y), detected_object.object_property,
        horizontalalignment='left',
        verticalalignment='bottom',
        fontsize=16,
        color='w',
        backgroundcolor=color,
        transform=ax.transAxes
    )
ax.axis('off')
plt.show()

In [None]:
# Run the generic object detector on a bank note photo to show what the
# pre-built model returns for this kind of image.
bank_note_img = "bank-notes/Test/2Thousandnote/3.jpg"

# The file is only needed for the request, so it is closed before plotting.
with open(bank_note_img, "rb") as image_stream:
    # detect objects in the image
    detect_objects_results_remote = computervision_client.detect_objects_in_stream(image_stream)

im = plt.imread(bank_note_img)

# Create figure and axes: figure size is derived from the pixel dimensions and
# the axes fill the whole figure, so the picture is shown without margins.
fig = plt.figure(figsize = (im.shape[1]/100, im.shape[0]/100))
ax = plt.axes((0,0,1,1))

# Display the image
ax.imshow(im,origin='upper')

# Overlay the information.
# The loop variable is deliberately NOT called `object`: at cell level that
# would rebind the builtin `object` for the rest of the kernel session.
for detected_object in detect_objects_results_remote.objects:
    box = detected_object.rectangle
    color = (np.random.rand(),np.random.rand(),np.random.rand())
    rect = patches.Rectangle((box.x, box.y),
                             box.w, box.h,
                             linewidth=2, edgecolor=color, facecolor='none')
    ax.add_patch(rect)

    # The label is positioned in axes coordinates (transform=ax.transAxes),
    # hence the pixel position is scaled by the image size and y is flipped.
    ax.text(
        (1/im.shape[1]*box.x), 1-(1/im.shape[0]*box.y), detected_object.object_property,
        horizontalalignment='left',
        verticalalignment='bottom',
        fontsize=16,
        color='w',
        backgroundcolor=color,
        transform=ax.transAxes
    )
ax.axis('off')
plt.show()

# 




# Detect your own objects

## Train our own model using Azure Custom Vision

In [None]:
# Provision the Custom Vision training resource (kind CustomVision.Training, SKU S0).
! az cognitiveservices account create \
    --name AF-CustomVision \
    --kind CustomVision.Training \
    --sku S0 \
    --resource-group Azure-Friday_RG \
    --location westeurope \
    --yes \
    --output table

# Print the primary access key of the resource (used as `training_cv_key` further down).
! az cognitiveservices account keys list \
    --name AF-CustomVision \
    --resource-group Azure-Friday_RG \
    --query key1
    
# Print the endpoint URL of the resource (used as `cv_endpoint` further down).
! az cognitiveservices account show  \
    --name AF-CustomVision  \
    --resource-group Azure-Friday_RG  \
    -o json  \
    --query properties.endpoint


In [None]:
# Training credentials.
# Prefer environment variables so the secret never has to be saved inside the
# notebook; the literal fallbacks keep the original "paste your key here" flow.
training_cv_key = os.environ.get("VISION_TRAINING_KEY", "<INSERT KEY>")
cv_endpoint = os.environ.get("VISION_TRAINING_ENDPOINT", "https://westeurope.api.cognitive.microsoft.com")

# Location for the training images (one sub-folder per tag, see the upload cell)
training_images = "bank-notes/train"

In [None]:
# Connect to the training endpoint.
# The key is sent in the "Training-key" request header.
credentials = ApiKeyCredentials(in_headers={"Training-key": training_cv_key})
trainer = CustomVisionTrainingClient(endpoint=cv_endpoint, credentials=credentials)

In [None]:
# List the id and name of every domain the service offers; a domain id is
# needed to create the project in the next cell.
for domain in trainer.get_domains():
    print(domain.id, "\t", domain.name) 

In [None]:
# Create a new project using a fixed domain id.
# NOTE(review): the id is hard-coded; presumably it is one of the exportable
# ("compact") domains, since the trained model is exported to ONNX later.
# Confirm against the list printed by the previous cell.
project = trainer.create_project("Indian Bank Notes V1", domain_id="0732100f-1a38-4e49-a514-c9b44c697ab5")

In [None]:
# Show the dataset: one randomly chosen picture per tag folder.

path = r"bank-notes/train"
random_filenames = []
for tag in os.listdir(path):
    tag_dir = os.path.join(path, tag)
    # Stray files next to the tag folders (e.g. .DS_Store) are not tags.
    if not os.path.isdir(tag_dir):
        continue
    candidates = [
        x for x in os.listdir(tag_dir)
        if os.path.isfile(os.path.join(tag_dir, x))
    ]
    # random.choice() raises on an empty sequence, so skip empty tag folders.
    if candidates:
        random_filenames.append(path+"/"+tag+"/"+random.choice(candidates))

grid = AxesGrid(plt.figure(1, (20,20)), 111, nrows_ncols=(2, 4), axes_pad=0, label_mode="1")

# Never index past the last grid cell: the grid has 2 x 4 = 8 axes, so only as
# many pictures as there are axes are drawn.
for i, img_name in enumerate(random_filenames[:len(grid)]):
    im = plt.imread(img_name)
    grid[i].imshow(im,aspect='auto', extent=(0,1,0,0.8), alpha=1, origin='upper', zorder=-1)

In [None]:
# Upload the images in batches.
# Step 1: create one tag per sub-folder of `training_images` and read every
# image file in it into memory, labelled with that tag.
image_list = []
# Only real directories are tags; stray files (e.g. .DS_Store) are ignored.
directories = [
    entry for entry in os.listdir(training_images)
    if os.path.isdir(os.path.join(training_images, entry))
]

for tagName in directories:
    tag = trainer.create_tag(project.id, tagName)
    tag_dir = os.path.join(training_images, tagName)
    # Nested folders cannot be opened as images, so keep regular files only.
    images = [f for f in os.listdir(tag_dir) if os.path.isfile(os.path.join(tag_dir, f))]
    for img in images:
        with open(os.path.join(tag_dir, img), "rb") as image_contents:
            image_list.append(ImageFileCreateEntry(name=img, contents=image_contents.read(), tag_ids=[tag.id]))
            
def chunks(l, n):
    """Yield successive slices of `l`, each holding at most `n` items.

    Args:
        l: Any sliceable sequence that supports len().
        n: Maximum number of items per slice; must be a positive integer.

    Yields:
        Consecutive slices l[0:n], l[n:2n], ...; the last one may be shorter.

    Raises:
        ValueError: if `n` is smaller than 1 (raised when iteration starts).
    """
    # A negative step would make range() empty and silently drop every item.
    if n < 1:
        raise ValueError("chunk size n must be a positive integer")
    for i in range(0, len(l), n):
        yield l[i:i + n]
# Step 2: send the collected images to the project, 64 entries per request.
batchedImages = chunks(image_list, 64)

for batchOfImages in batchedImages:
    upload_result = trainer.create_images_from_files(project.id, ImageFileCreateBatch(images=batchOfImages))
    if upload_result.is_batch_successful:
        print("Batch uploaded successfully")
        continue
    # Something in this batch was rejected: list the status of every image in it.
    print("Image batch upload failed.")
    for image in upload_result.images:
        print("Image status: ", image.status)
print("Done uploading")

In [None]:
# Train the model.
# train_project() returns the iteration object whose status the next cell polls.
print ("Start Training...")
iteration = trainer.train_project(project.id)

In [None]:
# Check for completion: poll the iteration every 10 seconds until it is done.
while (iteration.status != "Completed"):
    iteration = trainer.get_iteration(project.id, iteration.id)
    print ("Training status: " + iteration.status)
    if iteration.status == "Completed":
        # Finished: no need to wait another 10 seconds.
        break
    if iteration.status == "Failed":
        # Without this check a failed training would keep the loop running forever.
        raise RuntimeError("Custom Vision training failed for iteration " + str(iteration.id))
    print ("Waiting 10 seconds...")
    time.sleep(10)
print("Training complete")

In [None]:
# Request an ONNX export of the trained iteration.
platform = "ONNX"
flavor = "ONNX12"
iteration_id =  iteration.id
project_id =  project.id
# No raw=True here: the following cells read export.status, export.platform,
# export.flavor and export.download_uri, which live on the deserialized export
# model and not on the raw HTTP response wrapper that raw=True returns.
export = trainer.export_iteration(project_id, iteration_id , platform, flavor)

In [None]:
# Poll every 5 seconds until the export leaves the "Exporting" state.
while (export.status == "Exporting"):
    print ("Waiting 5 seconds...")
    time.sleep(5)
    exports = trainer.get_exports(project.id, iteration_id)
    # Locate the export for this iteration and check its status
    for e in exports:
        if e.platform == export.platform and e.flavor == export.flavor:
            export = e
            break
    print("Export status is: ", export.status)

# Leaving the loop only means the export is no longer running; make sure it
# actually succeeded before the download cell tries to use it.
if export.status != "Done":
    raise RuntimeError("Export did not complete successfully (status: {})".format(export.status))

print("Export: done")

In [None]:
# Quickly download a previous export.
# NOTE(review): the two ids below are hard-coded and override the values of the
# project/iteration created above; presumably they only resolve in the author's
# own Custom Vision resource. Swap in the commented alternatives otherwise.
project_id =  "e4165ef1-ab02-4909-a05d-2cf924125ccb" # project.id 
iteration_id =  "11c31b66-6c9c-4b4f-98bf-84e73827a56d" # iteration.id 

platform = "ONNX"
flavor = "ONNX12"

# Keep the last listed export whose platform matches (flavor is not compared).
exports = trainer.get_exports(project_id, iteration_id)
for e in exports:
    if e.platform != platform:
        continue
    export = e

In [None]:
import os
import requests
import zipfile

# Download the exported model archive.
export_file = requests.get(export.download_uri, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as export.zip.
export_file.raise_for_status()
with open("export.zip", "wb") as file:
    file.write(export_file.content)

# Unzip the downloaded export; the context manager closes the archive even if
# extraction fails.
os.makedirs("./model", exist_ok=True)
with zipfile.ZipFile("export.zip", 'r') as zip_ref:
    zip_ref.extractall("./model")
print("Data extracted in: ./model")

In [None]:
import onnxruntime as nxrun
import numpy as np
import PIL
from PIL import Image

# Folder with the pictures to classify. It has its own name so that the
# `training_images` path used by the upload cell is not overwritten.
test_images_dir = "./bank-notes/TestImages"
model_path = "./model/model.onnx"

# Load the exported model once; the input name is the same for every image.
sess = nxrun.InferenceSession(model_path)
input_name = sess.get_inputs()[0].name

testimages = os.listdir(test_images_dir)

grid = AxesGrid(plt.figure(1, (20,20)), 111, nrows_ncols=(1, 4), axes_pad=0, label_mode="1")

# Never index past the last grid cell: the grid has 1 x 4 = 4 axes, so only as
# many pictures as there are axes are classified and drawn.
for i, image_filepath in enumerate(testimages[:len(grid)]):
    image_path = os.path.join(test_images_dir, image_filepath)

    # Force three colour channels so greyscale/alpha images also give an
    # (H, W, 3) array, then resize to the 224 x 224 input used here.
    image = PIL.Image.open(image_path).convert("RGB").resize((224, 224))
    input_array = np.array(image, dtype=np.float32)[np.newaxis, :, :, :]
    # Move channels in front of height/width and reverse the channel order.
    input_array = input_array.transpose((0, 3, 1, 2))[:, (2, 1, 0), :, :]

    outputs = sess.run(None, {input_name: input_array.astype(np.float32)})

    im = plt.imread(image_path)
    grid[i].imshow(im,aspect='auto', extent=(0,1,0,0.8), alpha=1, origin='upper', zorder=-1)

    # The first element of the first output is shown as the picture's title.
    grid[i].set_title(outputs[0][0][0], fontdict=None, loc='center', color = "k")

# 





# Get more insights on Faces

In [None]:
# Provision the Face resource (kind Face, SKU S0).
! az cognitiveservices account create \
    --name AF-Face \
    --resource-group Azure-Friday_RG \
    --kind Face \
    --sku S0 \
    --location westeurope \
    --yes \
    --output table

# Print the primary access key of the resource (used as `face_api_key` further down).
! az cognitiveservices account keys list \
    --name AF-Face --resource-group Azure-Friday_RG \
    --query key1

# Print the endpoint URL of the resource (used as `face_api_endpoint` further down).
! az cognitiveservices account show \
    --name AF-Face --resource-group Azure-Friday_RG \
    --query properties.endpoint

In [None]:
# Face API credentials.
# Prefer environment variables so the secret never has to be saved inside the
# notebook; the literal fallbacks keep the original "paste your key here" flow.
face_api_key = os.environ.get("FACE_APIKEY", "<INSERT KEY>")
face_api_endpoint = os.environ.get("FACE_ENDPOINT", "https://westeurope.api.cognitive.microsoft.com/")

In [None]:
# Create an authenticated FaceClient; reused by every Face API cell below.
face_client = FaceClient(face_api_endpoint, CognitiveServicesCredentials(face_api_key))

In [None]:
# Local path of the group photo used for face detection and identification; shown inline first.
mf_image = "face-photos/tech-a11y-crew.jpg"
display(IPythonImage(filename=mf_image))

In [None]:
# Face attributes requested from the service, passed as plain strings.
requested_face_attributes = [
    'age',
    'gender',
    'smile',
    'facialHair',
    'glasses',
    'emotion',
    'hair',
    'makeup',
    'accessories'
]

# Detect faces in the image. The stream is only needed for the request, so it
# is closed before the results are displayed.
with open(mf_image, "rb") as image_stream:
    detected_faces = face_client.face.detect_with_stream(
        image_stream, return_face_attributes=requested_face_attributes)

# Display the result: one cropped thumbnail plus its attributes per face.
pil_img = Image.open(mf_image)
for face in detected_faces:
    box = face.face_rectangle
    # PIL's crop() expects (left, upper, right, lower).
    img2 = pil_img.crop((box.left, box.top, box.left+box.width, box.top+box.height))
    display(img2)
    print (f'Face id: {face.face_id}')
    print (f'Gender: {face.face_attributes.gender}')
    print (f'smile: {face.face_attributes.smile}')
    print (f'age: {face.face_attributes.age}')
    print (f'facial_hair moustache: {face.face_attributes.facial_hair.moustache}')
    print (f'facial_hair beard: {face.face_attributes.facial_hair.beard}')
    print (f'facial_hair sideburns: {face.face_attributes.facial_hair.sideburns}')
    print (f'glasses: {face.face_attributes.glasses}')
    print (f'eye_makeup: {face.face_attributes.makeup.eye_makeup}')
    print (f'lip_makeup: {face.face_attributes.makeup.lip_makeup}')
    print (f'emotion: {face.face_attributes.emotion}')
    print(" ==")

print()

# Save this ID for use in Find Similar.
# None when no face was detected, instead of failing with an IndexError.
first_image_face_ID = detected_faces[0].face_id if detected_faces else None

## Train the face API to recognize people

In [None]:
# Show the dataset: the training photos, one per grid cell.
path = r"face-photos/train"
random_filenames = []
for train_img in os.listdir(path):
    random_filenames.append(os.path.join(path, train_img))

grid = AxesGrid(plt.figure(1, (20,20)), 111, nrows_ncols=(1, 5), axes_pad=0, label_mode="1")

# Never index past the last grid cell: the grid has 1 x 5 = 5 axes, so only as
# many pictures as there are axes are drawn.
for i, img_name in enumerate(random_filenames[:len(grid)]):
    im = plt.imread(img_name)
    grid[i].imshow(im,aspect='auto', extent=(0,0.8,0,1), alpha=1, origin='upper', zorder=-1)

In [None]:
from azure.cognitiveservices.vision.face.models import APIErrorException

PERSON_GROUP_ID = "tech-a11y-crew"

# Start from a clean slate: remove a group left over from an earlier run.
# On a first run the group does not exist yet; that "not found" answer is
# expected and ignored, any other error is re-raised.
try:
    face_client.person_group.delete(person_group_id=PERSON_GROUP_ID)
except APIErrorException as delete_error:
    if delete_error.response.status_code != 404:
        raise

face_client.person_group.create(person_group_id=PERSON_GROUP_ID, name=PERSON_GROUP_ID)

In [None]:
# Register one person per training photo; the person's name is the file name
# up to its first dot.
path = r"face-photos/train"

for photo_file in os.listdir(path):
    photo_path = os.path.join(path, photo_file)
    # Sub-folders cannot be opened as photos.
    if not os.path.isfile(photo_path):
        continue

    name = photo_file.partition(".")[0]
    print("Adding:"+name)

    # Read-only access is enough, and the context manager closes the file
    # again once the photo has been uploaded.
    with open(photo_path, 'rb') as w:
        # Create a person
        person = face_client.person_group_person.create(PERSON_GROUP_ID, name)

        # Add a face to the person
        face_client.person_group_person.add_face_from_stream(PERSON_GROUP_ID, person.person_id, w)

In [None]:
# Train the person group, then poll every 2 seconds until training ends.
face_client.person_group.train(PERSON_GROUP_ID)

while (True):
    training_status = face_client.person_group.get_training_status(PERSON_GROUP_ID)
    print("Training status: {}.".format(training_status.status))
    # Compare by value (==) rather than identity (is).
    if training_status.status == TrainingStatusType.succeeded:
        break
    if training_status.status == TrainingStatusType.failed:
        # Clean up the unusable group, then stop the notebook run with an
        # ordinary exception (`sys` is not imported in this notebook).
        face_client.person_group.delete(person_group_id=PERSON_GROUP_ID)
        raise RuntimeError('Training the person group has failed.')
    time.sleep(2)

## Identify people in the image

In [None]:
# Collect the ids of all faces found in the group photo.
face_ids = []

with open(mf_image, "rb") as image_stream:
    # Detect faces.
    # We use detection model 3 to get better performance.
    faces = face_client.face.detect_with_stream(image_stream, detection_model='detection_03')

for face in faces:
    face_ids.append(face.face_id)
    print(f'found face: {face.face_id}')

In [None]:
# Identify faces.
# The ids are sent in slices of at most 10, the maximum one Identify call
# accepts; with no detected faces the service is not called at all.
MAX_FACES_PER_IDENTIFY_CALL = 10

results = []
for start in range(0, len(face_ids), MAX_FACES_PER_IDENTIFY_CALL):
    id_batch = face_ids[start:start + MAX_FACES_PER_IDENTIFY_CALL]
    results.extend(face_client.face.identify(id_batch, PERSON_GROUP_ID))

# Maps a detected face id to the person it was matched with.
identified_persons = {}

for person in results:
    for candidate in person.candidates:
        identified_person = face_client.person_group_person.get(PERSON_GROUP_ID,candidate.person_id)
        print("Found: "+identified_person.name)
        identified_persons[person.face_id] = identified_person

In [None]:
# Show the result: the group photo with a box around every detected face and
# the person's name next to the faces that were identified.

im = plt.imread(mf_image)
img_height, img_width = im.shape[0], im.shape[1]

# Figure sized from the pixel dimensions; the axes fill the whole figure.
fig = plt.figure(figsize = (img_width/70, img_height/70))
ax = plt.axes((0,0,1,1))
ax.imshow(im,origin='upper')

for face in faces:
    box = face.face_rectangle
    color = (np.random.rand(),np.random.rand(),np.random.rand())

    rect = patches.Rectangle(
        (box.left, box.top), box.width, box.height,
        linewidth=3, edgecolor=color, facecolor='none'
    )
    ax.add_patch(rect)

    # Faces without a match only get the rectangle.
    if face.face_id not in identified_persons:
        continue

    # Label position in axes coordinates: scaled by the image size, y flipped.
    ax.text(
        (1/img_width*box.left), 1-(1/img_height*box.top),
        f"{identified_persons[face.face_id].name}",
        horizontalalignment='left', verticalalignment='bottom',
        fontsize=16, color='w', backgroundcolor=color,
        transform=ax.transAxes
    )

ax.axis('off')
plt.show()

# 




## Read text in images

In [None]:
# Despite the name this is a local file path, not a URL: the cells below pass
# it to open(), plt.imread() and IPythonImage(filename=...).
handwriting_image_url = "images/handwriting.jpg"

In [None]:
# Show the handwriting sample inline before reading it.
display(IPythonImage(filename=handwriting_image_url))

In [None]:
print("===== Start =====")
# Submit the local image as a stream. raw=True exposes the response headers,
# which carry the location of the asynchronous read operation.
with open(handwriting_image_url, "rb") as image_stream:
    read_response = computervision_client.read_in_stream(image_stream,  raw=True)

read_operation_location = read_response.headers["Operation-Location"]
# Grab the ID from the URL (its last path segment)
operation_id = read_operation_location.split("/")[-1]

# Poll the result once per second until the operation is no longer pending
while True:
    read_result = computervision_client.get_read_result(operation_id)
    if read_result.status not in ['notStarted', 'running']:
        break
    time.sleep(1)

# The loop also ends when the operation failed; stop here in that case instead
# of letting the next cell crash on a result without any text.
if read_result.status != OperationStatusCodes.succeeded:
    raise RuntimeError("Read operation did not succeed (status: {})".format(read_result.status))

print("===== Done =====")

In [None]:
# Draw a box around every recognised line and collect the text of all lines.
im = plt.imread(handwriting_image_url)
img_height, img_width = im.shape[0], im.shape[1]

# Figure sized from the pixel dimensions; the axes fill the whole figure.
fig = plt.figure(figsize = (img_width/100, img_height/100))
ax = plt.axes((0,0,1,1))
ax.imshow(im,origin='upper')

full_text = ""
for text_result in read_result.analyze_result.read_results:
    for line in text_result.lines:
        corners = line.bounding_box
        # Rectangle anchored at entries 0/1; width from entries 0 and 2,
        # height from entries 1 and 5 of the bounding box.
        anchor = (corners[0], corners[1])
        box_width = corners[2]-corners[0]
        box_height = corners[5]-corners[1]

        color = (np.random.rand(),np.random.rand(),np.random.rand())
        rect = patches.Rectangle(anchor, box_width, box_height,
                                 linewidth=6, edgecolor=color, facecolor='none')
        ax.add_patch(rect)

        # Lines are joined with a single space (a trailing one remains).
        full_text = full_text + line.text + " "

print(full_text)
ax.axis('off')
plt.show()

In [None]:
# Provision the Speech resource (kind SpeechServices, SKU S0).
! az cognitiveservices account create \
    --name AF-Speech \
    --resource-group Azure-Friday_RG \
    --kind SpeechServices \
    --sku S0 \
    --location westeurope \
    --yes \
    --output table

# Print the primary access key of the resource (used as `speech_key` further down).
! az cognitiveservices account keys list \
    --name AF-Speech --resource-group Azure-Friday_RG \
    --query key1

# Print the endpoint URL of the resource.
! az cognitiveservices account show \
    --name AF-Speech --resource-group Azure-Friday_RG \
    --query properties.endpoint

In [None]:
# Speech service credentials.
# Prefer environment variables so the secret never has to be saved inside the
# notebook; the literal fallbacks keep the original "paste your key here" flow.
speech_key = os.environ.get("SPEECH_KEY", "<INSERT KEY>")
service_region = os.environ.get("SPEECH_REGION", "westeurope")

In [None]:
# Speech configuration: synthesis language en-GB with the en-GB-LibbyNeural voice.
speech_config = SpeechConfig(subscription=speech_key, region=service_region)
speech_config.speech_synthesis_language = "en-GB" 
speech_config.speech_synthesis_voice_name ="en-GB-LibbyNeural"

In [None]:
# Read the text recognised in the handwriting image out loud.
sentence = full_text

# The audio is written to a .wav file named after the selected voice.
audio_file=f'{speech_config.speech_synthesis_voice_name}.wav'
audio_config = AudioOutputConfig(filename=audio_file)
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
synthesis_result = synthesizer.speak_text(sentence)

# Stop here when synthesis did not complete (e.g. wrong key or region), so the
# next cell does not try to play a missing or empty file.
if synthesis_result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
    raise RuntimeError("Speech synthesis did not complete (reason: {})".format(synthesis_result.reason))

In [None]:
# Embed an audio player for the synthesized file; autoplay starts it immediately.
IPythonAudio(audio_file,autoplay=True)