In [None]:
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.models import Model
import numpy as np
from os import listdir, walk
from os.path import isfile, join
import itertools

import joblib
import os
from requests import post, put
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt


# Extract features and generate pkl file

In [None]:
# Here we extract all the features from our training set and add to pkl file for mounting to the PowerSkill API
def getAllFilesInDirectory(directoryPath: str):
    """Return the paths of the regular files directly inside ``directoryPath``.

    Sub-directories are skipped and the listing is not recursive. Each
    returned path is ``directoryPath + "/" + <entry name>``; entries appear
    in whatever order ``listdir`` yields them.
    """
    file_paths = []
    for entry_name in listdir(directoryPath):
        # Only keep entries that are actual files (not directories)
        if not isfile(join(directoryPath, entry_name)):
            continue
        file_paths.append(directoryPath + "/" + entry_name)
    return file_paths

def predict(img_path : str, model: Model):
    """Run ``model`` on the image stored at ``img_path`` and return its output.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis of size one, passed through ``preprocess_input`` and finally
    handed to ``model.predict``. The raw result of ``model.predict`` is
    returned unchanged.
    """
    loaded_img = image.load_img(img_path, target_size=(224, 224))
    # Add the batch axis the model expects in front of the image array
    batch = np.expand_dims(image.img_to_array(loaded_img), axis=0)
    return model.predict(preprocess_input(batch))

path = ""
file_name = "../models/stanford_dogs.pkl"
def build_feature_vectors(path, file_name, data_dir="../data"):
    """Compute a model output vector for every image in ``data_dir`` and pickle them.

    Args:
        path: Directory prefix joined with ``file_name`` to form the output path.
        file_name: Name (or relative path) of the pickle file to write.
        data_dir: Directory whose files are fed through the model. Only files
            directly inside it are used. Defaults to ``"../data"``, the
            location this function previously had hard-coded.

    The result written with ``joblib.dump`` is a dict mapping each image path
    to the first row of ``predict(img_path, model)``.
    """
    output_file = os.path.join(path, file_name)

    # Make sure the destination exists *before* the slow extraction loop, so a
    # missing folder does not throw away all the computed vectors at dump time.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    feature_vectors: dict = {}
    model = ResNet50(weights='imagenet')
    for img_path in getAllFilesInDirectory(data_dir):
        if ".DS_Store" in img_path:   # Ignore the MacOs datastore file
            continue
        print(img_path)
        feature_vectors[img_path] = predict(img_path, model)[0]

    joblib.dump(value=feature_vectors, filename=output_file)

build_feature_vectors(path, file_name)
print(f"Model {file_name} trained")

# Test our dogs on our locally running API

In [None]:
# Secret sent in the "Ocp-Apim-Subscription-Key" header of the local API call.
# NOTE: find_similar reads this global at call time, and a later cell in this
# notebook rebinds API_KEY, so re-run this cell before calling find_similar again.
API_KEY = "YourSecretKeyCanBeAnything"

def find_similar(dog):
    """POST ``dog`` to the local extraction API and return the decoded JSON reply.

    Args:
        dog: The image payload placed at ``values[0].data.images.data`` in the
            request body.

    Returns:
        The parsed JSON response, or ``None`` if the request or the JSON
        decoding failed (the exception is printed).
    """
    headers = {
        # Request headers
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": API_KEY,
    }

    body = {
        "values": [
            {
                "recordId": "0",
                "data": {
                    "images": {
                        # Use the argument, not a notebook-level global
                        "data": dog
                    }
                }
            }
        ]
    }

    url = "http://0.0.0.0:5000/api/extraction"

    try:
        resp = post(url=url, json=body, headers=headers)
        return resp.json()
    except Exception as e:
        print('Exception', e)
        # Nothing was received; signal failure explicitly instead of touching
        # a local variable that was never assigned.
        return None
        
# Load the test payload from disk (presumably an encoded image — confirm against the API)
with open("../data/test/mountain_dog4.txt") as m:
    mountain_dog_test = m.read()

result_response = find_similar(mountain_dog_test)
print("You should see three mountain dogs")

# Show every image the API reported as most similar.
# The loop deliberately avoids the name `image`: that is the Keras
# preprocessing module imported at the top and still needed by predict().
for similar_path in result_response['values'][0]['data']['most_similar']:
    with open(similar_path, "rb") as image_file:   # close the handle promptly
        image_data = image_file.read()
    similar_image = Image.open(BytesIO(image_data))
    plt.imshow(similar_image)
    plt.axis("off")
    _ = plt.title("", size="x-large", y=-0.1)
    plt.show()
    


# Deploy the PowerSkill to Azure Search

The first step is to upload the data files in the `data` folder to a container in Azure Blob Storage and get the connection string needed to create the Azure Cognitive Search (ACS) data source.
You will need your ACS API key and the URL of your ACS instance.

## Deploy to Azure Search

### Create the data source

In [None]:
# Let's create a data source
API_KEY = ""   # Your ACS API Key
ACS_URL = "https://[].search.windows.net"   # Your ACS URL format https://[your ACS instance].search.windows.net
DATA_SOURCE = "image-similarity-datasource"  # The name for your data source
CONTAINER_NAME = "dogs"
KEY= "[YourSecretKeyCanBeAnything]" # Set the KEY value you deployed your Web App with [YourSecretKeyCanBeAnything]
WEB_APP_URL = "https://[].azurewebsites.net"  # This is the deployed web app format: URL https://[appname].azurewebsites.net


# Request body describing the blob container that holds the images
json_text = {
    "name" : DATA_SOURCE,
    "type" : "azureblob",
    "credentials" : { "connectionString" : ""},    #  This is your azure blob connection string
    "container" : { "name" : CONTAINER_NAME }  # The name of the container where the data files are
}

# Headers reused by every ACS REST call in the following cells
headers = {
    "api-key": API_KEY,     
    "Content-Type": "application/json",

}

try:
    url = f"{ACS_URL}/datasources?api-version=2020-06-30"
    resp = post(url=url, json=json_text, headers=headers)

    # Report on the status code first: an error response may carry an empty or
    # non-JSON body, and decoding it up front would hide the useful message.
    if resp.status_code == 403:
        print("Authorisation Failed: Check that your API KEY value is correct")
    elif resp.status_code == 400:
        print("Error", resp.text)
    elif resp.status_code == 201:
        print("Success creating data source")
    else:
        # Anything else used to pass silently
        print(f"Unexpected status {resp.status_code}", resp.text)

    try:
        result_response = resp.json()
    except ValueError:
        # Body was empty or not JSON
        result_response = None

except Exception as e:
    print('Exception creating data source', e)

### Now we create the index

In [None]:
INDEX_NAME = "image-similarity-index"  # The name for the index


def _leaf(name, edm_type, searchable):
    """Nested field entry: filterable and facetable are always switched off."""
    return {
        "name": name,
        "type": edm_type,
        "searchable": searchable,
        "filterable": False,
        "facetable": False,
    }


def _group(name, edm_type, fields):
    """Complex field entry that only carries its list of child fields."""
    return {"name": name, "type": edm_type, "fields": fields}


def _full_text(name, edm_type):
    """Top-level searchable field with filtering, sorting and faceting off."""
    return {
        "name": name,
        "type": edm_type,
        "searchable": True,
        "filterable": False,
        "sortable": False,
        "facetable": False,
    }


def _face_bounding_box():
    """The x/y bounding-box collection used under both celebrities and faces."""
    return _group("faceBoundingBox", "Collection(Edm.ComplexType)", [
        _leaf("x", "Edm.Int32", False),
        _leaf("y", "Edm.Int32", False),
    ])


# Index schema sent to the ACS "create index" endpoint
json_text = {
    "name": INDEX_NAME,
    "fields": [
        # Document key and plain metadata fields
        {"name": "id", "type": "Edm.String", "key": True, "searchable": False},
        {"name": "url", "type": "Edm.String", "searchable": True},
        {"name": "file_name", "type": "Edm.String", "searchable": False},
        {"name": "size", "type": "Edm.Int64", "searchable": False},
        {"name": "last_modified", "type": "Edm.DateTimeOffset", "searchable": False},
        # Searchable text fields
        _full_text("content", "Edm.String"),
        _full_text("most_similar", "Collection(Edm.String)"),
        _full_text("images", "Collection(Edm.String)"),
        # Complex fields
        _group("categories", "Collection(Edm.ComplexType)", [
            _leaf("name", "Edm.String", True),
            _leaf("score", "Edm.Double", False),
            _group("detail", "Edm.ComplexType", [
                _group("celebrities", "Collection(Edm.ComplexType)", [
                    _leaf("name", "Edm.String", True),
                    _face_bounding_box(),
                    _leaf("confidence", "Edm.Double", False),
                ]),
                _group("landmarks", "Collection(Edm.ComplexType)", [
                    _leaf("name", "Edm.String", True),
                    _leaf("confidence", "Edm.Double", False),
                ]),
            ]),
        ]),
        _group("description", "Collection(Edm.ComplexType)", [
            _leaf("tags", "Collection(Edm.String)", True),
            _group("captions", "Collection(Edm.ComplexType)", [
                _leaf("text", "Edm.String", True),
                _leaf("confidence", "Edm.Double", False),
            ]),
        ]),
        _group("faces", "Collection(Edm.ComplexType)", [
            _leaf("age", "Edm.Int32", False),
            _leaf("gender", "Edm.String", False),
            _face_bounding_box(),
        ]),
        _group("tags", "Collection(Edm.ComplexType)", [
            _leaf("name", "Edm.String", True),
            _leaf("confidence", "Edm.Double", False),
        ]),
    ],
}

try:
    url = f"{ACS_URL}/indexes?api-version=2020-06-30"
    resp = post(url=url, json=json_text, headers=headers)

    # Report on the status code first: an error response may carry an empty or
    # non-JSON body, and decoding it up front would hide the useful message.
    if resp.status_code == 403:
        print("Authorisation Failed: Check that your API KEY value is correct")
    elif resp.status_code == 400:
        print("Error", resp.text)
    elif resp.status_code == 201:
        print("Success creating index")
    else:
        # Anything else used to pass silently
        print(f"Unexpected status {resp.status_code}", resp.text)

    try:
        result_response = resp.json()
    except ValueError:
        # Body was empty or not JSON
        result_response = None

except Exception as e:
    print('Exception creating index', e)

### Now we create the skill set

In [None]:
# The skillset carries both the secret header key (KEY) and the inference API URL (WEB_APP_URL)
SKILLSET_NAME = "image-similarity-skillset"  # The name of your skillset
COGSVC_KEY = ""  #  This is your Cognitive Services key that resides in the same region as ACS (used to compare custom vision captions and object detection)
WEB_APP_URL = "https://[].azurewebsites.net"
KEY= "yoursecretkey"

# Skillset body: one built-in image analysis skill followed by the custom
# web API skill that calls the deployed similarity service.
json_text = {
    "description": "Crack documents.",
    "skills": [
        {
            "@odata.type": "#Microsoft.Skills.Vision.ImageAnalysisSkill",
            "context": "/document/normalized_images/*",
            "defaultLanguageCode": "en",
            "visualFeatures": ["tags", "categories", "description", "faces", "brands"],
            "inputs": [
                {"name": "image", "source": "/document/normalized_images/*"},
            ],
            # One output entry per enrichment produced by the skill
            "outputs": [
                {"name": output_name}
                for output_name in ("categories", "tags", "description", "faces", "brands")
            ],
        },
        {
            "@odata.type": "#Microsoft.Skills.Custom.WebApiSkill",
            "description": "A custom skill that finds the topn most similar images",
            "uri": f"{WEB_APP_URL}/api/extraction",
            "timeout": "PT160S",
            "batchSize": 1,
            "context": "/document/normalized_images/*",
            "httpHeaders": {"Ocp-Apim-Subscription-Key": KEY},
            "httpMethod": "POST",
            "inputs": [
                {"name": "images", "source": "/document/normalized_images/*"},
            ],
            "outputs": [
                {"name": "most_similar"},
            ],
        },
    ],
    "cognitiveServices": {
        "@odata.type": "#Microsoft.Azure.Search.CognitiveServicesByKey",
        "description": "cogsvc",
        "key": COGSVC_KEY,
    },
}



try:
    url = f"{ACS_URL}/skillsets/{SKILLSET_NAME}?api-version=2020-06-30"
    resp = put(url=url, json=json_text, headers=headers)

    # Report on the status code first: PUT is create-or-update, and an update
    # can answer with an empty body, which used to surface as a JSON decoding
    # "Exception creating skillset" even though the call succeeded.
    if resp.status_code == 403:
        print("Authorisation Failed: Check that your API KEY value is correct")
    elif resp.status_code == 400:
        print("Error", resp.text)
    elif resp.status_code == 201:
        print("Success creating skillset")
    elif resp.status_code in (200, 204):
        print("Success updating skillset")
    else:
        # Anything else used to pass silently
        print(f"Unexpected status {resp.status_code}", resp.text)

    try:
        result_response = resp.json()
    except ValueError:
        # Body was empty (e.g. 204 No Content) or not JSON
        result_response = None

except Exception as e:
    print('Exception creating skillset', e)


### Now we create the indexer

In [None]:
# Indexer body: ties the data source, index and skillset together and maps
# enrichment outputs onto index fields.
json_text = {
    "name": "image-similarity-indexer",
    "dataSourceName": DATA_SOURCE,
    "targetIndexName": INDEX_NAME,
    "skillsetName": SKILLSET_NAME,
    "parameters": {
        "configuration": {
            "allowSkillsetToReadFileData": True,
            "imageAction": "generateNormalizedImagePerPage",
        },
    },
    # (enrichment path, index field) pairs expanded into mapping entries
    "outputFieldMappings": [
        {"sourceFieldName": source_path, "targetFieldName": index_field}
        for source_path, index_field in (
            ("/document/normalized_images/*/most_similar", "most_similar"),
            ("/document/normalized_images/*/categories/*", "categories"),
            ("/document/normalized_images/*/tags/*", "tags"),
            ("/document/normalized_images/*/description", "description"),
            ("/document/normalized_images/*/faces/*", "faces"),
        )
    ],
}


try:
    url = f"{ACS_URL}/indexers?api-version=2020-06-30"
    resp = post(url=url, json=json_text, headers=headers)

    # Report on the status code first: an error response may carry an empty or
    # non-JSON body, and decoding it up front would hide the useful message.
    if resp.status_code == 403:
        print("Authorisation Failed: Check that your API KEY value is correct")
    elif resp.status_code == 400:
        print("Error", resp.text)
    elif resp.status_code == 201:
        print("Success creating indexer")
    else:
        # Anything else used to pass silently
        print(f"Unexpected status {resp.status_code}", resp.text)

    try:
        result_response = resp.json()
    except ValueError:
        # Body was empty or not JSON
        result_response = None

except Exception as e:
    print('Exception creating indexer', e)

### Let's go and test the ACS index 

In [None]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
import base64
import io
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
from io import BytesIO
from IPython.display import Markdown, display
import ntpath


def printmd(string):
    """Show ``string`` in the cell output, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)

# What to look for, and in which index (INDEX_NAME is set in the index-creation cell)
search_term = "dog"
index_name = INDEX_NAME

# Search service endpoint; the value below is a placeholder to fill in
SEARCH_ENDPOINT = "https://[].search.windows.net"
endpoint = SEARCH_ENDPOINT

# Build the client with the API_KEY currently bound in the notebook
credential = AzureKeyCredential(API_KEY)
client = SearchClient(
    endpoint=endpoint,
    index_name=index_name,
    credential=credential,
)

# Run the query, asking for at most 30 hits
results = client.search(search_text=search_term, top=30)

# For every hit: show the image, its tags/caption, and its most similar images.
# The loop deliberately avoids the name `image`: that is the Keras
# preprocessing module imported at the top and still needed by predict().
for result in results:
    try:
        doc_key = result["id"]
        # str() of the decoded bytes yields the "b'...'" repr, which is why
        # quotes and the literal "\r" sequence are stripped further down.
        dog = str(base64.urlsafe_b64decode(doc_key + '=' * (4 - len(doc_key) % 4)))
    except Exception:
        # Key could not be decoded: skip this hit
        continue
    dog_index = dog.find("dogs/")
    image_path = "../data/" + dog[dog_index+5:].replace("\\r", "").replace("'", "")
    try:
        with open(image_path, "rb") as image_file:
            image_data = image_file.read()
        result_image = Image.open(BytesIO(image_data))
    except Exception:
        # Retry without the last character of the decoded path
        with open(image_path[:-1], "rb") as image_file:
            image_data = image_file.read()
        result_image = Image.open(BytesIO(image_data))
    plt.imshow(result_image)
    plt.axis("off")
    _ = plt.title("", size="x-large", y=-0.1)
    plt.show()

    printmd('**Custom Vision Tags**')
    printmd(' '.join(result['description'][0]['tags']))

    printmd('**Custom Vision Caption**')
    printmd(result['description'][0]['captions'][0]['text'])

    printmd('**Most Similar**')

    # File names under which the current image may appear in its own
    # similarity list (with and without the trailing character); computed
    # once per hit instead of once per candidate.
    own_basename = ntpath.basename(image_path)
    own_names = {own_basename.replace('"', ''), own_basename[:-1].replace('"', '')}

    ms = result['most_similar'][0].replace("[", "").replace("]", "").split(",")
    for similiar_dog in ms:
        # Drop quotes and any whitespace left around the comma-separated entry
        similar_path = similiar_dog.replace('"', '').strip()
        if not similar_path:
            # Empty list ("[]") or trailing comma: nothing to open
            continue
        if ntpath.basename(similar_path) in own_names:
            continue
        with open(similar_path, "rb") as similar_file:
            simage_data = similar_file.read()
        simage = Image.open(BytesIO(simage_data))
        plt.imshow(simage)
        plt.axis("off")
        _ = plt.title("", size="x-small", y=-0.4)
        plt.show()
    printmd(f'**Next {search_term}**')