### DESCRIPTION:
    This example shows how to use Azure OpenAI GPT-3.5 to interpret image tags extracted using Azure Cognitive Services.

### REQUIREMENTS:
    Create a .env file with your OpenAI API key and save it in the root directory of this project.




In [1]:
import utils
import os
import azure.ai.vision as visionsdk

def analyze_image(image_url):
    """Analyze the image at ``image_url`` with Azure Image Analysis and print the findings.

    Requests the caption, dense captions, objects, people, text and tags
    features, prints every section the service returned, and hands back the
    JSON payload exposed by the SDK's result details.

    Args:
        image_url: URL of the image to analyze; it is passed to the SDK as a
            ``VisionSource(url=...)``, so the service must be able to reach it.

    Returns:
        ``result_details.json_result`` when the analysis succeeds, or ``None``
        when it fails (the error reason, code and message are printed first).
    """
    service_options = visionsdk.VisionServiceOptions(
        utils.AZURE_COMPUTER_VISION_ENDPOINT, utils.AZURE_COMPUTER_VISION_KEY
    )

    # To analyze an image file on disk instead of a URL, build the source with
    # visionsdk.VisionSource(filename="sample.jpg").
    vision_source = visionsdk.VisionSource(url=image_url)

    analysis_options = visionsdk.ImageAnalysisOptions()

    # Mandatory. You must set one or more features to analyze.
    # Note that "CAPTION" and "DENSE_CAPTIONS" are only supported in Azure GPU regions (East US, France Central,
    # Korea Central, North Europe, Southeast Asia, West Europe, West US). Remove "CAPTION" and "DENSE_CAPTIONS"
    # from the list below if your Computer Vision key is not from one of those regions.
    # CROP_SUGGESTIONS is deliberately not requested; add
    # visionsdk.ImageAnalysisFeature.CROP_SUGGESTIONS to the set to enable it.
    analysis_options.features = (
        visionsdk.ImageAnalysisFeature.CAPTION |
        visionsdk.ImageAnalysisFeature.DENSE_CAPTIONS |
        visionsdk.ImageAnalysisFeature.OBJECTS |
        visionsdk.ImageAnalysisFeature.PEOPLE |
        visionsdk.ImageAnalysisFeature.TEXT |
        visionsdk.ImageAnalysisFeature.TAGS
    )

    # Optional, and only relevant when you select ImageAnalysisFeature.CROP_SUGGESTIONS.
    # Define one or more aspect ratios for the desired cropping. Each aspect ratio needs
    # to be in the range [0.75, 1.8]. If you do not set this, the service will return one
    # crop suggestion with the aspect ratio it sees fit. Example:
    #   analysis_options.cropping_aspect_ratios = [0.9, 1.33]

    # Optional. Default is "en" for English. See https://aka.ms/cv-languages for a list of supported
    # language codes and which visual features are supported for each language.
    analysis_options.language = "en"
    analysis_options.model_version = "latest"
    # Set this to True to get a gender neutral caption (the default is False).
    analysis_options.gender_neutral_caption = True

    # Create the image analyzer object
    image_analyzer = visionsdk.ImageAnalyzer(service_options, vision_source, analysis_options)

    # This call creates the network connection and blocks until Image Analysis results
    # return (or an error occurred). Note that there is also an asynchronous (non-blocking)
    # version of this method: image_analyzer.analyze_async().
    result = image_analyzer.analyze()

    # Handle the failure case first: there are no result details to return, so
    # report the error and bail out instead of falling through to the return
    # of a variable that was never assigned.
    if result.reason != visionsdk.ImageAnalysisResultReason.ANALYZED:
        error_details = visionsdk.ImageAnalysisErrorDetails.from_result(result)
        print(" Analysis failed.")
        print("   Error reason: {}".format(error_details.reason))
        print("   Error code: {}".format(error_details.error_code))
        print("   Error message: {}".format(error_details.message))
        print(" Did you set the computer vision endpoint and key?")
        return None

    print(" Image height: {}".format(result.image_height))
    print(" Image width: {}".format(result.image_width))
    print(" Model version: {}".format(result.model_version))

    if result.caption is not None:
        print(" Caption:")
        print("   '{}', Confidence {:.4f}".format(result.caption.content, result.caption.confidence))

    if result.dense_captions is not None:
        print(" Dense Captions:")
        for caption in result.dense_captions:
            print("   '{}', {}, Confidence: {:.4f}".format(caption.content, caption.bounding_box, caption.confidence))

    if result.objects is not None:
        print(" Objects:")
        # Named detected_object so the builtin `object` is not shadowed.
        for detected_object in result.objects:
            print("   '{}', {}, Confidence: {:.4f}".format(
                detected_object.name, detected_object.bounding_box, detected_object.confidence))

    if result.tags is not None:
        print(" Tags:")
        for tag in result.tags:
            print("   '{}', Confidence {:.4f}".format(tag.name, tag.confidence))

    if result.people is not None:
        print(" People:")
        for person in result.people:
            print("   {}, Confidence {:.4f}".format(person.bounding_box, person.confidence))

    if result.crop_suggestions is not None:
        print(" Crop Suggestions:")
        for crop_suggestion in result.crop_suggestions:
            print("   Aspect ratio {}: Crop suggestion {}"
                  .format(crop_suggestion.aspect_ratio, crop_suggestion.bounding_box))

    if result.text is not None:
        print(" Text:")
        for line in result.text.lines:
            points_string = "{" + ", ".join(str(int(point)) for point in line.bounding_polygon) + "}"
            print("   Line: '{}', Bounding polygon {}".format(line.content, points_string))
            for word in line.words:
                points_string = "{" + ", ".join(str(int(point)) for point in word.bounding_polygon) + "}"
                print("     Word: '{}', Bounding polygon {}, Confidence {:.4f}"
                      .format(word.content, points_string, word.confidence))

    result_details = visionsdk.ImageAnalysisResultDetails.from_result(result)
    print(" Result details:")
    print("   Image ID: {}".format(result_details.image_id))
    print("   Result ID: {}".format(result_details.result_id))
    print("   Connection URL: {}".format(result_details.connection_url))
    print("   JSON result: {}".format(result_details.json_result))

    return result_details.json_result

In [2]:
# Sample image URL handed to the analyzer.
image_url = "https://aka.ms/azai/vision/image-analysis-sample.jpg"
# Run the analysis; analyze_image prints its findings as it goes.
analyze_image(image_url)

 Image height: 432
 Image width: 648
 Model version: 2023-02-01-preview
 Caption:
   'a person wearing a mask sitting at a table with a laptop', Confidence 1.0000
 Dense Captions:
   'a person wearing a mask sitting at a table with a laptop', Rectangle(x=0, y=0, w=648, h=432), Confidence: 1.0000
   'a person using a laptop', Rectangle(x=220, y=289, w=144, h=73), Confidence: 1.0000
   'a person wearing a colorful face mask', Rectangle(x=285, y=178, w=202, h=249), Confidence: 1.0000
   'a green chair in a room', Rectangle(x=463, y=160, w=117, h=184), Confidence: 1.0000
   'a close-up of a person's hand', Rectangle(x=217, y=162, w=109, h=180), Confidence: 1.0000
   'a person sitting in a chair', Rectangle(x=418, y=320, w=105, h=109), Confidence: 1.0000
   'a blue and green background', Rectangle(x=456, y=163, w=60, h=155), Confidence: 1.0000
   'a close-up of a wooden table', Rectangle(x=59, y=318, w=55, h=58), Confidence: 1.0000
   'a person sitting at a table', Rectangle(x=287, y=315, w