In [1]:
import os
import base64
import io
from PIL import Image
import dotenv
from openai import AzureOpenAI
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential


In [3]:
# Azure OpenAI configuration
azure_api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_api_version = os.getenv("OPENAI_API_VERSION", "2023-07-01-preview")
model = os.getenv("LLM_MODEL", "gpt-35-turbo")

print(f"Azure Endpoint: {azure_endpoint}")
print(f"Azure API Key available: {'Yes' if azure_api_key else 'No'}")
print(f"Azure API Version: {azure_api_version}")
print(f"Model deployment name: {model}")

# Initialize Azure OpenAI client if enabled

# Create endpoint URL with deployment name
endpoint_url = f"{azure_endpoint}/openai/deployments/{model}"
print(f"Endpoint URL: {endpoint_url}")
client = ChatCompletionsClient(
    endpoint=endpoint_url,
    credential=AzureKeyCredential(azure_api_key),
    api_version=azure_api_version
)

Azure Endpoint: https://aoai-sweden-505.openai.azure.com/
Azure API Key available: Yes
Azure API Version: 2025-03-01-preview
Model deployment name: gpt-4o
Endpoint URL: https://aoai-sweden-505.openai.azure.com//openai/deployments/gpt-4o


In [5]:
payload = {
  "messages": [
    {
      "role": "user",
      "content": "I am going to Paris, what should I see?"
    },
    {
      "role": "assistant",
      "content": "Paris, the capital of France, is known for its stunning architecture, art museums, historical landmarks, and romantic atmosphere. Here are some of the top attractions to see in Paris:\n\n1. The Eiffel Tower: The iconic Eiffel Tower is one of the most recognizable landmarks in the world and offers breathtaking views of the city.\n2. The Louvre Museum: The Louvre is one of the world's largest and most famous museums, housing an impressive collection of art and artifacts, including the Mona Lisa.\n3. Notre-Dame Cathedral: This beautiful cathedral is one of the most famous landmarks in Paris and is known for its Gothic architecture and stunning stained glass windows.\n\nThese are just a few of the many attractions that Paris has to offer. With so much to see and do, it's no wonder that Paris is one of the most popular tourist destinations in the world."
    },
    {
      "role": "user",
      "content": "What is so great about #1?"
    }
  ],
  "max_tokens": 2048
}
response = client.complete(payload)


print("Response:", response.choices[0].message.content)
print("Model:", response.model)
print("Usage:")
print("	Prompt tokens:", response.usage.prompt_tokens)
print("	Total tokens:", response.usage.total_tokens)
print("	Completion tokens:", response.usage.completion_tokens)

Response: The Eiffel Tower is often considered a must-see attraction for a variety of reasons:

1. **Architectural Marvel**: Designed by Gustave Eiffel for the 1889 World's Fair, the tower was initially met with skepticism but quickly became an architectural triumph. Its iconic iron lattice design is both unique and aesthetically striking.

2. **Panoramic Views**: Standing at 324 meters (1,063 feet), the Eiffel Tower offers spectacular panoramic views of Paris. Visitors can ascend to various levels, including an observatory deck at the top, to enjoy breathtaking vistas of the city's landmarks and landscape.

3. **Symbol of Paris**: The Eiffel Tower has become synonymous with Paris itself. Its silhouette is instantly recognizable and is often used to represent the city in art, media, and popular culture.

4. **Historical Significance**: At the time of its completion, the Eiffel Tower was the tallest man-made structure in the world, holding that title for over 40 years. Its construction 

In [1]:
import logging
from typing import Optional

import os
import base64
import io
from PIL import Image
import dotenv
from openai import AzureOpenAI
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential


# Azure OpenAI configuration
azure_api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_api_version = os.getenv("OPENAI_API_VERSION", "2023-07-01-preview")
model = os.getenv("LLM_MODEL", "gpt-35-turbo")

print(f"Azure Endpoint: {azure_endpoint}")
print(f"Azure API Key available: {'Yes' if azure_api_key else 'No'}")
print(f"Azure API Version: {azure_api_version}")
print(f"Model deployment name: {model}")

# Initialize Azure OpenAI client if enabled

# Create endpoint URL with deployment name
endpoint_url = f"{azure_endpoint}/openai/deployments/{model}"
print(f"Endpoint URL: {endpoint_url}")
client = ChatCompletionsClient(
    endpoint=endpoint_url,
    credential=AzureKeyCredential(azure_api_key),
    api_version=azure_api_version
)


def preprocess_image(image_path: str) -> Optional[Image.Image]:
    """
    Validate if the image is suitable for processing.
    
    Args:
        image_path: Path to the image file
        
    Returns:
        bool: True if image is valid, False otherwise
    """
    try:
        with Image.open(image_path) as img:
            if img.mode not in ('RGB', 'L'):
                img = img.convert('RGB')
            
            max_dimension = 2048
            if img.width > max_dimension or img.height > max_dimension:
                ratio = min(max_dimension / img.width, max_dimension / img.height)
                new_size = (int(img.width * ratio), int(img.height * ratio))
                img = img.resize(new_size, Image.Resampling.LANCZOS)
            
            img_byte_arr = io.BytesIO()
            img.save(img_byte_arr, format='PNG', quality=85)
            img_byte_arr.seek(0)
            
            return Image.open(img_byte_arr)

    except Exception as e:
        print(f"Image preprocessing failed for {image_path}: {str(e)}")
        return None



def encode_image(image_path: str) -> Optional[str]:
    """Encode image as base64 with proper validation and preprocessing."""
    try:
        

        processed_img = preprocess_image(image_path)
        if processed_img is None:
            return None

        img_byte_arr = io.BytesIO()
        processed_img.save(img_byte_arr, format='PNG', quality=85)
        img_byte_arr.seek(0)
        base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')

        try:
            base64.b64decode(base64_encoded)
            return base64_encoded
        except Exception as e:
            print(f"Base64 validation failed for {image_path}: {str(e)}")
            return None

    except Exception as e:
        print(f"Image encoding failed for {image_path}: {str(e)}")
        return None
    

prompt = "What is in this image?"

# Image file path
IMAGE_PATH = 'temp_image.png'


base64_image = encode_image(IMAGE_PATH)

payload = {
    "messages": [
        {
            "role": "system",
            "content": "you are a helpful assistant that can analyze images and provide information about them."
        },
        {
            "role": "user",
            "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        }
                    ]
        }
    ],
    "max_tokens": 2048
}
response = client.complete(payload)

print("Response:", response.choices[0].message.content)
print("Model:", response.model)
print("Usage:")
print("	Prompt tokens:", response.usage.prompt_tokens)
print("	Total tokens:", response.usage.total_tokens)
print("	Completion tokens:", response.usage.completion_tokens)

Azure Endpoint: https://aoai-sweden-505.openai.azure.com/
Azure API Key available: Yes
Azure API Version: 2025-03-01-preview
Model deployment name: gpt-4o
Endpoint URL: https://aoai-sweden-505.openai.azure.com//openai/deployments/gpt-4o
Response: The image appears to show a portion of text from a code editor or integrated development environment (IDE) with syntax highlighting. The text includes the word "settings" and a partial sentence referencing "UM_ENABLED" and "UM anal". The different colors of the text indicate syntax elements like keywords, comments, or variable names. The dark background is typical for code editors, designed to reduce eye strain.
Model: gpt-4o-2024-08-06
Usage:
	Prompt tokens: 287
	Total tokens: 366
	Completion tokens: 79


In [15]:
prompt = "What is in this image?"

# Image file path
IMAGE_PATH = 'temp_image.png'


base64_image = encode_image(IMAGE_PATH)

payload = {
    "messages": [
        {
            "role": "system",
            "content": "you are a helpful assistant that can analyze images and provide information about them."
        },
        {
            "role": "user",
            "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        }
                    ]
        }
    ],
    "max_tokens": 2048
}
response = client.complete(payload)

print("Response:", response.choices[0].message.content)
print("Model:", response.model)
print("Usage:")
print("	Prompt tokens:", response.usage.prompt_tokens)
print("	Total tokens:", response.usage.total_tokens)
print("	Completion tokens:", response.usage.completion_tokens)

Response: This image appears to be a screenshot of a piece of code or text from a coding environment or text editor. The text is formatted with syntax highlighting, which is common in programming. The visible text reads "settings" and part of a sentence, "...UM_ENABLED to true to enable UM analy...", suggesting it is likely part of configuration settings or code related to enabling a feature, possibly abbreviated as "UM." The colors (green and other colors) indicate different types of syntax highlighting typically used to differentiate between various elements such as keywords, strings, and comments in programming code.
Model: gpt-4o-2024-08-06
Usage:
	Prompt tokens: 287
	Total tokens: 401
	Completion tokens: 114


"It seems you've mentioned an image, but since I can't view images directly, could you describe it to me? I'd be happy to help analyze or provide information based on your description!"

In [18]:
import requests


# Image file path
IMAGE_PATH = 'temp_image.png'


# Read the image as binary
with open(IMAGE_PATH, "rb") as image_file:
    image_data = image_file.read()

# Request headers and payload
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {azure_api_key}"
}

payload = {
    "model": "gpt-4o",
    "messages": [
        {"role": "system", "content": "Describe the image."},
        {"role": "user", "content": image_data.decode("latin-1")}
    ]
}

# Send the request to Azure OpenAI
response = requests.post(azure_endpoint, headers=headers, json=payload)

# Print the response
print(response.json())


{'error': {'code': '404', 'message': 'Resource not found'}}
