In [None]:
import os
import requests
from IPython.display import Image
from dotenv import load_dotenv

# Guide: https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/gpt-with-vision

# Load .env (override=True: values from the .env file replace variables that
# are already set in the process environment)
load_dotenv(override=True)

# TODO: Update API to work with standard OpenAI (not Azure)
# Configure API
model = "gpt-4-vision"  # Used as the deployment name in the endpoint URL below
api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
api_version = os.getenv("AZURE_OPENAI_VERSION")
api_key = os.getenv("AZURE_OPENAI_KEY")

# Fail fast with a readable message. Without this check a missing variable is
# silently formatted as the string "None" into the URL / header and only
# surfaces later as a confusing request error.
_missing = [
    name
    for name, value in {
        "AZURE_OPENAI_ENDPOINT": api_base,
        "AZURE_OPENAI_VERSION": api_version,
        "AZURE_OPENAI_KEY": api_key,
    }.items()
    if not value
]
if _missing:
    raise RuntimeError(f"Missing required environment variable(s): {', '.join(_missing)}")

# Drop trailing slashes so the endpoint never contains "//openai/..."
api_base = api_base.rstrip("/")
gpt4v_endpoint = f"{api_base}/openai/deployments/{model}/chat/completions?api-version={api_version}"
headers = {"Content-Type": "application/json", "api-key": api_key}

# Reset history (re-running this cell starts a new conversation)
history = []

In [None]:
# System prompt sent as the first message of every request
# (Swedish for "You are a helpful assistant.").
instructions = """
Du är en hjälpsam assistent.
"""

# Wrap the prompt as a single text part in a system-role chat message.
system_msg = dict(
    role="system",
    content=[dict(type="text", text=instructions)],
)


In [None]:
# NOTE: Larger images cost more tokens, so keep the requested size small.

# Image from URL
# The width/height are passed as query parameters together with a crop mode,
# so the image is requested already cropped from sahlgrenska.se.
width = height = 250
images = dict()
images["vårdmöte01"] = (
    "https://www.sahlgrenska.se/contentassets/fac0b962e2724b43b342f6c463518f25/cropped-vgr18-su311732.jpg"
    f"?w={width}&h={height}&mode=crop&anchor=topcenter"
)

# Select the image used in the question cell and preview it inline.
image_url = images["vårdmöte01"]
preview = Image(url=image_url)
display(preview)


In [None]:
# Ask a question
question = """Beskriv vad du ser i bilden på ett sådant sätt att den kan återskapas av någon som inte sett den. Var detaljerad och målande."""
user_input = [
  {"type": "text", "text": question},
  {"type": "image_url", "image_url": {"url": image_url}},
]
print(f"user: {question}")

# Create payload for the request.
# Order: system message, earlier (text-only) turns, then the new user turn,
# which is the only message carrying the image.
messages = [system_msg, *history, {"role": "user", "content": user_input}]
payload = {
  "messages": messages,
  "temperature": 0.7,
  "top_p": 0.95,
  "max_tokens": 3000,
}

# Send request
# (connect, read) timeouts in seconds. Without a timeout `requests` waits
# indefinitely on a stalled connection and the cell never finishes.
request_timeout = (10, 120)
try:
    http_response = requests.post(
        gpt4v_endpoint, headers=headers, json=payload, timeout=request_timeout
    )
    http_response.raise_for_status()  # Raises an HTTPError for unsuccessful status codes
    # `response` is the assistant message dict taken from the first choice
    response = http_response.json()["choices"][0]["message"]
except requests.RequestException as e:
    raise SystemExit(f"Failed to make the request. Error: {e}") from e
except (KeyError, IndexError, TypeError, ValueError) as e:
    # The call succeeded but the body is not the expected
    # {"choices": [{"message": ...}]} structure (or is not valid JSON).
    raise SystemExit(f"Unexpected response format. Error: {e!r}") from e
print(f"assistant: {response['content']}")

# Update history
# NOTE: The image is deliberately not added to history; only the question text
# and the assistant reply are kept for follow-up turns.
history += [{"role": "user", "content": [{"type": "text", "text": question}]}, response]


In [None]:
# Print history: one "role: text" line per stored message.
for entry in history:
    body = entry["content"]
    speaker = entry["role"]
    # List-form content shows the text of its first part; any other content
    # is printed as-is.
    text = body[0]["text"] if isinstance(body, list) else body
    print(f"{speaker}: {text}")
