In [None]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file (use .env.example as template).
# The path is resolved relative to the current working directory.
load_dotenv(os.path.abspath(os.path.join(os.getcwd(), "../.env")))

# Print gateway-related env variables so a missing/misconfigured .env is obvious.
print(os.getenv("OPENAI_API_VERSION"))
print(os.getenv("AZURE_OPENAI_ENDPOINT"))

# Never echo the API key itself: cell outputs are stored inside the notebook
# file and are easily shared or committed. Report only whether it is set
# (None is still printed when the variable is missing, as before).
print("********" if os.getenv("AZURE_OPENAI_API_KEY") else None)

# some imports
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from openai import AsyncAzureOpenAI, AzureOpenAI

# Model names exercised by the cells below, grouped by name prefix.
available_models = [
    # claude-*
    "claude-3-7-sonnet-20250219",
    "claude-3-5-sonnet-20241022",
    # gpt-*
    "gpt-4o-20241120",
    "gpt-4o-mini-20240718",
    # o* (names starting with "o")
    "o1-20241217",
    "o1-mini-20240912",
    "o3-mini-20250131",
]

# run chat models

In [None]:
# Smoke-test every listed model with a trivial prompt.
for model_name in available_models:
    print(f"Testing model: {model_name}")
    # Names starting with "o" get temperature=1, everything else 0.6.
    # NOTE(review): presumably the o-series models only accept the default
    # temperature -- confirm against the gateway's behaviour.
    temperature = 1 if model_name.startswith("o") else 0.6
    model = AzureChatOpenAI(model=model_name, temperature=temperature)
    # Show the reply: a statement inside a loop is not auto-displayed by the
    # notebook, so without this print the cell gives no evidence of an answer.
    reply = model.invoke("Hello, world!")
    print(f"Response:\n{reply.content}\n")

# o3-mini with reasoning

In [None]:
# Query o3-mini with an explicit reasoning effort and report the reply together
# with the number of reasoning tokens recorded in the response metadata.
model = AzureChatOpenAI(
    model="o3-mini-20250131",
    temperature=1,
    reasoning_effort="medium",  # "low", "medium", "high"
)
res = model.invoke("Hello, world!")

print(f"Response:\n{res.content}\n")

# Walk the usage metadata down to the reasoning-token counter.
token_usage = res.response_metadata['token_usage']
reasoning_tokens = token_usage['completion_tokens_details']['reasoning_tokens']
print(f"Reasoning tokens:\n{reasoning_tokens}")

# claude-3-7-sonnet with reasoning

## using completion API

In [None]:
# Synchronous chat-completions call with "thinking" enabled via extra_body.
client = AzureOpenAI()

thinking_config = {
    "type": "enabled",
    "budget_tokens": 8000,  # Must be higher than 1024
}
prompt_messages = [
    {"role": "user", "content": [{"type": "text", "text": "Explain what is GPT"}]},
]

chat_completion = client.chat.completions.create(
    model="claude-3-7-sonnet-20250219",  # model = "deployment_name".
    messages=prompt_messages,
    extra_body={"thinking": thinking_config},
    max_tokens=15000,  # Must be higher than the budget_tokens
)

# The reasoning text is read from the message's extra (non-standard) fields;
# the answer itself is the regular message content.
message = chat_completion.choices[0].message
reasoning_content = message.model_extra['reasoning_content']
response_content = message.content

banner = "*" * 100
print("\n\n")
print(banner + "\nReasoning:\n" + banner + "\n")
print(reasoning_content)
print(banner + "\nResponse:\n" + banner + "\n")
print(response_content)

## using completion Async API

In [None]:
# Async version
async def get_completion():
    async_client = AsyncAzureOpenAI()
    
    chat_completion = await async_client.chat.completions.create(
        model="claude-3-7-sonnet-20250219",
        messages=[{"role": "user", "content": [{"type": "text", "text": "Explain what is GPT"}]}],
        extra_body={
            "thinking": {
                "type": "enabled", 
                "budget_tokens": 8000
            }
        },
        max_tokens=15000,
        stream=True # Enable streaming
    )


    print("\nStreaming response:")
    # Track reasoning content and response content separately
    reasoning_content = ""
    response_content = ""
    
    async for chunk in chat_completion:
        # Check for reasoning content in model_extra
        if chunk.choices[0].delta.model_extra and 'reasoning_content' in chunk.choices[0].delta.model_extra:
            reasoning = chunk.choices[0].delta.model_extra['reasoning_content']
            reasoning_content += reasoning
            print(f"{reasoning}", end="", flush=True)
        
        # Check for regular content
        if chunk.choices[0].delta.content:
            content = chunk.choices[0].delta.content
            response_content += content
            print(content, end="", flush=True)
    
    print("\n\n")
    print("*"*100 + "\nReasoning:\n" + "*"*100 + "\n")
    print(reasoning_content)
    print("*"*100 + "\nResponse:\n" + "*"*100 + "\n")
    print(response_content)

await get_completion()

## using Chat API

In [None]:
# Same "thinking" configuration as the completion cells, this time passed to
# the LangChain chat model through extra_body.
llm = AzureChatOpenAI(
    model="claude-3-7-sonnet-20250219",
    extra_body={"thinking": {"type": "enabled", "budget_tokens": 8000}},
    max_tokens=15000,
)

# Async alternative: response = await llm.ainvoke("Explain what is GPT")
response = llm.invoke("Explain what is GPT")
print(response.content)