In [None]:
# Install the packages imported by the next cell. The %pip magic (rather than
# !pip) installs into the environment of the running kernel.
%pip install boto3 openai ipython


In [None]:
import os
import boto3
import json
from openai import OpenAI
from datetime import datetime
from IPython.display import clear_output, display, display_markdown, Markdown


In [None]:
# Model Configuration - Qwen3 Coder Models
# These identifiers are passed as the `model` / `modelId` argument by the request
# cells in this notebook, so switching models only requires editing them here.
QWEN_CODER_480B_MODEL_ID = "qwen.qwen3-coder-480b-a35b-v1:0"  # High-performance coding model
QWEN_CODER_30B_MODEL_ID = "qwen.qwen3-coder-30b-a3b-v1:0"  # Cost-effective coding model

# Echo the configured IDs so the cell output records which models were used.
print(f"✅ Using 480B Coder model: {QWEN_CODER_480B_MODEL_ID}")
print(f"✅ Using 30B Coder model: {QWEN_CODER_30B_MODEL_ID}")


In [None]:
# Set environment variables to point to Bedrock
# Note: Change the region in the URL to match your preferred region
BEDROCK_API_KEY_PLACEHOLDER = "<insert your bedrock API key>"

# Paste your Bedrock API key here, or leave the placeholder untouched and export
# AWS_BEARER_TOKEN_BEDROCK before starting Jupyter (keeps the key out of the notebook).
bedrock_api_key = BEDROCK_API_KEY_PLACEHOLDER

if bedrock_api_key == BEDROCK_API_KEY_PLACEHOLDER:
    # Never clobber a real key that is already configured in the environment.
    bedrock_api_key = os.environ.get("AWS_BEARER_TOKEN_BEDROCK", BEDROCK_API_KEY_PLACEHOLDER)

api_key_configured = bedrock_api_key != BEDROCK_API_KEY_PLACEHOLDER

if api_key_configured:
    os.environ["AWS_BEARER_TOKEN_BEDROCK"] = bedrock_api_key
elif os.environ.get("AWS_BEARER_TOKEN_BEDROCK") == BEDROCK_API_KEY_PLACEHOLDER:
    # A previous run may have exported the placeholder itself. Remove it so the
    # boto3 cells can fall back to the standard AWS credential chain instead of
    # sending a bogus bearer token.
    del os.environ["AWS_BEARER_TOKEN_BEDROCK"]

# OPENAI_API_KEY is always set so that `OpenAI()` can still be constructed in the
# next cell; with the placeholder, requests fail with an authentication error.
os.environ["OPENAI_API_KEY"] = bedrock_api_key
os.environ["OPENAI_BASE_URL"] = "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1"

if api_key_configured:
    print("✅ Environment configured for Bedrock!")
else:
    print("⚠️ No Bedrock API key configured - set bedrock_api_key above or export AWS_BEARER_TOKEN_BEDROCK")
print("📍 Using us-west-2 region - change the URL above to use a different region")


In [None]:
# Initialize both clients
# Note: Change region_name to match your preferred region
# No arguments are passed to OpenAI(), so the SDK is expected to pick up the
# OPENAI_API_KEY / OPENAI_BASE_URL environment variables set by this notebook.
client = OpenAI()  # For chat completions API
# boto3 runtime client; the Converse API cells later in the notebook call it.
bedrock_client = boto3.client('bedrock-runtime', region_name='us-west-2')  

# The region is read back from the client object rather than repeated as a literal.
print("✅ OpenAI client initialized (pointing to Bedrock)")
print(f"✅ Bedrock client initialized in region: {bedrock_client.meta.region_name}")
print("📍 Change region_name above to use a different supported region")


In [None]:
# Example 1: Qwen3-Coder-30B-A3B-Instruct (cost-effective coding model)
# Single non-streaming chat completion: one system message plus one user message,
# with the completion capped at 2000 tokens.
response = client.chat.completions.create(
    model=QWEN_CODER_30B_MODEL_ID,                 
    messages=[
        {"role": "system", "content": "You are a helpful coding assistant. Write clean, efficient code with comments."},
        {"role": "user",   "content": "Please code a quick sort algorithm in Python"}
    ],
    temperature=0,
    max_completion_tokens=2000
)

# Extract and print the response text (only the first choice is read)
print("🤖 Qwen3-Coder-30B-A3B-Instruct response:")
print(response.choices[0].message.content)


In [None]:
# Example 2: Qwen3-Coder-480B-A35B-Instruct (high-performance coding model)
# Same request shape as the 30B example, with a more demanding prompt and a
# larger completion cap (3000 tokens).
response = client.chat.completions.create(
    model=QWEN_CODER_480B_MODEL_ID,                 
    messages=[
        {"role": "system", "content": "You are an expert coding assistant. Write optimized, well-documented code with best practices."},
        {"role": "user",   "content": "Please code a quick sort algorithm in Python with detailed comments and time complexity analysis"}
    ],
    temperature=0,
    max_completion_tokens=3000
)

# Extract and print the response text (only the first choice is read)
print("🚀 Qwen3-Coder-480B-A35B-Instruct response:")
print(response.choices[0].message.content)


In [None]:
# Streaming with Qwen3-Coder-480B-A35B-Instruct (high-performance model)
# stream=True makes the SDK return an iterator of chunks instead of one response.
streaming_response = client.chat.completions.create(
    model=QWEN_CODER_480B_MODEL_ID,
    messages=[
        {"role": "system", "content": "You are an expert coding assistant. Write clean, efficient code."},
        {"role": "user",   "content": "Please code a binary search algorithm in Python"}
    ],
    temperature=0,
    max_completion_tokens=2000,
    stream=True
)

# Extract and print the response text in real-time.
print("🚀 Streaming Qwen3-Coder-480B-A35B-Instruct response:")
for chunk in streaming_response:
    # A chunk may arrive with an empty `choices` list (for example a usage-only
    # chunk); indexing it unguarded would raise IndexError and abort the stream.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    if delta_text is not None:
        # flush so each fragment is displayed as soon as it is received
        print(delta_text, end="", flush=True)


In [None]:
# Streaming with Qwen3-Coder-30B-A3B-Instruct (cost-effective model)
# stream=True makes the SDK return an iterator of chunks instead of one response.
streaming_response = client.chat.completions.create(
    model=QWEN_CODER_30B_MODEL_ID,
    messages=[
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user",   "content": "Please code a simple bubble sort algorithm in Python"}
    ],
    temperature=0,
    max_completion_tokens=1500,
    stream=True
)

# Extract and print the response text in real-time.
print("🤖 Streaming Qwen3-Coder-30B-A3B-Instruct response:")
for chunk in streaming_response:
    # A chunk may arrive with an empty `choices` list (for example a usage-only
    # chunk); indexing it unguarded would raise IndexError and abort the stream.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    if delta_text is not None:
        # flush so each fragment is displayed as soon as it is received
        print(delta_text, end="", flush=True)


In [None]:
def execute_python_code(code):
    """
    Pretend to run a snippet of Python and report a canned outcome.

    Nothing is actually executed: the reply is chosen purely by looking for the
    substrings "print" and "Hello" in the submitted source. A real application
    would replace this with a sandboxed execution environment.

    Args:
        code (str): Python source the caller wants to "run"

    Returns:
        dict: Always has the keys "status", "output" and "result" (in that order).
            "status" is "success" for any string input; if inspecting `code`
            raises (e.g. it is not a string), "status" is "error" and "result"
            is None.
    """
    try:
        # Guard clause: snippets without a print call are reported as silent.
        if "print" not in code:
            return {
                "status": "success",
                "output": "Code executed without output",
                "result": "No output to display",
            }

        simulated_result = "Output generated"
        if "Hello" in code:
            simulated_result = "Hello, World!"

        return {
            "status": "success",
            "output": "Code executed successfully",
            "result": simulated_result,
        }
    except Exception as exc:
        return {
            "status": "error",
            "output": f"Execution failed: {str(exc)}",
            "result": None,
        }

# Define the function schema for OpenAI SDK
# One tool is advertised to the model. Its "name" must match the name the
# function-calling handler dispatches on ("execute_python_code"), and the
# "parameters" JSON Schema declares a single required string argument, "code".
tools = [{
    "type": "function",
    "function": {
        "name": "execute_python_code",
        "description": "Execute Python code and return the result. Use this to test and validate generated code.",
        "parameters": {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "Python code to execute"
                }
            },
            "required": ["code"],
            # Disallow any argument other than "code".
            "additionalProperties": False
        }
    }
}]

print("✅ Code execution function and tools configuration ready!")


In [None]:
def _run_tool_call(tool_call, available_functions):
    """Execute one requested tool call and return its result; failures are returned as {"error": ...} instead of raised."""
    function_name = tool_call.function.name
    print(f"🔧 Calling function: {function_name}")

    # Models occasionally emit malformed or empty argument payloads. Report that
    # back to the model instead of letting the exception abort the conversation.
    try:
        function_args = json.loads(tool_call.function.arguments or "{}")
    except (TypeError, ValueError) as exc:
        return {"error": f"Invalid JSON arguments for {function_name}: {exc}"}
    if not isinstance(function_args, dict):
        return {"error": f"Arguments for {function_name} must be a JSON object"}
    print(f"🔧 Arguments: {function_args}")

    handler = available_functions.get(function_name)
    if handler is None:
        return {"error": f"Unknown function: {function_name}"}

    try:
        # Arguments are passed by keyword, so a missing or unexpected argument
        # surfaces as a TypeError that is reported to the model below.
        function_result = handler(**function_args)
    except Exception as exc:
        return {"error": f"{function_name} failed: {exc}"}

    print(f"🔧 Function result: {function_result}")
    return function_result


def chat_with_coding_functions(client, model, messages, tools, max_iterations=3, available_functions=None):
    """
    Chat with function calling support for coding workflows using OpenAI SDK format.

    Each iteration sends the conversation to the model. If the reply contains
    tool calls, every call is executed and its result appended as a "tool"
    message before the next iteration; otherwise the reply is returned.

    Args:
        client: OpenAI client instance
        model: Model ID to use
        messages: List of conversation messages. This list is mutated in place:
            assistant replies and tool results are appended to it.
        tools: List of available tools/functions
        max_iterations: Maximum number of function call iterations (must be >= 1)
        available_functions: Optional mapping of tool name -> callable. Each
            callable receives the decoded tool arguments as keyword arguments.
            Defaults to {"execute_python_code": execute_python_code}.

    Returns:
        The last assistant message. When the iteration limit is reached this is
        a tool-call message, whose `content` may be empty.

    Raises:
        ValueError: If max_iterations is less than 1.
    """
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    if available_functions is None:
        # Looked up at call time so the default follows the notebook's current definition.
        available_functions = {"execute_python_code": execute_python_code}

    assistant_message = None

    for iteration in range(max_iterations):
        print(f"🔄 Iteration {iteration + 1}")

        # Make request with tools
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )

        assistant_message = response.choices[0].message
        messages.append(assistant_message)

        if not assistant_message.tool_calls:
            # No more function calls, return final response
            print("✅ No function calls requested, conversation complete")
            return assistant_message

        print(f"🔧 Model requested {len(assistant_message.tool_calls)} function call(s)")

        for tool_call in assistant_message.tool_calls:
            function_result = _run_tool_call(tool_call, available_functions)

            # Add function result to conversation. default=str keeps a
            # non-JSON-serializable result from crashing the loop.
            messages.append({
                "tool_call_id": tool_call.id,
                "role": "tool",
                "content": json.dumps(function_result, default=str),
            })

    print("⚠️ Maximum iterations reached")
    return assistant_message

print("✅ Enhanced coding function calling handler ready!")


In [None]:
# Test enhanced function calling with both Coder models
coding_questions = [
    "Write a Python function to calculate the factorial of a number and test it",
    "Create a simple calculator function and demonstrate it works",
    "Write a function to reverse a string and test it with 'Hello World'",
    "Create a function to find the maximum number in a list and test it"
]


def _run_coding_tests(model_id, model_label, icon, system_prompt, questions):
    """Send each question through chat_with_coding_functions with the given model and print the outcome."""
    print(f"\n{icon} Testing with {model_label}")
    print("-" * 50)

    for i, question in enumerate(questions, 1):
        print(f"\n📝 Test {i}: {question}")
        print("-" * 30)

        try:
            # Fresh conversation per question: the handler appends to this list.
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question}
            ]

            final_response = chat_with_coding_functions(
                client=client,
                model=model_id,
                messages=messages,
                tools=tools
            )

            print(f"{icon} Final response:")
            print(final_response.content)

        except Exception as e:
            # Deliberate best-effort: report the failure and continue with the
            # next question so one bad request does not stop the demo.
            print(f"❌ Error: {str(e)}")

        print()


print("💻 Testing Enhanced Function Calling with Qwen3 Coder Models")
print("=" * 60)

# Test with Qwen3-Coder-30B-A3B-Instruct (cost-effective model) - first 2 questions
_run_coding_tests(
    model_id=QWEN_CODER_30B_MODEL_ID,
    model_label="Qwen3-Coder-30B-A3B-Instruct",
    icon="🤖",
    system_prompt="You are a helpful coding assistant. Write clean code and use the execute_python_code function to test your code.",
    questions=coding_questions[:2],
)

# Test with Qwen3-Coder-480B-A35B-Instruct (high-performance model) - last 2 questions
_run_coding_tests(
    model_id=QWEN_CODER_480B_MODEL_ID,
    model_label="Qwen3-Coder-480B-A35B-Instruct",
    icon="🚀",
    system_prompt="You are an expert coding assistant. Write optimized, well-documented code and use the execute_python_code function to test your code.",
    questions=coding_questions[2:],
)


In [None]:
# Configure region for Bedrock client
# Set `region` to a region name to override the environment; leave it as None to
# use AWS_REGION / AWS_DEFAULT_REGION.
region = None

if region is None:
    target_region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION"))
else:
    # Honor the explicit choice (previously a hard-coded literal was reported here).
    target_region = region

# Build the client from the resolved region so the region reported below is the
# one actually in use. If nothing was resolved, boto3 applies its own lookup.
bedrock_runtime = boto3.client('bedrock-runtime', region_name=target_region)
print(f"📍 Using region: {bedrock_runtime.meta.region_name} - change the region variable above to use a different supported region")


In [None]:
def invoke_model(body, model_id, accept, content_type):
    """
    Run a single (non-streaming) inference request against an Amazon Bedrock model.

    The request is sent through the notebook-level `bedrock_runtime` client.

    Args:
        body (dict): Request payload; serialized to JSON before sending
        model_id (str): Identifier of the model to invoke
        accept (str): Value forwarded as the request's `accept` argument
        content_type (str): Value forwarded as the request's `contentType` argument

    Returns:
        The response returned by `bedrock_runtime.invoke_model`, unmodified.

    Raises:
        Exception: Any error raised while serializing or sending the request is
            re-raised after a short diagnostic line is printed.
    """
    try:
        return bedrock_runtime.invoke_model(
            modelId=model_id,
            contentType=content_type,
            accept=accept,
            body=json.dumps(body),
        )
    except Exception as err:
        # Name the failing model, then let the caller see the original error.
        print(f"Couldn't invoke {model_id}")
        raise err


In [None]:
# Example with Qwen3-Coder-480B-A35B-Instruct (high-performance coding model)
# Same kind of request as the chat-completions examples, but sent through the
# invoke_model helper as a JSON body.
messages = [
    {"role": "system", "content": "You are an expert coding assistant. Write clean, efficient code with comments."},
    {"role": "user",   "content": "Please code a merge sort algorithm in Python"}
]

# Request payload; invoke_model serializes it with json.dumps.
body = {
    "messages": messages,
    "temperature": 0,
    "max_completion_tokens": 2000
}

accept = "application/json"
contentType = "application/json"

response = invoke_model(body, QWEN_CODER_480B_MODEL_ID, accept, contentType)
# The response body is read as a stream and decoded from JSON.
response_body = json.loads(response.get("body").read())

print("🚀 Qwen3-Coder-480B-A35B-Instruct response:")
print(response_body['choices'][0]['message']['content'])


In [None]:
# Example with Qwen3-Coder-30B-A3B-Instruct (cost-effective coding model)
# Same flow as the 480B invoke_model example, with a smaller completion cap.
messages = [
    {"role": "system", "content": "You are a helpful coding assistant. Write clean, efficient code."},
    {"role": "user",   "content": "Please code a simple insertion sort algorithm in Python"}
]

# Request payload; invoke_model serializes it with json.dumps.
body = {
    "messages": messages,
    "temperature": 0,
    "max_completion_tokens": 1500
}

accept = "application/json"
contentType = "application/json"

response = invoke_model(body, QWEN_CODER_30B_MODEL_ID, accept, contentType)
# The response body is read as a stream and decoded from JSON.
response_body = json.loads(response.get("body").read())

print("🤖 Qwen3-Coder-30B-A3B-Instruct response:")
print(response_body['choices'][0]['message']['content'])


In [None]:
# Streaming with Qwen3-Coder-480B-A35B-Instruct (high-performance model)
messages = [
    {"role": "system", "content": "You are an expert coding assistant. Write clean, efficient code."},
    {"role": "user",   "content": "Please code a heap sort algorithm in Python"}
]

body = {
    "messages": messages,
    "temperature": 0,
    "max_completion_tokens": 2000
}

accept = "application/json"
contentType = "application/json"

# Wall-clock start, used only to report the time to the first content fragment.
start_time = datetime.now()

response = bedrock_runtime.invoke_model_with_response_stream(
    body=json.dumps(body), modelId=QWEN_CODER_480B_MODEL_ID, accept=accept, contentType=contentType
)
chunk_count = 0
time_to_first_token = None

# Process the response stream
stream = response.get("body")
if stream:
    print("🚀 Streaming Qwen3-Coder-480B-A35B-Instruct response:")
    for event in stream:
        chunk = event.get("chunk")
        if not chunk:
            continue

        chunk_json = json.loads(chunk.get("bytes").decode())

        # Skip chunks without a populated choice instead of indexing the result
        # of .get() directly, which raised on a missing or empty "choices".
        choices = chunk_json.get("choices") or []
        if not choices:
            continue

        content_block_delta = (choices[0].get("delta") or {}).get("content")
        if content_block_delta:
            if time_to_first_token is None:
                time_to_first_token = datetime.now() - start_time
                print(f"Time to first token: {time_to_first_token}")

            # Only chunks that carry text are counted.
            chunk_count += 1
            print(content_block_delta, end="", flush=True)
    print(f"\nTotal chunks: {chunk_count}")
else:
    print("No response stream received.")


In [None]:
# Converse API with Qwen3-Coder-30B-A3B-Instruct (cost-effective model)
# The system prompt travels in its own `system` argument and message content is
# a list of content blocks rather than a plain string.
response = bedrock_client.converse(
    modelId=QWEN_CODER_30B_MODEL_ID,
    system=[{"text": "You are a helpful coding assistant. Write clean, efficient code."}],
    messages=[
        {"role": "user", "content": [{"text": "Please code a simple linear search algorithm in Python"}]},
    ],
    inferenceConfig={"temperature": 0, "maxTokens": 1500},
)

# Show the text of the first content block of the reply
print("🤖 Qwen3-Coder-30B-A3B-Instruct response:")
print(response["output"]["message"]["content"][0]["text"])


In [None]:
# Converse API with Qwen3-Coder-480B-A35B-Instruct (high-performance model)
# The system prompt travels in its own `system` argument and message content is
# a list of content blocks rather than a plain string.
response = bedrock_client.converse(
    modelId=QWEN_CODER_480B_MODEL_ID,
    system=[{"text": "You are an expert coding assistant. Write optimized, well-documented code with best practices."}],
    messages=[
        {"role": "user", "content": [{"text": "Please code an optimized binary search algorithm in Python with detailed comments"}]},
    ],
    inferenceConfig={"temperature": 0, "maxTokens": 2000},
)

# Show the text of the first content block of the reply
print("🚀 Qwen3-Coder-480B-A35B-Instruct response:")
print(response["output"]["message"]["content"][0]["text"])


In [None]:
# Streaming through Converse API with Qwen3-Coder-30B-A3B-Instruct (cost-effective model)
def bedrock_model_converse_stream_30b(client, system_prompt, user_prompt, max_tokens=1500, temperature=0, model_id=None):
    """
    Stream a single-turn Converse API reply and print it as it arrives.

    Args:
        client: Object exposing `converse_stream(**kwargs)` (the boto3
            bedrock-runtime client in this notebook)
        system_prompt (str): Text sent as the system prompt
        user_prompt (str): Text sent as the single user message
        max_tokens (int): Forwarded as inferenceConfig["maxTokens"]
        temperature: Forwarded as inferenceConfig["temperature"]
        model_id (str, optional): Model to call; defaults to QWEN_CODER_30B_MODEL_ID

    Returns:
        None. The reply is written to stdout without a trailing newline.
    """
    if model_id is None:
        # Looked up at call time so the notebook-level constant can be changed later.
        model_id = QWEN_CODER_30B_MODEL_ID

    response = client.converse_stream(
        modelId=model_id,
        messages=[{"role": "user", "content": [{"text": user_prompt}]}],
        system=[{"text": system_prompt}],
        inferenceConfig={"temperature": temperature, "maxTokens": max_tokens},
    )

    # Extract and print the response text in real-time. Only contentBlockDelta
    # events that carry non-empty text are printed; other events are ignored.
    for event in response["stream"]:
        text = event.get("contentBlockDelta", {}).get("delta", {}).get("text")
        if text:
            print(text, end="", flush=True)


In [None]:
# Streaming through Converse API with Qwen3-Coder-480B-A35B-Instruct (high-performance model)
def bedrock_model_converse_stream_480b(client, system_prompt, user_prompt, max_tokens=2000, temperature=0, model_id=None):
    """
    Stream a single-turn Converse API reply and print it as it arrives.

    Args:
        client: Object exposing `converse_stream(**kwargs)` (the boto3
            bedrock-runtime client in this notebook)
        system_prompt (str): Text sent as the system prompt
        user_prompt (str): Text sent as the single user message
        max_tokens (int): Forwarded as inferenceConfig["maxTokens"]
        temperature: Forwarded as inferenceConfig["temperature"]
        model_id (str, optional): Model to call; defaults to QWEN_CODER_480B_MODEL_ID

    Returns:
        None. The reply is written to stdout without a trailing newline.
    """
    if model_id is None:
        # Looked up at call time so the notebook-level constant can be changed later.
        model_id = QWEN_CODER_480B_MODEL_ID

    response = client.converse_stream(
        modelId=model_id,
        messages=[{"role": "user", "content": [{"text": user_prompt}]}],
        system=[{"text": system_prompt}],
        inferenceConfig={"temperature": temperature, "maxTokens": max_tokens},
    )

    # Extract and print the response text in real-time. Only contentBlockDelta
    # events that carry non-empty text are printed; other events are ignored.
    for event in response["stream"]:
        text = event.get("contentBlockDelta", {}).get("delta", {}).get("text")
        if text:
            print(text, end="", flush=True)


In [None]:
# Example usage of streaming functions
# Both helpers print the streamed text themselves, so the calls are made only
# for that side effect. The max_tokens / temperature defaults of each helper
# are used.

print("\n\n🤖 Streaming with Qwen3-Coder-30B-A3B-Instruct:")
bedrock_model_converse_stream_30b(
    client=bedrock_client,
    system_prompt="You are a helpful coding assistant.",
    user_prompt="Please code a simple selection sort algorithm in Python"
)

print("\n\n🚀 Streaming with Qwen3-Coder-480B-A35B-Instruct:")
bedrock_model_converse_stream_480b(
    client=bedrock_client,
    system_prompt="You are an expert coding assistant. Write optimized, well-documented code.",
    user_prompt="Please code an efficient radix sort algorithm in Python with detailed comments"
)
