In [None]:
%%writefile requirements.txt
# Python dependencies for this notebook (installed in the next cell with pip).
# NOTE(review): the notebook also imports `boto3` and `sagemaker`, which are not
# listed here -- presumably they are preinstalled in the target environment; confirm.
strands-agents>=1.9.1
strands-agents-tools>=0.2.8
mlflow>=3.4.0
# NOTE(review): confirm this distribution name and version on PyPI; the SageMaker
# MLflow plugin may be published under the name `sagemaker-mlflow` instead.
mlflow-sagemaker>=1.5.11
# Same package as the first entry, requested again with the `sagemaker` extra
# (the notebook later imports strands.models.sagemaker).
strands-agents[sagemaker]

In [None]:
pip install -r requirements.txt

In [None]:
cat requirements.txt

## Beginning with Strands Agents

In [None]:
from strands import Agent, tool
from strands.models import BedrockModel
from strands_tools import http_request, calculator

# Model identifier handed to BedrockModel for this first agent.
BEDROCK_MODEL_ID = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
model = BedrockModel(model_id=BEDROCK_MODEL_ID)

# Build an agent whose only tool is `http_request`, then ask it a question.
# The call is the last expression so its result is displayed as the cell output.
agent = Agent(tools=[http_request], model=model)
agent("Where is the international space station now?")

## Deploy Model as SageMaker AI Endpoint

In [None]:
# Stand up the first endpoint, serving the JumpStart Qwen3-4B model.
import boto3
from boto3.session import Session
from sagemaker.jumpstart.model import JumpStartModel

# Region and account of the active AWS credentials (both are reused by later cells).
boto_session = Session()
region = boto_session.region_name
sts = boto3.client('sts')
account_id = sts.get_caller_identity().get("Account")

# The endpoint keeps this name for the whole notebook. INITIAL_CONFIG_NAME holds
# the same value and is the name passed to delete_endpoint_config during clean up.
ENDPOINT_NAME = INITIAL_CONFIG_NAME = "llm-endpoint-sagemaker"

# "qwen-4b-model" is referenced again by name when the A/B variants are defined.
model_a = JumpStartModel(
    name="qwen-4b-model",
    model_id="huggingface-reasoning-qwen3-4b",
    model_version="1.0.0",
)

# Deploy on a single ml.g5.2xlarge instance behind ENDPOINT_NAME.
deploy_settings = {
    "initial_instance_count": 1,
    "instance_type": "ml.g5.2xlarge",
    "endpoint_name": ENDPOINT_NAME,
}
predictor_a = model_a.deploy(**deploy_settings)

## Use SageMaker LLM endpoint with Strands Agent

In [None]:
from strands import Agent, tool
from strands.models.sagemaker import SageMakerAIModel
from strands_tools import http_request, calculator

# Which endpoint to call, and in which region.
endpoint_settings = {
    "endpoint_name": ENDPOINT_NAME,
    "region_name": region,
}

# Generation settings passed to the model wrapper as its payload configuration.
generation_settings = {
    "max_tokens": 2048,
    "temperature": 0.2,
    "stream": True,
}

model_sagemaker = SageMakerAIModel(
    endpoint_config=endpoint_settings,
    payload_config=generation_settings,
)

# Smoke-test: an agent backed by the SageMaker endpoint, with the HTTP tool only.
agent = Agent(tools=[http_request], model=model_sagemaker)
agent("Where is the international space station now? (Use: http://api.open-notify.org/iss-now.json")

## Creating MLflow Tracking Server

In [None]:
# MLflow config: create a SageMaker-managed MLflow tracking server from code
# and point the MLflow client at it.
import os
import time

import boto3
import mlflow
from sagemaker import get_execution_role

TRACKING_SERVER_NAME = 'strands-mlflow-server'

# Define the client and role here so the cell works on a fresh kernel
# (Restart & Run All); previously both names were undefined at this point.
sagemaker_client = boto3.client('sagemaker')
# NOTE(review): reuses the notebook's execution role for the tracking server;
# substitute a dedicated role ARN if your setup requires one.
role = get_execution_role()

# Create MLflow tracking server.
# NOTE(review): assumes the bucket `<account_id>-mlflow-bucket` already exists -- confirm.
response = sagemaker_client.create_mlflow_tracking_server(
    TrackingServerName=TRACKING_SERVER_NAME,
    ArtifactStoreUri=f's3://{account_id}-mlflow-bucket/artifacts',
    RoleArn=role,
    TrackingServerSize='Small',  # Small, Medium, or Large
    WeeklyMaintenanceWindowStart='Tue:03:30'
)

# Creation is asynchronous: poll until the server reports 'Created' and fail
# loudly on any other terminal status instead of continuing with an unusable server.
while True:
    server_info = sagemaker_client.describe_mlflow_tracking_server(
        TrackingServerName=TRACKING_SERVER_NAME
    )
    server_status = server_info['TrackingServerStatus']
    if server_status == 'Created':
        break
    if server_status != 'Creating':
        raise RuntimeError(
            f"MLflow tracking server '{TRACKING_SERVER_NAME}' entered status {server_status!r}"
        )
    time.sleep(30)

# The tracking server ARN is used as the MLflow tracking URI. Set it both in the
# environment and on the client.
tracking_uri = server_info['TrackingServerArn']
os.environ["MLFLOW_TRACKING_URI"] = tracking_uri
mlflow.set_tracking_uri(tracking_uri)
print(f"MLflow Tracking Server URL: {tracking_uri}")

In [None]:
# IF MLFLOW TRACKING SERVER ALREADY EXISTS, USE FOLLOWING CODE INSTEAD
import mlflow

# Replace the placeholder with the ARN of your existing tracking server.
TRACKING_SERVER_ARN_PLACEHOLDER = "<TRACKING_SERVER_ARN>"
existing_tracking_server_arn = "<TRACKING_SERVER_ARN>"

if existing_tracking_server_arn == TRACKING_SERVER_ARN_PLACEHOLDER:
    # Placeholder untouched: do not overwrite a tracking URI configured by an
    # earlier cell with a value that is not a real ARN.
    print("No existing tracking server ARN supplied; keeping the current MLflow tracking URI.")
else:
    tracking_uri = existing_tracking_server_arn
    mlflow.set_tracking_uri(tracking_uri)
    print(f"MLflow Tracking Server URL: {tracking_uri}")

## Run the agent with MLflow tracing

In [None]:
# Select the experiment that traces are logged under, then turn on
# MLflow's automatic logging for Strands.
import mlflow

PROD_EXPERIMENT_NAME = "Strands_Agents_prod"
mlflow.set_experiment(PROD_EXPERIMENT_NAME)
mlflow.strands.autolog()

In [None]:
# Run the agent, and apply post-processing to the result
from strands import Agent, tool

def capitalize(response):
    """Return ``response`` converted to upper case via its ``upper()`` method."""
    upper_cased = response.upper()
    return upper_cased
    
# Query the SageMaker-backed agent, then upper-case the text of the first
# content item in the reply. The final expression is shown as the cell output.
agent = Agent(tools=[http_request], model=model_sagemaker)
response = agent("Where is the international space station now? (Use: http://api.open-notify.org/iss-now.json")
first_text = response.message['content'][0]['text']
capitalize(first_text)

## Explicit tracing using @mlflow.trace decorator

In [None]:
@mlflow.trace(attributes={"operation": "capitalize"}, span_type="func")
def capitalize(response):
    """Return ``response`` upper-cased; the call is recorded by ``mlflow.trace``."""
    upper_cased = response.upper()
    return upper_cased

@mlflow.trace
def run_agent():
    """Ask an agent where the ISS is and return the upper-cased answer text.

    The agent is built with the ``http_request`` tool and no explicit ``model``
    argument. The text of the first content item of the reply is passed through
    ``capitalize`` and the result is returned.
    """
    iss_agent = Agent(tools=[http_request])
    reply = iss_agent("Where is the international space station now?")
    return capitalize(reply.message['content'][0]['text'])

# Invoke the traced function and print what it returned.
capitalized_response = run_agent()
print(capitalized_response)

## Deploy a new LLM for A/B testing

In [None]:
# Step 1: Create the challenger model from JumpStart
import boto3
from sagemaker.jumpstart.model import JumpStartModel

model_b_name = "sagemaker-strands-demo-qwen3-8b"
model_b_id, model_b_version = "huggingface-reasoning-qwen3-8b", "1.0.0"

# Use the identifiers declared above rather than repeating the literals.
model_b = JumpStartModel(
    model_id=model_b_id,
    model_version=model_b_version,
    name=model_b_name
)
model_b.create(instance_type="ml.g5.2xlarge")

# Step 2: Create production variants for A/B testing
production_variants = [
    # The original model (champion)
    {
        "VariantName": "qwen-4b-variant",
        "ModelName": "qwen-4b-model",
        "InitialInstanceCount": 1,
        "InstanceType": "ml.g5.2xlarge",
        "InitialVariantWeight": 0.5  # It will take 50% of the traffic
    },
    # The new model (challenger)
    {
        "VariantName": "qwen-8b-variant",
        "ModelName": model_b_name,
        "InitialInstanceCount": 1,
        "InstanceType": "ml.g5.2xlarge",
        "InitialVariantWeight": 0.5  # It will take 50% of the traffic
    }
]

# Step 3: Create new endpoint configuration
sagemaker_client = boto3.client('sagemaker')
ENDPOINT_CONFIG_AB_TESTING = "llm-endpoint-config-ab"
sagemaker_client.create_endpoint_config(
    EndpointConfigName=ENDPOINT_CONFIG_AB_TESTING,
    ProductionVariants=production_variants
)

# Step 4: Update the endpoint with new A/B testing configuration
sagemaker_client.update_endpoint(
    EndpointName=ENDPOINT_NAME,  # Remember, the endpoint name stays the same
    EndpointConfigName=ENDPOINT_CONFIG_AB_TESTING
)

# Wait until the update is completed (reuses the client created in Step 3)
waiter = sagemaker_client.get_waiter('endpoint_in_service')
waiter.wait(EndpointName=ENDPOINT_NAME)

## Controlled experiment using explicit variants

In [None]:
# Create SageMaker models for A/B testing, one per production variant
from strands.models.sagemaker import SageMakerAIModel
from strands import Agent, tool
from strands_tools import http_request, calculator


def _variant_model(variant_name):
    """Build a SageMakerAIModel for ENDPOINT_NAME with ``target_variant`` set to ``variant_name``."""
    return SageMakerAIModel(
        endpoint_config={
            "endpoint_name": ENDPOINT_NAME,
            "region_name": region,
            "target_variant": variant_name,
        },
        payload_config={
            "max_tokens": 2048,
            "temperature": 0.2,
            "stream": True,
        },
    )


# Variant names match the VariantName entries of the A/B endpoint configuration.
model_sagemaker_a = _variant_model("qwen-4b-variant")
model_sagemaker_b = _variant_model("qwen-8b-variant")

In [None]:
import time

mlflow.set_experiment("Strands_Agents_AB_testing")  # Start a new experiment

# The same prompt is sent to both variants; each run gets its own span and a
# "variant" tag on the current trace.
ab_prompt = "Where is the international space station now. (Use: http://api.open-notify.org/iss-now.json)"
ab_runs = [
    ("qwen-4b", model_sagemaker_a),  # "A" variant
    ("qwen-8b", model_sagemaker_b),  # "B" variant
]

for run_index, (variant_tag, variant_model) in enumerate(ab_runs):
    if run_index > 0:
        # Separator between the two runs.
        print("="*60)
        #time.sleep(5) # Pause shortly before running the other variant.
    with mlflow.start_span():
        mlflow.update_current_trace(tags={"variant": variant_tag})
        agent = Agent(model=variant_model, tools=[http_request])
        agent(ab_prompt)

## Transition to the new model

In [None]:
# Endpoint configuration with a single variant: the 8B model, weight 1.
NEW_ENDPOINT_CONFIG = "llm-endpoint-config-qwen-8b"

challenger_only_variant = {
    "VariantName": "qwen-8b-variant",
    "ModelName": model_b_name,
    "InitialInstanceCount": 1,
    "InstanceType": "ml.g5.2xlarge",
    "InitialVariantWeight": 1,
}
production_variants = [challenger_only_variant]

# Register the new configuration.
sagemaker_client.create_endpoint_config(
    ProductionVariants=production_variants,
    EndpointConfigName=NEW_ENDPOINT_CONFIG,
)

# Switch the existing endpoint (same name) over to the new configuration.
sagemaker_client.update_endpoint(
    EndpointConfigName=NEW_ENDPOINT_CONFIG,
    EndpointName=ENDPOINT_NAME,
)

# Wait until the endpoint is back in service.
waiter = boto3.client('sagemaker').get_waiter('endpoint_in_service')
waiter.wait(EndpointName=ENDPOINT_NAME)

## Clean up

In [None]:
# Remove the endpoint first, then every endpoint configuration this notebook used.
sagemaker_client.delete_endpoint(EndpointName=ENDPOINT_NAME)

for config_name in (INITIAL_CONFIG_NAME, ENDPOINT_CONFIG_AB_TESTING, NEW_ENDPOINT_CONFIG):
    sagemaker_client.delete_endpoint_config(EndpointConfigName=config_name)

# Also delete the two SageMaker models created by this notebook, which were
# previously left behind: the champion ("qwen-4b-model") and the challenger.
for model_name in ("qwen-4b-model", model_b_name):
    sagemaker_client.delete_model(ModelName=model_name)

# NOTE: the MLflow tracking server ('strands-mlflow-server') is intentionally not
# deleted here because it may have existed before this notebook ran. If this
# notebook created it, remove it separately.