In [None]:
!%pip install sagemaker --upgrade  --quiet

In [None]:
import json
import boto3
import sagemaker
from sagemaker import Model, image_uris, serializers, deserializers

# Resolve the execution context once; later cells reuse these names.
role = sagemaker.get_execution_role()  # execution role for the endpoint
sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs
# Use the public accessor instead of the private `_region_name` attribute.
region = sess.boto_region_name  # region name of the current SageMaker Studio environment
account_id = sess.account_id()  # account_id of the current SageMaker Studio environment

# Pin both clients to the session's region so every call in this notebook
# targets the same region as the SageMaker session.
sm_client = boto3.client("sagemaker", region_name=region)
smr_client = boto3.client("sagemaker-runtime", region_name=region)
print(f"role: {role}\nsess: {sess}\nregion: {region}\naccount_id: {account_id}")

In [None]:
# Container environment for the TensorRT-LLM rolling-batch backend
# (OPTION_ROLLING_BATCH="trtllm"). All values are passed as strings.
# NOTE(review): OPTION_MODEL_ID points at the 13b checkpoint while the resource
# names used elsewhere in this notebook say "7b" - confirm which is intended.
env_trtllm = dict(
    HUGGINGFACE_HUB_CACHE="/tmp",
    TRANSFORMERS_CACHE="/tmp",
    SERVING_LOAD_MODELS="test::MPI=/opt/ml/model",
    OPTION_MODEL_ID="codellama/CodeLlama-13b-hf",
    OPTION_TRUST_REMOTE_CODE="true",
    OPTION_TENSOR_PARALLEL_DEGREE="max",
    OPTION_ROLLING_BATCH="trtllm",
    OPTION_MAX_ROLLING_BATCH_SIZE="32",
    OPTION_DTYPE="fp16",
)

In [None]:
# Look up the container image URI for the "djl-tensorrtllm" framework,
# version 0.26.0, in the region of the session's boto session.
trtllm_retrieve_kwargs = {
    "framework": "djl-tensorrtllm",
    "region": sess.boto_session.region_name,
    "version": "0.26.0",
}
trtllm_image_uri = image_uris.retrieve(**trtllm_retrieve_kwargs)

In [None]:
# Container environment for the LMI-Dist rolling-batch backend
# (OPTION_ROLLING_BATCH="lmi-dist"). All values are passed as strings.
env_lmidist = dict(
    HUGGINGFACE_HUB_CACHE="/tmp",
    TRANSFORMERS_CACHE="/tmp",
    SERVING_LOAD_MODELS="test::MPI=/opt/ml/model",
    OPTION_MODEL_ID="codellama/CodeLlama-13b-hf",
    OPTION_TRUST_REMOTE_CODE="true",
    OPTION_TENSOR_PARALLEL_DEGREE="max",
    OPTION_ROLLING_BATCH="lmi-dist",
    OPTION_MAX_ROLLING_BATCH_SIZE="32",
    OPTION_DTYPE="fp16",
)

# Look up the container image URI for the "djl-deepspeed" framework,
# version 0.26.0, in the region of the session's boto session.
deepspeed_retrieve_kwargs = {
    "framework": "djl-deepspeed",
    "region": sess.boto_session.region_name,
    "version": "0.26.0",
}
deepspeed_image_uri = image_uris.retrieve(**deepspeed_retrieve_kwargs)

In [None]:
# Each serving backend needs BOTH its own environment variables and its own
# container image. They are selected as a pair through a single key so the two
# can never be mismatched (previously two independent lines had to be
# commented/uncommented together).
serving_backends = {
    "trtllm": (env_trtllm, trtllm_image_uri),
    "lmi-dist": (env_lmidist, deepspeed_image_uri),  # use this when generating tokens > 1024
}

# - Select the backend which will tune the deployment server and pick the container.
serving_backend = "trtllm"
env, inference_image_uri = serving_backends[serving_backend]

print(f"Environment variables are ---- > {env}")
print(f"Image going to be used is ---- > {inference_image_uri}")

In [None]:
# Derive a model name from the fixed base, then register the model with SageMaker.
# NOTE(review): the base name says "7b" while the environment's OPTION_MODEL_ID
# references a 13b checkpoint - confirm the label is intended.
model_name = sagemaker.utils.name_from_base("lmi-codellama-7b")
print(model_name)

# The container definition pairs the selected image with its environment variables.
primary_container = {"Image": inference_image_uri, "Environment": env}
create_model_response = sm_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    PrimaryContainer=primary_container,
)

model_arn = create_model_response["ModelArn"]
print("Created Model: " + model_arn)

In [None]:
# Both names are derived from the model name so the three resources are easy to match up.
endpoint_config_name = model_name + "-config"
endpoint_name = model_name + "-endpoint"

# Single production variant: one ml.g5.12xlarge instance, with a container
# startup health-check timeout of 2400 seconds (40 minutes).
production_variant = {
    "VariantName": "variant1",
    "ModelName": model_name,
    "InstanceType": "ml.g5.12xlarge",
    "InitialInstanceCount": 1,
    "ContainerStartupHealthCheckTimeoutInSeconds": 2400,
}

endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)
# Display the raw API response as the cell output.
endpoint_config_response

In [None]:
# Start creating the endpoint from the endpoint configuration defined above;
# the status is polled in the next cell.
create_endpoint_response = sm_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)
print("Created Endpoint: " + create_endpoint_response["EndpointArn"])

In [None]:
import time

# Seconds to wait between two status checks.
endpoint_poll_interval_seconds = 60

# Poll the endpoint until it leaves the "Creating" state.
resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp["EndpointStatus"]
print("Status: " + status)

while status == "Creating":
    time.sleep(endpoint_poll_interval_seconds)
    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
    print("Status: " + status)

print("Arn: " + resp["EndpointArn"])
print("Status: " + status)

# The loop above exits on ANY status other than "Creating", including "Failed".
# Stop here instead of letting later cells invoke an endpoint that is not serving.
if status != "InService":
    failure_reason = resp.get("FailureReason", "no failure reason reported")
    raise RuntimeError(
        f"Endpoint {endpoint_name} is in status {status}, expected InService: {failure_reason}"
    )

In [None]:
# Code-completion prompt. It is written line by line so that the source-file
# indentation of this cell does not leak into the Python code sent to the model
# (the previous triple-quoted literal prefixed every line after the first with
# 12 extra spaces). The text now matches the prompt used for the API Gateway call.
prompt = (
    "import argparse\n"
    "def main(string: str):\n"
    "    print(string)\n"
    "    print(string[::-1])\n"
    '    if __name__ == "__main__":'
)

# Generation parameters forwarded to the container with the prompt.
params = {"max_new_tokens": 256, "temperature": 0.1}

response_model = smr_client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=json.dumps(
        {
            "inputs": prompt,
            "parameters": params
        }
    ),
    ContentType="application/json",
)

# The response body is a stream; read and decode it for display as the cell output.
response_model["Body"].read().decode("utf8")

In [None]:
import time
import boto3
from botocore.exceptions import ClientError

# Names of the resources created for the HTTP front end of the endpoint.
api_name = "lmi-codellama-7b-api"
api_stage = "prod"
# Reuse the region of the SageMaker session instead of hard-coding one: the
# integration URI, the Lambda permission source ARN and the invoke URL are all
# built from this value and must match the region the resources are created in.
api_region = region
api_lambda_name = "lmi-codellama-7b-lambda"
api_lambda_role_name = "lmi-codellama-7b-lambda-role"
api_lambda_policy_name = "lmi-codellama-7b-lambda-policy"

# Regional clients are pinned to api_region so they agree with the ARNs/URLs
# built from it. The region argument is simply passed through for IAM as well.
lambda_client = boto3.client("lambda", region_name=api_region)
iam_client = boto3.client("iam", region_name=api_region)
apigw_client = boto3.client("apigateway", region_name=api_region)

In [None]:
# Create IAM role for Lambda
# Trust policy that allows the Lambda service to assume the role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "lambda.amazonaws.com"},
            "Action": "sts:AssumeRole"
        }
    ]
}

try:
    lambda_role = iam_client.create_role(
        RoleName=api_lambda_role_name,
        AssumeRolePolicyDocument=json.dumps(assume_role_policy_document)
    )
    print(f"Created IAM role: {api_lambda_role_name}")
except ClientError as e:
    if e.response['Error']['Code'] != 'EntityAlreadyExists':
        raise
    print(f"IAM role {api_lambda_role_name} already exists")
    # Keep `lambda_role` defined on re-runs too, so lambda_role['Role']['Arn']
    # can be read whether or not the role was created by this execution.
    lambda_role = iam_client.get_role(RoleName=api_lambda_role_name)

# Attach policy to IAM role
# NOTE(review): these actions are granted on every resource ("*"); consider
# scoping sagemaker:InvokeEndpoint to the ARN of the endpoint deployed above.
policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents",
                "sagemaker:InvokeEndpoint"
            ],
            "Resource": "*"
        }
    ]
}

try:
    iam_client.put_role_policy(
        RoleName=api_lambda_role_name,
        PolicyName=api_lambda_policy_name,
        PolicyDocument=json.dumps(policy_document)
    )
    print(f"Attached policy to IAM role: {api_lambda_role_name}")
except ClientError as e:
    print(f"Error attaching policy to IAM role: {e}")
    # Without this inline policy the role carries none of the permissions listed
    # above, so stop here rather than continue with an unusable role.
    raise


In [None]:
# Look the role up by name and keep its ARN for later cells.
response = iam_client.get_role(RoleName=api_lambda_role_name)
role_details = response["Role"]
role_arn = role_details["Arn"]
print("IAM role ARN: " + role_arn)

In [None]:
# Zip the Lambda function
# Build the Lambda deployment package with shell commands: remove any previous
# archive, then zip lambda_function.py into lambda_function.zip.
# NOTE(review): lambda_function.py is not created by this notebook; it is
# presumably expected to sit next to it - confirm before running.
!rm -rf lambda_function.zip
!zip -r lambda_function.zip lambda_function.py

In [None]:
# Create Lambda function
# Read the deployment package through a context manager so the file handle is
# closed, and avoid binding the bytes to the name of the stdlib `zipfile` module.
with open("lambda_function.zip", "rb") as package_file:
    lambda_zip_bytes = package_file.read()

# Environment passed to the function: which endpoint to call and in which region.
lambda_environment = {
    'Variables': {
        'SAGEMAKER_ENDPOINT_NAME': endpoint_name,
        'REGION_NAME': api_region
    }
}

# A freshly created IAM role may not be assumable by Lambda for a few seconds;
# in that case create_function is retried a limited number of times.
max_create_attempts = 6
create_retry_delay_seconds = 5

for create_attempt in range(1, max_create_attempts + 1):
    try:
        lambda_response = lambda_client.create_function(
            FunctionName=api_lambda_name,
            Runtime='python3.10',
            # `role_arn` comes from the get_role lookup, so it is defined even when
            # the role already existed and create_role was skipped.
            Role=role_arn,
            Handler='lambda_function.lambda_handler',
            Code=dict(ZipFile=lambda_zip_bytes),
            Timeout=30,
            Environment=lambda_environment
        )
        print(f"Created Lambda function: {api_lambda_name}")
        break
    except ClientError as e:
        error = e.response['Error']
        if error['Code'] == 'ResourceConflictException':
            print(f"Lambda function {api_lambda_name} already exists")
            # The endpoint name changes on every run of this notebook, so point the
            # existing function at the endpoint created by this run.
            lambda_client.update_function_configuration(
                FunctionName=api_lambda_name,
                Environment=lambda_environment
            )
            print(f"Updated environment of Lambda function: {api_lambda_name}")
            break
        role_not_ready = (
            error['Code'] == 'InvalidParameterValueException'
            and 'cannot be assumed' in error.get('Message', '')
        )
        if role_not_ready and create_attempt < max_create_attempts:
            time.sleep(create_retry_delay_seconds)
            continue
        raise

In [None]:
# Look the function up by name and keep its ARN for the API Gateway integration.
response = lambda_client.get_function(FunctionName=api_lambda_name)
function_configuration = response["Configuration"]
lambda_arn = function_configuration["FunctionArn"]
print("Lambda function ARN: " + lambda_arn)

In [None]:
# Create API Gateway
# On failure the error is reported and api_id is set to None so the rest of
# this cell is skipped.
try:
    api_response = apigw_client.create_rest_api(
        name=api_name,
        description='API for CodeLlama 7B model',
        endpointConfiguration={'types': ['REGIONAL']}
    )
    api_id = api_response['id']
    print(f"Created API Gateway: {api_name}")
except ClientError as e:
    print(f"Error creating API Gateway: {e}")
    api_id = None

if api_id:
    # Get API Gateway root resource ID (the resource whose path is "/").
    # next() stops at the first match instead of building a list and indexing it.
    resources = apigw_client.get_resources(restApiId=api_id)
    root_id = next(
        resource['id'] for resource in resources['items'] if resource['path'] == '/'
    )

    # Create API Gateway method: POST on the root resource, no authorization.
    apigw_client.put_method(
        restApiId=api_id,
        resourceId=root_id,
        httpMethod='POST',
        authorizationType='NONE'
    )

    # Set up API Gateway integration with Lambda (proxy integration).
    apigw_client.put_integration(
        restApiId=api_id,
        resourceId=root_id,
        httpMethod='POST',
        type='AWS_PROXY',
        integrationHttpMethod='POST',
        uri=f"arn:aws:apigateway:{api_region}:lambda:path/2015-03-31/functions/{lambda_arn}/invocations"
    )

    # Deploy API to the configured stage.
    apigw_client.create_deployment(
        restApiId=api_id,
        stageName=api_stage
    )

    # Keep the invoke URL in a variable so it can be reused instead of copied by hand.
    api_url = f"https://{api_id}.execute-api.{api_region}.amazonaws.com/{api_stage}"
    print(f"API Gateway deployed. Endpoint URL: {api_url}")
else:
    api_url = None
    print("Failed to create API Gateway. Skipping deployment.")

In [None]:
# Add permission for API Gateway to invoke Lambda
# The API creation cell sets api_id to None on failure; skip in that case
# instead of registering a permission for a non-existent API.
if not api_id:
    print("No API Gateway available. Skipping Lambda permission.")
else:
    try:
        lambda_client.add_permission(
            FunctionName=lambda_arn,
            StatementId=f'apigateway-invoke-{api_id}',
            Action='lambda:InvokeFunction',
            Principal='apigateway.amazonaws.com',
            SourceArn=f"arn:aws:execute-api:{api_region}:{account_id}:{api_id}/*/*"
        )
        print("Added permission for API Gateway to invoke Lambda function")
    except ClientError as e:
        # A ResourceConflictException is ignored on purpose (cell re-run with the
        # same statement id); any other error is reported.
        if e.response['Error']['Code'] != 'ResourceConflictException':
            print(f"Error adding Lambda permission: {e}")
            # You might want to handle this error, possibly by cleaning up created resources


In [None]:
# Invoke the API Gateway and parse the response.
# The URL is built from the API created by this notebook run; the previous curl
# command targeted a hard-coded API id and region left over from an earlier run.
import urllib.error
import urllib.request

api_invoke_url = f"https://{api_id}.execute-api.{api_region}.amazonaws.com/{api_stage}"
api_payload = {
    "prompt": "import argparse\ndef main(string: str):\n    print(string)\n    print(string[::-1])\n    if __name__ == \"__main__\":",
    "parameters": {"max_new_tokens": 256, "temperature": 0.1},
}
api_request = urllib.request.Request(
    api_invoke_url,
    data=json.dumps(api_payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)

try:
    with urllib.request.urlopen(api_request, timeout=60) as api_http_response:
        api_response_text = api_http_response.read().decode("utf-8")
except urllib.error.HTTPError as http_error:
    # Show the error body returned by the API rather than only the status line.
    print(f"API returned HTTP {http_error.code}")
    api_response_text = http_error.read().decode("utf-8")

# Parse the body as JSON when possible; otherwise display the raw text.
try:
    api_result = json.loads(api_response_text)
except json.JSONDecodeError:
    api_result = api_response_text
api_result

In [None]:
# Clean up
# Every deletion is attempted independently: a failure in one step (for example
# a resource that is already gone) is reported and the remaining resources are
# still deleted, instead of aborting and leaving them behind.
cleanup_steps = [
    # Delete the endpoint
    ("SageMaker endpoint", lambda: sm_client.delete_endpoint(EndpointName=endpoint_name)),
    # Delete the endpoint configuration
    ("SageMaker endpoint configuration",
     lambda: sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)),
    # Delete the model
    ("SageMaker model", lambda: sm_client.delete_model(ModelName=model_name)),
    # Delete the Lambda function
    ("Lambda function", lambda: lambda_client.delete_function(FunctionName=api_lambda_name)),
    # Remove the inline policy from the IAM role (must happen before deleting the role)
    ("IAM role policy",
     lambda: iam_client.delete_role_policy(
         RoleName=api_lambda_role_name,
         PolicyName=api_lambda_policy_name
     )),
    # Delete the IAM role
    ("IAM role", lambda: iam_client.delete_role(RoleName=api_lambda_role_name)),
]

# Delete the API Gateway (api_id is None when its creation failed)
if api_id:
    cleanup_steps.append(
        ("API Gateway", lambda: apigw_client.delete_rest_api(restApiId=api_id))
    )

cleanup_failures = 0
for cleanup_label, cleanup_action in cleanup_steps:
    try:
        cleanup_action()
        print(f"Deleted {cleanup_label}")
    except ClientError as e:
        cleanup_failures += 1
        print(f"Could not delete {cleanup_label}: {e}")

if cleanup_failures:
    print(f"Clean up finished with {cleanup_failures} failure(s)")
else:
    print("Clean up complete")