## Evo 2 [WORK IN PROGRESS, NOT TESTED]

This notebook shows how to deploy Evo 2 as a `Predictor` using the SageMaker Python SDK.

In [None]:
import boto3
import json
import sagemaker
from sagemaker.model import Model
from sagemaker import get_execution_role

In [None]:
# Look up the IAM execution role with the SageMaker SDK helper; the value is
# passed to the SageMaker Model when it is created later in the notebook.
role = get_execution_role()  # Or you can specify the role ARN directly
# Echo the value as the cell output so it can be looked up in the IAM console.
role

## Update IAM role
To pull the Docker image used below, the notebook's IAM role may need additional permissions. Using the value of the `role` variable shown above, open the IAM console and make sure that role has the AWS managed policy `AmazonEC2ContainerRegistryPullOnly` attached.

In [None]:
# SageMaker session pinned to an explicit region
region = 'us-east-1'  # Change to your AWS region, must be same region as ECR image
sagemaker_session = sagemaker.Session(boto3.session.Session(region_name=region))

# ECR Image URI (URI of already registered image)
ecr_image = "111918798052.dkr.ecr.us-east-1.amazonaws.com/evo2"  # Your ECR image URI

# Names under which the SageMaker model and the endpoint are created
model_name = "evo2-40b-model-p5e-sm-docker"
endpoint_name = "evo2-40b-endpoint-p5e-sm-docker"

# Environment variables setup - Required for model loading inside the container
environment = {
    'DEFAULT_MODEL_NAME': 'evo2_40b',  # base model
    'MODEL_PATH': '/opt/ml/model'     # SageMaker's default model path
}

# Create model object
model = Model(
    image_uri=ecr_image,
    model_data=None,  # Set to None when model artifacts are included in the image
    role=role,
    name=model_name,
    env=environment,
    sagemaker_session=sagemaker_session,
    # Without predictor_cls, Model.deploy() returns None, and the wrapper cell
    # below would fail when it sets the serializer on the predictor.
    predictor_cls=sagemaker.predictor.Predictor,
)

# Deploy endpoint; returns a Predictor bound to `endpoint_name`
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.p5e.48xlarge',  # Choose appropriate GPU instance type
    endpoint_name=endpoint_name,
    tags=[{'Key': 'Project', 'Value': 'evo2'}]
)

In [None]:
# Create a wrapper to match the original API
class Evo2Predictor:
    """Adapter exposing a ``generate`` method on top of a SageMaker predictor."""

    def __init__(self, predictor):
        """Store ``predictor`` and configure it to send and receive JSON.

        Args:
            predictor: Predictor object for the deployed endpoint. Its
                ``serializer`` and ``deserializer`` attributes are overwritten.
        """
        predictor.serializer = sagemaker.serializers.JSONSerializer()
        predictor.deserializer = sagemaker.deserializers.JSONDeserializer()
        self.predictor = predictor

    def generate(self, input_seqs, n_tokens=500, temperature=1.0, model_name=None):
        """
        Send a generation request to the endpoint, mirroring the local Evo2 API.

        Args:
            input_seqs (list): List of input sequences
            n_tokens (int): Number of tokens to generate
            temperature (float): Temperature for sampling
            model_name (str, optional): The model name to use. Only included in
                the request when truthy; otherwise the endpoint's default applies.

        Returns:
            The value returned by ``self.predictor.predict`` for the request.
        """
        # 'model_name' is appended last and only when a non-empty value was given
        optional_fields = {'model_name': model_name} if model_name else {}
        request_body = {
            'input_seqs': input_seqs,
            'n_tokens': n_tokens,
            'temperature': temperature,
            **optional_fields,
        }
        return self.predictor.predict(request_body)


# Adapt the deployed predictor to the Evo2-style generate() interface
evo2_predictor = Evo2Predictor(predictor)

# One example DNA prompt used as a smoke test
input_seqs = ["GAATAGGAACAGCTCCGGTCTACAGCTCCCAGCGTGAGCGACGCAGAAGACGGTGATTTCTGCATTTCCATCTGAGGTACCGGGTTCATCTCACTAGGGAGTGCCAGACAGTGGGCGCAGGCCAGTGTGTGTGCGCACCGTGCGCGAGCCGAAGCAGGGCGAGGCATTGCCTCACCTGGGAAGCGCAAGGGGTCAGGGAGTTCCCTTTCCGAGTCAAAGAAAGGG"]

# No model_name is passed, so the request carries no 'model_name' field
print("Using default model:")
generations = evo2_predictor.generate(input_seqs, n_tokens=500, temperature=1.0)
print(generations)