## Inference Operator PySDK E2E Experience (JumpStart model)

<b>Prerequisite:</b> Data scientists should list the available clusters and set the cluster context.

In [None]:
from sagemaker.hyperpod.hyperpod_manager import HyperPodManager

In [None]:
# Set the AWS region used by the cells below (passed to set_context)
region = "us-west-2"

In [None]:
# Choose the HyperPod (HP) cluster to work on by setting it as the current
# context; the first argument is the cluster name, `region` is set in the
# cell above.
HyperPodManager.set_context('sagemaker-hyperpod-eks-cluster-demo-05-01', region=region)

### Select a JumpStart model from public hub

In [None]:
# Import the helper module
from jumpstart_public_hub_visualization_utils import get_all_public_hub_model_data

# Load and display SageMaker public hub models.
# Reuse the notebook-wide `region` variable (set in an earlier cell) rather
# than repeating the literal, so changing the region in one place keeps this
# cell in sync with the cluster context.
get_all_public_hub_model_data(region=region)

### Create JumpStart model endpoint

#### Create from spec object

In [None]:
from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Model, Server,SageMakerEndpoint, TlsConfig, EnvironmentVariables
from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
import yaml
import time

<b>Note:</b> The config classes imported above (such as `Model`, `Server`, and `SageMakerEndpoint`) are auto-generated by a script from the [Inference CRD file](https://code.amazon.com/packages/AWSCrescendoInferenceOperator/blobs/mainline/--/dist/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml).

In [None]:
# Build the individual config objects that make up the endpoint spec.
model = Model(model_id='deepseek-llm-r1-distill-qwen-1-5b', model_version='2.0.4')
server = Server(instance_type='ml.g5.8xlarge')
# NOTE: despite its name, `endpoint_name` holds a SageMakerEndpoint config
# object, not a plain string.
endpoint_name = SageMakerEndpoint(name='deepsek7bsme-testing-jumpstart-7-1')
tls_config = TlsConfig(tls_certificate_output_s3_uri='s3://tls-bucket-inf1-beta2')

# Assemble the spec object from the configs above.
js_endpoint = HPJumpStartEndpoint(
    model=model,
    server=server,
    sage_maker_endpoint=endpoint_name,
    tls_config=tls_config,
)

In [None]:
# Deploy using the spec object (`js_endpoint`) assembled from the configs
js_endpoint.create()

In [None]:
# Poll the endpoint status until it is ready, the deployment fails, or the
# timeout elapses.
t = 0
timeout = 600  # give up after 600 seconds
interval = 15  # poll every 15 seconds

while t < timeout:
    # use refresh() to fetch the latest status
    js_endpoint.refresh()

    try:
        # deployment status will be available immediately
        deployment_status = js_endpoint.status.deploymentStatus.deploymentObjectOverallState
        if deployment_status == 'DeploymentFailed':
            print('Deployment failed!')
            break

        # endpoint status only becomes available from refresh() at some
        # later point
        endpoint_status = js_endpoint.status.endpoints.sagemaker.state
        if endpoint_status == 'CreationCompleted':
            print('Endpoint is available!')
            break
    except AttributeError:
        # Part of the status tree is not populated yet (an intermediate
        # attribute is missing or None) -- keep polling. Only AttributeError
        # is caught so that genuine errors and Ctrl-C are not silently
        # swallowed, as they were by the previous bare `except`.
        pass

    time.sleep(interval)
    t += interval
    print('Refreshing instance status...')

# A `break` always leaves t < timeout, so reaching the limit means neither
# terminal state was observed in time.
if t >= timeout:
    print('Endpoint creation timed out!')

In [None]:
# Display the SageMaker endpoint state currently held on js_endpoint
# (e.g. 'CreationCompleted' once the endpoint is available)
js_endpoint.status.endpoints.sagemaker.state

In [None]:
# print endpoint in yaml
def print_yaml(endpoint):
    """Print *endpoint* as YAML, leaving out fields whose value is None.

    Args:
        endpoint: Object exposing ``model_dump(exclude_none=True)``.
    """
    populated_fields = endpoint.model_dump(exclude_none=True)
    rendered = yaml.dump(populated_fields)
    print(rendered)

In [None]:
# List endpoints and print the first one as YAML;
# output is similar to kubectl get jumpstartmodels
endpoint_list = HPJumpStartEndpoint.list()
if endpoint_list:
    print_yaml(endpoint_list[0])
else:
    # Indexing [0] on an empty result would raise IndexError; report instead.
    print('No JumpStart endpoints found.')

In [None]:
# Fetch a single endpoint by name and print it as YAML;
# output is similar to kubectl describe jumpstartmodel
# NOTE(review): the name passed here equals the model_id used at creation --
# presumably the resource name defaults to the model id; confirm.
endpoint = HPJumpStartEndpoint.get(name='deepseek-llm-r1-distill-qwen-1-5b')
print_yaml(endpoint)

### Invoke endpoint

In [None]:
# Send a JSON request body to the endpoint and show the raw response payload
data = '{"inputs":"What is the capital of USA?"}'

response = endpoint.invoke(body=data)
response.body.read()

In [None]:
# Print the operator logs
# (since_hours=1 presumably limits output to the last hour -- confirm)
print(js_endpoint.get_operator_logs(since_hours=1))

# To get the log of one specific pod instead, pass its name:
# js_endpoint.get_logs(pod='pod-name')

In [None]:
# Clean up: delete the endpoint object retrieved with HPJumpStartEndpoint.get()
endpoint.delete()