## Clean your endpoint content

#### In this notebook, we will parse through the existing endpoint.json to delete all of the existing endpoints once you are done with running your respective benchmarking tests.

***If you are with running all of the tests, and want to delete the existing endpoints, run this notebook.***

In [2]:
## Import all necessary libraries
import json
import boto3
import logging
from globals import *
from utils import load_config

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [3]:
## Set your logger to display all of the endpoints being cleaned
logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

## Load the config file to extract our endpoint.json file and its respective file path
config = load_config(CONFIG_FILE)

In [4]:
## Parse through the existing endpoint.json file
## read the list of deployed endpoints that are active
endpoint_info_list = json.loads(Path(ENDPOINT_LIST_FPATH).read_text())
logger.info(json.dumps(endpoint_info_list, indent=2))

[2024-01-18 17:33:48,333] p1001 {885729754.py:4} INFO - [
  {
    "experiment_name": "llama2-70b-chat-p4d.24xlarge-tgi-inference-2.0.1-tgi0.9.3-gpu-py39-cu118",
    "endpoint": {
      "EndpointName": "huggingface-pytorch-tgi-inference-2024-01-18-13-05-25-407",
      "EndpointArn": "arn:aws:sagemaker:us-east-1:015469603702:endpoint/huggingface-pytorch-tgi-inference-2024-01-18-13-05-25-407",
      "EndpointConfigName": "huggingface-pytorch-tgi-inference-2024-01-18-13-05-25-407",
      "ProductionVariants": [
        {
          "VariantName": "AllTraffic",
          "DeployedImages": [
            {
              "SpecifiedImage": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi0.9.3-gpu-py39-cu118-ubuntu20.04",
              "ResolvedImage": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference@sha256:947399ae8b3fa131fc6d2da99f56c9c41195c7ce7cbd890e1e6c0dc328d238cd",
              "ResolutionTime": "2024-01-18 13:05:26.

In [14]:
sm_client = boto3.client("sagemaker")
# Iterate over the endpoint_info_list and mark the items for deletion
for item in endpoint_info_list:   
    ep_name = item['endpoint']["EndpointName"]
    try:
        logger.info(f"Going to describing the endpoint -> {ep_name}")
        resp = sm_client.describe_endpoint(EndpointName=ep_name)
        if resp['EndpointStatus'] == 'InService':
            logger.info(f"going to delete {ep_name}")
            sm_client.delete_endpoint(EndpointName=ep_name)
            logger.info(f"deleted {ep_name}")
    except Exception as e:
        logger.error(f"error deleting endpoint={ep_name}, exception={e}")

[2024-01-18 17:45:34,267] p1001 {3637859735.py:6} INFO - Going to describing the endpoint -> huggingface-pytorch-tgi-inference-2024-01-18-13-05-25-407
[2024-01-18 17:45:34,417] p1001 {3637859735.py:13} ERROR - error deleting endpoint=huggingface-pytorch-tgi-inference-2024-01-18-13-05-25-407, exception=An error occurred (ValidationException) when calling the DescribeEndpoint operation: Could not find endpoint "huggingface-pytorch-tgi-inference-2024-01-18-13-05-25-407".
[2024-01-18 17:45:34,418] p1001 {3637859735.py:6} INFO - Going to describing the endpoint -> llama-2-13b-inf2-24xlarge-1705583487
[2024-01-18 17:45:34,579] p1001 {3637859735.py:13} ERROR - error deleting endpoint=llama-2-13b-inf2-24xlarge-1705583487, exception=An error occurred (ValidationException) when calling the DescribeEndpoint operation: Could not find endpoint "llama-2-13b-inf2-24xlarge-1705583487".
[2024-01-18 17:45:34,580] p1001 {3637859735.py:6} INFO - Going to describing the endpoint -> llama-2-13b-inf2-48xlarg