## Email Name: Prepare Training Data & Finetune Mistral-7B

In this notebook, we will prepare the training data and perform instruction fine-tuning of the `Mistral-7B` LLM.

- Training data is stored in JSON Lines (`.jsonl`) format, where each line is a dictionary representing a single data sample. All training data must be in a single folder, but it may be split across multiple `.jsonl` files.
- The training folder can also contain a `template.json` file describing the input and output formats.

### Setup

In [2]:
import sagemaker
import boto3
# Temporary session: only needed so that default_bucket() can be called in the fallback below.
sess = sagemaker.Session()

# Bucket used as the session's default bucket. The fallback to the account's
# default bucket only runs if this value is changed to None.
sagemaker_session_bucket='sagemaker-sigparser-caylent-mlops'
if sagemaker_session_bucket is None and sess is not None:
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError, so resolve the role ARN by its
    # name through IAM instead.
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Replace the temporary session with one bound to the chosen bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {sess.boto_region_name}")
print(f"sagemaker default bucket: {sess.default_bucket()}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::818442660361:role/service-role/AmazonSageMaker-ExecutionRole-20231103T203000
sagemaker session region: us-east-1
sagemaker default bucket: sagemaker-sigparser-caylent-mlops


In [3]:
import pandas as pd
import sys
import boto3
sys.path.append('..')
sys.path.append('../..')
from utils.s3_helper import read_s3_csv_to_dataframe
from prompts.email_names_v2 import prompt_email_names
from sagemaker.s3 import S3Uploader
import json
import datetime
from sagemaker import hyperparameters

### Load Train Data

In [23]:
# Key prefix (inside the session's default bucket) that holds the raw labelled CSV.
s3_input_prefix = 'data/email-names/input/raw'
# Date label of the data snapshot; not referenced by any other visible cell.
data_timestamp = '2024-04-08'
file_name = 'sp_llm_emailname_training-apr8.csv'

# Full object key of the raw CSV: "<prefix>/<file name>".
s3_data_path = "/".join((s3_input_prefix, file_name))

In [24]:
# Read the raw CSV at s3_data_path from the session's default bucket into a DataFrame.
train_df = read_s3_csv_to_dataframe(sess.default_bucket(), s3_data_path)

# Cell output: (number of rows, number of columns).
train_df.shape

(21150, 8)

In [25]:
# Preview the first rows of the raw training data.
train_df.head()

Unnamed: 0,Test Group,Email Address,Display Name,First Name,Middle Name,Last Name,Name Prefix,Name Suffix
0,TRAINING,a_anderson@allianca.com,Alex Anderson,Alex,,Anderson,,
1,TRAINING,a_anderson@alliant.com,Alex - Alliant Insurance Ltd. Anderson,Alex,,Anderson,,
2,TRAINING,a_bell@carpenterfarraday.com,ANNA L. BELL,Anna,L.,Bell,,
3,TRAINING,a_bodnar@orquest.com,"BODNAR, Akshay (external)",Akshay,,Bodnar,,
4,TRAINING,a_brown@onesteamboatplace.com,BROWN Adam,Adam,,Brown,,


### Train Data Preprocessing

In [26]:
# Count the missing (NaN) values in each column before any cleaning is applied.
nan_count_per_column = train_df.isna().sum()

print(f"NaN count per column: {nan_count_per_column}")

NaN count per column: Test Group           0
Email Address        0
Display Name         0
First Name           1
Middle Name      18884
Last Name            0
Name Prefix      21039
Name Suffix      21017
dtype: int64


In [27]:
# Replace every NaN with an empty string so that all name components are plain
# strings (the helpers further down call .strip() on them).
train_df = train_df.fillna("")

# Sanity check: every per-column count printed here should now be 0.
remaining_nan_counts = train_df.isna().sum()
print(f"NaN count per column after replacement: {remaining_nan_counts}")

NaN count per column after replacement: Test Group       0
Email Address    0
Display Name     0
First Name       0
Middle Name      0
Last Name        0
Name Prefix      0
Name Suffix      0
dtype: int64


In [28]:
# Preview the first rows again after the NaN replacement.
train_df.head()

Unnamed: 0,Test Group,Email Address,Display Name,First Name,Middle Name,Last Name,Name Prefix,Name Suffix
0,TRAINING,a_anderson@allianca.com,Alex Anderson,Alex,,Anderson,,
1,TRAINING,a_anderson@alliant.com,Alex - Alliant Insurance Ltd. Anderson,Alex,,Anderson,,
2,TRAINING,a_bell@carpenterfarraday.com,ANNA L. BELL,Anna,L.,Bell,,
3,TRAINING,a_bodnar@orquest.com,"BODNAR, Akshay (external)",Akshay,,Bodnar,,
4,TRAINING,a_brown@onesteamboatplace.com,BROWN Adam,Adam,,Brown,,


In [29]:
# Name-component columns whose share of empty values we want to inspect.
columns_to_check = ['First Name', 'Middle Name', 'Last Name', 'Name Prefix', 'Name Suffix']

# Boolean mask of cells equal to "", averaged per column and scaled to a percentage.
is_empty_string = train_df[columns_to_check] == ""
empty_string_distribution = is_empty_string.mean() * 100

print("Distribution (%) of empty strings in specified columns:")
print(empty_string_distribution)

Distribution (%) of empty strings in specified columns:
First Name      0.004728
Middle Name    89.286052
Last Name       0.000000
Name Prefix    99.475177
Name Suffix    99.371158
dtype: float64


### Prepare Train Data

#### Configure Train Data: System Prompt, Instruction, Context and Response

In [30]:
# Pull the prompt pieces out of the imported prompt_email_names mapping.
# system_prompt and instruction become columns of the training frame below;
# prompt_version is embedded in the local JSONL file name.
system_prompt = prompt_email_names["system_prompt"]
instruction = prompt_email_names["instruction"]
prompt_version = prompt_email_names["prompt_version"]
prompt_type = prompt_email_names["prompt_type"]
print(f'prompt_version: {prompt_version}')

prompt_version: version-2-Ryan


In [31]:
def get_context(email_address, display_name):
    """Build the ``context`` field of one training record.

    Both values are stripped of surrounding whitespace and serialised as a
    JSON object prefixed with the literal marker ``Input:``, for example
    ``Input:{"Email Address": "a@b.com", "Display Name": "Alex Anderson"}``.

    The object is produced with ``json.dumps`` rather than string
    interpolation, so double quotes, backslashes and control characters inside
    a display name are escaped and the payload is always valid JSON. For values
    without such characters the result is byte-identical to plain
    interpolation.

    Args:
        email_address: Raw email address string.
        display_name: Raw display name string.

    Returns:
        The context string ``"Input:" + <JSON object>``.

    NOTE(review): any code that builds prompts at inference time should
    serialise the input the same way, so training and inference stay aligned.
    """
    payload = {
        "Email Address": email_address.strip(),
        "Display Name": display_name.strip(),
    }
    # ensure_ascii=False keeps non-ASCII names as-is instead of \uXXXX escapes.
    return "Input:" + json.dumps(payload, ensure_ascii=False)


# One context string per training row, built from the email address and display name.
contexts = train_df.apply(
    lambda row: get_context(row['Email Address'], row['Display Name']),
    axis=1,
)

In [32]:
# Inspect the context string generated for the first row.
contexts[0]

'Input:{"Email Address": "a_anderson@allianca.com", "Display Name": "Alex Anderson"}'

In [33]:
def get_response(first_name, mid_name, last_name, name_prefix, name_suffix):
    """Format the expected completion for one training record.

    Each component is stripped of surrounding whitespace and rendered on its
    own line as ``<Label>: <value>``. Every line, including the first, is
    preceded by a newline, and empty components keep their label with an empty
    value.

    Returns:
        A string of the form
        ``"\\nFirst Name: ...\\nMiddle Name: ...\\nLast Name: ...\\nName Prefix: ...\\nName Suffix: ..."``.
    """
    labelled_components = (
        ("First Name", first_name),
        ("Middle Name", mid_name),
        ("Last Name", last_name),
        ("Name Prefix", name_prefix),
        ("Name Suffix", name_suffix),
    )
    return "".join(
        f"\n{label}: {value.strip()}" for label, value in labelled_components
    )

# One expected completion per training row, built from the labelled name components.
responses = train_df.apply(
    lambda row: get_response(
        row['First Name'],
        row['Middle Name'],
        row['Last Name'],
        row['Name Prefix'],
        row['Name Suffix'],
    ),
    axis=1,
)

In [34]:
# Inspect the completion string generated for the first row.
responses[0]

'\nFirst Name: Alex\nMiddle Name: \nLast Name: Anderson\nName Prefix: \nName Suffix: '

In [35]:
# Assemble the training frame. The column names match the placeholders used in
# template.json ({system_prompt}, {instruction}, {context}, {response}).
# system_prompt and instruction are the same for every row; context and
# response are the per-row Series built above.
prompt_train_df = pd.DataFrame({'system_prompt':system_prompt,
                         'instruction':instruction,
                          'context': contexts,
                         'response':responses
                        })
prompt_train_df.head()

Unnamed: 0,system_prompt,instruction,context,response
0,You are a highly skilled assistant specializin...,Please extract the email name components from ...,"Input:{""Email Address"": ""a_anderson@allianca.c...",\nFirst Name: Alex\nMiddle Name: \nLast Name: ...
1,You are a highly skilled assistant specializin...,Please extract the email name components from ...,"Input:{""Email Address"": ""a_anderson@alliant.co...",\nFirst Name: Alex\nMiddle Name: \nLast Name: ...
2,You are a highly skilled assistant specializin...,Please extract the email name components from ...,"Input:{""Email Address"": ""a_bell@carpenterfarra...",\nFirst Name: Anna\nMiddle Name: L.\nLast Name...
3,You are a highly skilled assistant specializin...,Please extract the email name components from ...,"Input:{""Email Address"": ""a_bodnar@orquest.com""...",\nFirst Name: Akshay\nMiddle Name: \nLast Name...
4,You are a highly skilled assistant specializin...,Please extract the email name components from ...,"Input:{""Email Address"": ""a_brown@onesteamboatp...",\nFirst Name: Adam\nMiddle Name: \nLast Name: ...


In [36]:
# Dump the training records locally as JSON Lines (one record per line).
output_data_dir = '../data'
# NOTE(review): file_name is the CSV name from the load step, so the resulting
# file name ends in ".csv.jsonl" - confirm whether that is intended.
train_filename = f'{output_data_dir}/mistral-7b-fine-tuning-dataset-{prompt_version}-{file_name}.jsonl'
# force_ascii=False writes non-ASCII characters verbatim, so the file must be
# opened as UTF-8 explicitly instead of relying on the locale's default encoding.
with open(train_filename, "w", encoding="utf-8") as f:
    f.write(prompt_train_df.to_json(orient='records', lines=True, force_ascii=False))

#### Upload Train JSONL and Template to S3

In [37]:
# Prompt/completion template written next to the training data. The
# {placeholders} are the column names of prompt_train_df.
template = {
    "prompt": "{system_prompt}\n\n### Instruction:\n{instruction}\n\n### Input:\n{context}",
    "completion": "{response}",
}

# Written into output_data_dir, the same folder as the training JSONL.
with open(f'{output_data_dir}/template.json', 'w') as f:
    json.dump(template, f)

In [21]:
# Each run uploads into its own timestamped prefix under the training input prefix.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
model = 'Mistral-7B'
s3_train_input_prefix = 'data/email-names/input/training'

object_name = f'{s3_train_input_prefix}/{model}/{timestamp}'
train_data_location = f's3://{sess.default_bucket()}/{object_name}'

# Upload the JSONL written by the dump step. train_filename is used directly
# rather than rebinding file_name, so the dump cell can be re-run without
# producing a nested path inside its file name.
S3Uploader.upload(train_filename, train_data_location)
# template.json was written to output_data_dir, not the current working
# directory, so it has to be uploaded from there.
S3Uploader.upload(f'{output_data_dir}/template.json', train_data_location)

print(f"Training data uploaded to: s3://{sess.default_bucket()}/{object_name}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
Training data uploaded to: s3://sagemaker-sigparser-caylent-mlops/data/email-names/input/training/Mistral-7B/2024-04-08_18-48-25


### Model Training

In [4]:
# JumpStart model id and pinned model version used by the training cells below.
model_id, model_version = "huggingface-llm-mistral-7b", "2.3.0"

#### Training Hyperparameters

- We can use the default hyperparameters but overwrite if needed.
- Note: for now, we will not use `LoRA` for fine-tuning but this can be changed later on as well.

In [23]:
# Fetch the default training hyperparameters for this model id/version.
# They are printed for reference only; overrides are applied on the estimator.
mistral_hyperparameters = hyperparameters.retrieve_default(model_id=model_id, model_version=model_version)

print(f'Default Mistral Training Hyperparameters: {mistral_hyperparameters}')

Default Mistral Training Hyperparameters: {'peft_type': 'None', 'instruction_tuned': 'True', 'chat_dataset': 'False', 'epoch': '1', 'learning_rate': '6e-06', 'lora_r': '64', 'lora_alpha': '16', 'lora_dropout': '0', 'bits': '16', 'double_quant': 'True', 'quant_type': 'nf4', 'per_device_train_batch_size': '2', 'per_device_eval_batch_size': '8', 'add_input_output_demarcation_key': 'True', 'warmup_ratio': '0.1', 'train_from_scratch': 'False', 'fp16': 'False', 'bf16': 'True', 'evaluation_strategy': 'steps', 'eval_steps': '20', 'gradient_accumulation_steps': '8', 'logging_steps': '8', 'weight_decay': '0.2', 'load_best_model_at_end': 'True', 'max_train_samples': '-1', 'max_val_samples': '-1', 'seed': '10', 'max_input_length': '-1', 'validation_split_ratio': '0.2', 'train_data_split_seed': '0', 'preprocessing_num_workers': 'None', 'max_steps': '-1', 'gradient_checkpointing': 'True', 'early_stopping_patience': '3', 'early_stopping_threshold': '0.0', 'adam_beta1': '0.9', 'adam_beta2': '0.999', '

#### Train Model

In [None]:
%%time
from sagemaker.jumpstart.estimator import JumpStartEstimator

output_s3_path = f"s3://{sess.default_bucket()}/model/email-names/{model}/{timestamp}"
estimator = JumpStartEstimator(
    model_id=model_id,
    model_version=model_version,
    environment={"accept_eula": "true"},
    disable_output_compression=True, 
    instance_type = "ml.g5.24xlarge",
    output_path=output_s3_path,
    metric_definitions=[{'Name': 'train:loss', 'Regex': "'loss': ([0-9]+\.[0-9]+)"}]
)

# we can set the hyperparameters below:
estimator.set_hyperparameters(epoch="2", max_input_length="1024")
estimator.fit({"training": train_data_location}, logs=True)

INFO:sagemaker:Creating training-job with name: hf-llm-mistral-7b-2024-04-08-18-50-53-623


2024-04-08 18:50:53 Starting - Starting the training job...
2024-04-08 18:51:07 Pending - Training job waiting for capacity...
2024-04-08 18:51:34 Pending - Preparing the instances for training......
2024-04-08 18:52:30 Downloading - Downloading input data.......................................
2024-04-08 18:59:14 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2024-04-08 18:59:16,381 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2024-04-08 18:59:16,435 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-04-08 18:59:16,444 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2024-04-08 18:59:16,446 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2024

#### Training Job Analytics

In [29]:
from sagemaker import TrainingJobAnalytics

# Name of the most recent job started through `estimator` (requires the
# training cell to have run in this kernel).
training_job_name = estimator.latest_training_job.job_name

# Load the job's recorded metrics into a DataFrame and show the first 10 rows.
training_analytics_df = TrainingJobAnalytics(training_job_name=training_job_name).dataframe()
training_analytics_df.head(10)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


Unnamed: 0,timestamp,metric_name,value
0,0.0,train:loss,0.666
1,780.0,train:loss,0.0825
2,1980.0,train:loss,0.073
3,2700.0,train:loss,0.0927
4,3420.0,train:loss,0.0704
5,4680.0,train:loss,0.0658
6,5400.0,train:loss,0.0625
7,6600.0,train:loss,0.0609
8,7320.0,train:loss,0.0607
9,8100.0,train:loss,0.0594


### Copying model files to folder 'model-artifacts' (for deployment purposes only)

In [14]:
s3_client = boto3.client('s3')
# source_path must be the S3 folder of ONE training job, i.e. the prefix that
# directly contains 'debug-output/', 'profiler-output/' and 'output/'.
# Note that output_s3_path alone is not enough: the job folder is
# '<output_s3_path>/<training job name>/'.
#source_path = f'{output_s3_path}/{training_job_name}/'
source_path = 's3://sagemaker-sigparser-caylent-mlops/model/email-names/Mistral-7B/2024-04-01_06-04-42/hf-llm-mistral-7b-2024-04-01-06-20-01-214/'

# Split 's3://<bucket>/<prefix>' into its bucket and key prefix.
source_parts = source_path[5:].split('/', 1)
source_bucket = source_parts[0]
source_prefix = source_parts[1]
# Folder names are appended directly to the prefix, so it must end with '/'.
if not source_prefix.endswith('/'):
    source_prefix += '/'
# Mirror everything after 'model/email-names/' under 'model-artifacts/'.
destination_prefix = 'model-artifacts/' + source_prefix.split('model/email-names/')[1]

folders_to_copy = ['debug-output', 'profiler-output', 'output']
# The paginator does not depend on the folder, so create it once.
paginator = s3_client.get_paginator('list_objects_v2')
for folder in folders_to_copy:
    # List every object below this output folder of the training job.
    for result in paginator.paginate(Bucket=source_bucket, Prefix=source_prefix + folder):
        for obj in result.get('Contents', []):
            key = obj['Key']
            new_key = key.replace(source_prefix, destination_prefix, 1)
            # Managed copy: switches to multipart copy for large objects, whereas a
            # single copy_object call is limited to 5 GB per object (model weight
            # files can exceed that).
            s3_client.copy(
                CopySource={'Bucket': source_bucket, 'Key': key},
                Bucket=source_bucket,
                Key=new_key,
            )

### Deploy Trained Model Endpoint

In [30]:
# Deploy the fine-tuned model behind a real-time endpoint. No instance type is
# passed, so the SDK picks its default for hosting (see the log output below).
finetuned_predictor = estimator.deploy()

INFO:sagemaker.jumpstart:Using model 'huggingface-llm-mistral-7b' with version '2.3.0'. You can upgrade to version '2.3.1' to get the latest model specifications. Note that models may have different input/output signatures after a major version upgrade.
INFO:sagemaker.jumpstart:No instance type selected for inference hosting endpoint. Defaulting to ml.g5.2xlarge.
INFO:sagemaker.jumpstart:No instance type selected for inference hosting endpoint. Defaulting to ml.g5.2xlarge.
INFO:sagemaker:Creating model with name: hf-llm-mistral-7b-2024-04-09-14-04-34-438
INFO:sagemaker:Creating endpoint-config with name hf-llm-mistral-7b-2024-04-09-14-04-34-436
INFO:sagemaker:Creating endpoint with name hf-llm-mistral-7b-2024-04-09-14-04-34-436


------------------------------!

In [28]:
# Smoke-test the fine-tuned endpoint with one training example.

# Marker appended after the prompt, right before the model's answer.
# presumably the same key the training job inserts between prompt and
# completion (add_input_output_demarcation_key) - TODO confirm
input_output_demarkation_key = "\n\n### Response:\n"
context = contexts[1]

# Render the prompt exactly like the training template does.
rendered_prompt = template["prompt"].format(
    system_prompt=system_prompt,
    instruction=instruction,
    context=context,
)

payload = {
    "inputs": rendered_prompt + input_output_demarkation_key,
    "parameters": {"max_new_tokens": 100, "temperature": 0.1, "top_p": 0.1},
}

finetuned_response = finetuned_predictor.predict(payload, custom_attributes="accept_eula=true")

finetuned_response

[{'generated_text': '\nFirst Name: Akshay\nMiddle Name: \nLast Name: Bodnar\nName Prefix: \nName Suffix: \n\n \nFirst Name: Akshay\nMiddle Name: \nLast Name: Bodnar\nName Prefix: \nName Suffix: \n\n \nFirst Name: Akshay\nMiddle Name: \nLast Name: Bodnar\nName Prefix: \nName Suffix'}]

### Incremental training

In [23]:
from sagemaker.jumpstart.estimator import Estimator

# JSONL file with the additional samples for the incremental run.
incremental_train_data_location='s3://sagemaker-sigparser-caylent-mlops/data/email-names/input/training/Mistral-7B/2024-05-01_10-31-25/mistral-7b-incremental-training-dataset-sp_llm_emailname_training-apr8.jsonl'
# Fresh timestamp so the incremental run writes to its own output prefix.
# Note: this rebinds timestamp, model and output_s3_path from the earlier cells.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 
model = 'Mistral-7B'
output_s3_path = f"s3://{sess.default_bucket()}/model/email-names/{model}/{timestamp}"
# Name of the training job whose image, artifacts and source are reused below.
last_training_job = 'hf-llm-mistral-7b-2024-04-08-18-50-53-623'

In [24]:
# Create a SageMaker client.
sagemaker_client = boto3.client('sagemaker')
# Retrieve the full description of the previous training job. The estimator in
# the next cell picks its training image, model artifacts and source directory
# out of this response.
training_job_info = sagemaker_client.describe_training_job(TrainingJobName=last_training_job)

In [25]:
# Continue training from the previous job: reuse its training image, its model
# artifacts as the starting point, and its training script source.
estimator = Estimator(
    model_id=model_id,
    model_version=model_version,
    role=role,
    environment={"accept_eula": "true"},
    disable_output_compression=True,
    instance_type="ml.g5.24xlarge",
    instance_count=1,
    # Previous training job's image uri
    image_uri=training_job_info['AlgorithmSpecification']['TrainingImage'],
    # S3 uri of the model artifacts produced by the previous training job
    model_uri=training_job_info['ModelArtifacts']['S3ModelArtifacts'],
    entry_point="transfer_learning.py",
    # Source directory containing transfer_learning.py.
    # NOTE(review): this relies on the script channel being the third entry of
    # InputDataConfig - confirm, or select the channel by its ChannelName.
    source_dir=training_job_info['InputDataConfig'][2]['DataSource']['S3DataSource']['S3Uri'],
    output_path=output_s3_path,
    # Raw string: "\." inside a normal string literal is an invalid escape
    # sequence (a warning on recent Python versions). The pattern is unchanged.
    metric_definitions=[{'Name': 'train:loss', 'Regex': r"'loss': ([0-9]+\.[0-9]+)"}]
)

# Hyperparameters for the incremental run.
estimator.set_hyperparameters(epoch="1", max_input_length="1024")
# NOTE(review): the job name is fixed; training job names presumably have to be
# unique, so re-running this cell would need a new name - confirm.
estimator.fit({"training": incremental_train_data_location}, logs=True, job_name='mistral-incremental-training')

INFO:sagemaker:Creating training-job with name: mistral-incremental-training


2024-05-01 18:09:05 Starting - Starting the training job...
2024-05-01 18:09:33 Pending - Training job waiting for capacity...
2024-05-01 18:09:57 Pending - Preparing the instances for training......
2024-05-01 18:10:50 Downloading - Downloading input data.....................
2024-05-01 18:14:26 Downloading - Downloading the training image...
2024-05-01 18:14:56 Training - Training image download completed. Training in progress........[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2024-05-01 18:16:07,406 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2024-05-01 18:16:07,459 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-05-01 18:16:07,468 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2024-05-01 18:16:07,470 sagemaker_pytorch_container.train

#### The model artifacts for the above training job are stored in 's3://sagemaker-sigparser-caylent-mlops/model/email-names/Mistral-7B/2024-05-01_18-08-55/mistral-incremental-training/'

### Cleanup

In [15]:
# Delete the SageMaker model and endpoint created by estimator.deploy().
# Requires finetuned_predictor from the deployment cell to still be in the kernel.

finetuned_predictor.delete_model()
finetuned_predictor.delete_endpoint()

----------------------------------------------------------------------------------------------------------------------------

## Miscellaneous Steps

### Default Instance Types

Review sagemaker default instance type given the model id and version

In [40]:
from sagemaker import instance_types

# Look up the default inference instance type for this model id/version.
instance_type = instance_types.retrieve_default(
    model_id=model_id,
    model_version=model_version,
    scope="inference")
print(instance_type)

ml.g5.2xlarge


### Recommended Instance Types

Review sagemaker recommended instance types for inference, given the model id and version

In [42]:
# List all recommended instance types for inference.
# Note: this rebinds instance_type, which now holds a list rather than a
# single string (see the printed output).
instance_type = instance_types.retrieve(
    model_id=model_id,
    model_version=model_version,
    scope="inference")
print(instance_type)

['ml.g5.2xlarge', 'ml.g5.4xlarge', 'ml.g5.8xlarge', 'ml.g5.16xlarge']


### Deploy Uncompressed Model

We can deploy the uncompressed model, which can be used for inferencing. In order to deploy the uncompressed finetuned model, we need the following:
- Inference Image
- Model
- Endpoint Config
- Endpoint

In [21]:
# Repeated here so this deployment section can run without the training cells.
model_id, model_version = "huggingface-llm-mistral-7b", "2.3.0"

#### Inference Image

Fetch the inference image based on the model id and version

In [22]:
# Must specify JumpStart `model_id` and `model_version` when getting specs for JumpStart models.
from sagemaker import image_uris
# Resolve the inference container image for this model on ml.g5.2xlarge.
inference_image_uri = image_uris.retrieve(
    region=None,
    framework=None,  # automatically inferred from model_id
    image_scope="inference",
    model_id=model_id,
    model_version=model_version,
    instance_type="ml.g5.2xlarge",
)
print("inference_image_uri: {}".format(inference_image_uri))

inference_image_uri: 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4.0-gpu-py310-cu121-ubuntu20.04


#### Model

Create model using the inference image and the model artifacts

In [23]:
# Create a SageMaker model from the inference image and the uncompressed
# fine-tuned artifacts in S3.
model_name = 'mistral-7b-email-name-finetuned'
sagemaker_role = role
container = inference_image_uri
sagemaker_client = boto3.client('sagemaker')

create_model_response = sagemaker_client.create_model(
    ModelName = model_name,
    ExecutionRoleArn = sagemaker_role,
    PrimaryContainer = {
        "Image": container,
        "ModelDataSource": {
            "S3DataSource": {
                # Hard-coded output/model/ prefix of one specific training job;
                # update it to deploy a different run.
                "S3Uri": "s3://sagemaker-sigparser-caylent-mlops/model/email-names/Mistral-7B/2024-04-08_18-48-25/hf-llm-mistral-7b-2024-04-08-18-50-53-623/output/model/", 
                # The artifacts are an uncompressed S3 prefix, not a tarball.
                "S3DataType": "S3Prefix",
                "CompressionType": "None",
                "ModelAccessConfig": {
                    "AcceptEula": True
                }
            },
        },
        "Environment": {
            # presumably points the container at the locally mounted model
            # directory - TODO confirm
            'HF_MODEL_ID': '/opt/ml/model'
        },
    },
)

In [24]:
# Show the create_model response (contains the ARN of the new model).
print(create_model_response)

{'ModelArn': 'arn:aws:sagemaker:us-east-1:818442660361:model/mistral-7b-email-name-finetuned', 'ResponseMetadata': {'RequestId': '9d06df70-ea90-4531-8bb3-f01c0cdc034e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '9d06df70-ea90-4531-8bb3-f01c0cdc034e', 'content-type': 'application/x-amz-json-1.1', 'content-length': '93', 'date': 'Tue, 16 Apr 2024 21:58:27 GMT'}, 'RetryAttempts': 0}}


#### Endpoint Config

Create the endpoint config: specify the instance count, type, model name, etc.

In [25]:
# The endpoint config name is derived from the endpoint name.
endpoint_name = 'mistral-7b-email-name-finetuned'
endpoint_config_name = f'{endpoint_name}-config'

# Single production variant: one ml.g5.2xlarge instance serving model_name.
create_endpoint_config_api_response = sagemaker_client.create_endpoint_config(
                                            EndpointConfigName=endpoint_config_name,
                                            ProductionVariants=[
                                                {
                                                    'VariantName': 'dev',
                                                    'ModelName': model_name,
                                                    'InitialInstanceCount': 1,
                                                    'InstanceType': 'ml.g5.2xlarge'
                                                },
                                            ]
                                       )

#### Endpoint

Create the model endpoint by specifying the endpoint name and config

In [26]:
# Create the endpoint from the config defined above.
# NOTE(review): nothing here waits for the endpoint to become ready before it
# is used - confirm whether a wait step is needed.
create_endpoint_api_response = sagemaker_client.create_endpoint(
                                    EndpointName=endpoint_name,
                                    EndpointConfigName=endpoint_config_name,
                                )

In [27]:
# Show the create_endpoint response (contains the ARN of the new endpoint).
print(f'create_endpoint API response: {create_endpoint_api_response}')

create_endpoint API response: {'EndpointArn': 'arn:aws:sagemaker:us-east-1:818442660361:endpoint/mistral-7b-email-name-finetuned', 'ResponseMetadata': {'RequestId': '4de0ecba-2049-4569-8373-b11b40ad6b06', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '4de0ecba-2049-4569-8373-b11b40ad6b06', 'content-type': 'application/x-amz-json-1.1', 'content-length': '99', 'date': 'Tue, 16 Apr 2024 21:58:32 GMT'}, 'RetryAttempts': 0}}


#### Cleanup

Delete the deployed endpoint and the related resources

In [20]:
# Tear down the three resources created above: endpoint, endpoint config, model.

# Delete endpoint
sagemaker_client.delete_endpoint(EndpointName=endpoint_name)

# Delete endpoint configuration
sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)

# Delete model
sagemaker_client.delete_model(ModelName=model_name)

{'ResponseMetadata': {'RequestId': '30e979c4-efa2-440e-9ac7-c3dddccc517c',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '30e979c4-efa2-440e-9ac7-c3dddccc517c',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Tue, 16 Apr 2024 21:58:12 GMT',
   'content-length': '0'},
  'RetryAttempts': 0}}