In [None]:
# %pip install -U sagemaker
!pip install scikit-learn==1.2.1

### Create Bucket

Bucket Name Generator:

In [None]:
import string
import random

def bucket_generator():
    """Return a randomised, S3-compatible bucket name for this project.

    S3 bucket names may only contain lowercase letters, digits, dots and
    hyphens, so the fixed prefix uses a hyphen (not an underscore) and the
    random suffix is drawn from lowercase letters and digits only.

    Returns:
        str: ``'mle-crisis-'`` followed by six random characters.
    """
    alphabet = string.ascii_lowercase + string.digits
    # `_` as the loop variable: the index is unused, and no built-in name
    # (such as `str`) is shadowed by a local.
    suffix = ''.join(random.choice(alphabet) for _ in range(6))
    return 'mle-crisis-' + suffix

In [None]:
import boto3
import pandas as pd
import numpy as np
import os

# Key prefix under which the training jobs write their output (see train_model)
prefix = "model"

# Randomly generated bucket name used for every upload in this run
bucket_name = bucket_generator()

# Low-level S3 client used by the bucket-creation and model-download cells
s3_client = boto3.client(service_name='s3')

Create S3 Bucket:

In [None]:
# ClientError is what boto3 clients raise for service-side failures; none of
# the visible earlier cells import it, so bring it into scope here.
from botocore.exceptions import ClientError

# Outside us-east-1, S3 expects the target region to be passed explicitly as a
# LocationConstraint; us-east-1 must be created without one.
region = s3_client.meta.region_name
create_kwargs = {'Bucket': bucket_name}
if region and region != 'us-east-1':
    create_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': region}

try:
    # Create the S3 bucket
    s3_client.create_bucket(**create_kwargs)
    print("Bucket created successfully!")
except ClientError as e:
    # Re-running the cell against a bucket we already own is not an error;
    # anything else is reported and re-raised.
    error_code = e.response['Error']['Code']
    if error_code == 'BucketAlreadyOwnedByYou':
        print("Bucket already exists. Continuing with the existing bucket.")
    else:
        print("An error occurred while creating the bucket:", error_code)
        raise

Import Data:

In [None]:
# Load the CSV from the working directory and keep only the label ('target')
# and message ('text') columns.
local_file_path = 'data.csv'
df = pd.read_csv(local_file_path)[['target', 'text']]
df.head()

Upload to S3:

In [None]:
# Create a sagemaker session to upload data to S3
import sagemaker
from sklearn.model_selection import train_test_split
from sagemaker.inputs import TrainingInput

sagemaker_session = sagemaker.Session()

# Split the data into two batches (80% / 20%) with a fixed seed so the split is
# reproducible. batch_1 feeds the base training job, batch_2 the incremental one.
batch_1, batch_2 = train_test_split(df, test_size=0.2, random_state=2023)

# upload_data is given local file names below, so the batches must be written
# to disk first; without this step a fresh run has no files to upload.
batch_1.to_csv('batch_1.csv', index=False)
batch_2.to_csv('batch_2.csv', index=False)

batch_1_input_path = sagemaker_session.upload_data('batch_1.csv', bucket=bucket_name, key_prefix='batch_1')
batch_2_input_path = sagemaker_session.upload_data('batch_2.csv', bucket=bucket_name, key_prefix='batch_2')

print(batch_1_input_path)
print(batch_2_input_path)

### Model Training

In [None]:
# Use the current execution role for training. It needs access to S3.
# `role` is read as a global by train_model(), which passes it to the estimator.
role = sagemaker.get_execution_role()
print(role)

Define Model Training Function:

In [None]:
from sagemaker.sklearn import SKLearn

def train_model(train_script, dependencies_script, batch_path, wait=True):
    """Launch a SageMaker scikit-learn training job on one batch of data.

    Reads the notebook globals ``role``, ``sagemaker_session``, ``bucket_name``
    and ``prefix`` to configure the estimator and its S3 locations.

    Args:
        train_script: Entry-point script passed to the estimator
            (e.g. ``'train.py'``).
        dependencies_script: List of extra files passed as the estimator's
            ``dependencies`` (e.g. ``['utils.py']``).
        batch_path: S3 URI of the training data; supplied to the job as the
            ``'train'`` channel.
        wait: Whether to block until the training job finishes. Defaults to
            True because the cells that call this function read
            ``model_data``, download the artifact and deploy immediately
            afterwards, which needs a completed job. Pass False to launch the
            job and return straight away.

    Returns:
        The ``SKLearn`` estimator that ``fit`` was called on.
    """
    # Set up the SKLearn estimator with dependencies
    sk_estimator = SKLearn(
        entry_point=train_script,
        dependencies=dependencies_script,
        role=role,
        instance_count=1,
        instance_type="ml.c5.xlarge",
        framework_version="1.2-1",
        script_mode=True,
        py_version='py3',
        sagemaker_session=sagemaker_session,
        # Model artifacts go under <bucket>/<prefix>, uploaded code under <bucket>/jobs
        output_path="s3://{}/{}".format(bucket_name, prefix),
        base_job_name="sagemaker-crisis-detection",
        code_location="s3://{}/{}".format(bucket_name, "jobs"),
    )

    # Train the model
    print(batch_path)
    s3_input = TrainingInput(batch_path)
    sk_estimator.fit({'train': s3_input}, wait=wait)

    return sk_estimator


Base Model Training:

In [None]:
# Train the base model on the first batch
base_estimator = train_model(
    train_script='train.py',
    dependencies_script=['utils.py'],
    batch_path=batch_1_input_path,
)

# Keep the artifact location, container image and role of the base job for later cells
model_data = base_estimator.model_data
image_uri = base_estimator.image_uri
model_role = base_estimator.role

print(f"Model Data: {model_data}\nImage URI: {image_uri}\nModel Role: {model_role}")

Load Model:


In [None]:
import joblib
import tarfile

def load_model(bucket_dir, model_ver):
    """Download a model archive from S3, unpack it and load the model.

    Uses the notebook-level ``s3_client``. The archive is saved as
    ``model.tar.gz`` in the working directory and extracted there.

    Args:
        bucket_dir: Name of the S3 bucket holding the archive.
        model_ver: Full ``s3://<bucket_dir>/<key>`` URI of the archive.

    Returns:
        The object loaded from ``model.joblib`` inside the archive.

    Raises:
        ValueError: If ``model_ver`` is not an S3 URI inside ``bucket_dir``.
    """
    uri_prefix = 's3://{}/'.format(bucket_dir)
    # Validate up front so a mismatched URI gives a clear message instead of an
    # IndexError from string splitting.
    if not model_ver.startswith(uri_prefix) or len(model_ver) == len(uri_prefix):
        raise ValueError(
            "model_ver must be an S3 URI inside bucket '{}', got: {!r}".format(bucket_dir, model_ver)
        )
    object_key = model_ver[len(uri_prefix):]

    s3_client.download_file(bucket_dir, object_key, 'model.tar.gz')

    # Extract the model file from the tar.gz archive
    with tarfile.open('model.tar.gz', 'r:gz') as tar:
        tar.extractall('.')

    # Load the trained model.
    # NOTE: joblib.load unpickles the file; only load archives from a trusted bucket.
    model = joblib.load('model.joblib')

    return model

In [None]:
# load_model needs both the bucket and the artifact URI; model_data is the
# S3 URI of the base training job's output captured in the base-training cell.
curr_model = load_model(bucket_name, model_data)
print(curr_model)

In [None]:
# NOTE(review): X_train and y_train are not defined in any cell visible in this
# notebook, so this line raises NameError on a fresh kernel — TODO define them
# (or confirm where they are meant to come from).
# partial_fit is called on the pipeline's 'clf' step directly, bypassing any
# earlier pipeline steps; presumably X_train must already be in the feature
# representation that step expects — verify.
curr_model.named_steps['clf'].partial_fit(X_train, y_train)

Incremental Training:

In [None]:
# Run the incremental training script on the second batch
incremental_estimator = train_model(
    train_script='incremental.py',
    dependencies_script=['utils.py'],
    batch_path=batch_2_input_path,
)

# Keep the artifact location, container image and role of the incremental job
new_model_data = incremental_estimator.model_data
new_image_uri = incremental_estimator.image_uri
new_model_role = incremental_estimator.role

print(f"Model Data: {new_model_data}\nImage URI: {new_image_uri}\nModel Role: {new_model_role}")


Deploy Serverless Endpoint:

In [None]:
from sagemaker.serverless import ServerlessInferenceConfig

# Create an empty ServerlessInferenceConfig object to use default values.
# Note: the next cell reassigns serverless_config with explicit settings, so
# this default configuration is only in effect if that cell is skipped.
serverless_config = ServerlessInferenceConfig()

In [None]:
# Serverless endpoint settings: explicit memory size (MB) and maximum
# number of concurrent invocations, replacing the defaults.
serverless_config = ServerlessInferenceConfig(memory_size_in_mb=1024, max_concurrency=10)

In [None]:
# Estimator whose trained model is deployed; switch to incremental_estimator
# to serve the incrementally trained model instead.
estimator = base_estimator

# Deploy the selected estimator behind a serverless endpoint using the
# configuration defined above.
sk_predictor = estimator.deploy(serverless_inference_config=serverless_config)

Check list of endpoints:

In [None]:
# SageMaker client used for listing and deleting endpoints
sagemaker_client = boto3.client(service_name='sagemaker')

# Fetch the endpoint listing; `response` is reused by the cleanup cell below
response = sagemaker_client.list_endpoints()

# Print one endpoint name per line
endpoint_names = [entry['EndpointName'] for entry in response['Endpoints']]
for name in endpoint_names:
    print(name)

In [None]:
# import json

# client = sagemaker_session.sagemaker_runtime_client

# # Define the input data in the desired format
# input_data = {"Input": ["This is a disaster","Hello world"]}

# # Convert the input data to JSON payload
# payload = json.dumps(input_data)

# # Invoke the endpoint to get the prediction
# response = client.invoke_endpoint(
#     EndpointName='sagemaker-crisis-detection-2023-06-04-05-43-17-303',
#     ContentType='application/json',
#     Body=payload
# )

# # Parse the prediction response
# response_body = response['Body'].read().decode('utf-8')
# prediction_result = json.loads(response_body)['Output']
# print(response_body)


Endpoint Cleanup:

In [None]:
# Only remove endpoints that belong to this notebook. Endpoint names here start
# with the estimator's base job name, so anything else in the account listing
# is left untouched instead of being deleted indiscriminately.
ENDPOINT_PREFIX = "sagemaker-crisis-detection"

for endpoint in response['Endpoints']:
    endpoint_name = endpoint['EndpointName']
    if not endpoint_name.startswith(ENDPOINT_PREFIX):
        continue
    sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
    print(f"Deleted endpoint: {endpoint_name}")

In [None]:
# Displays the listing captured by the earlier list_endpoints() call; `response`
# is not refreshed after the cleanup cell, so deleted endpoints still appear here.
response['Endpoints']

### Model Inference

In [None]:
# Name of the deployed endpoint to query; call_model() reads this global.
endpoint_name = 'sagemaker-crisis-detection-2023-06-04-05-43-17-303' # Replace with your endpoint name

In [None]:
import json

def call_model(data, endpoint=None, client=None):
    """Send ``data`` to the inference endpoint and return its predictions.

    Args:
        data: JSON-serialisable request payload.
        endpoint: Name of the endpoint to invoke. Defaults to the notebook
            global ``endpoint_name``.
        client: Object exposing ``invoke_endpoint``. Defaults to
            ``sagemaker_session.sagemaker_runtime_client``.

    Returns:
        The value stored under the ``'Output'`` key of the endpoint's JSON
        response.
    """
    if client is None:
        client = sagemaker_session.sagemaker_runtime_client
    if endpoint is None:
        endpoint = endpoint_name

    payload = json.dumps(data)

    response = client.invoke_endpoint(
        EndpointName=endpoint,
        ContentType='application/json',
        Body=payload,
    )

    # The response body is a stream; read and decode it before parsing the JSON.
    response_body = response['Body'].read().decode('utf-8')
    prediction_result = json.loads(response_body)['Output']

    return prediction_result

    