In [1]:
import sagemaker
import boto3 #used to connect to sagemaker
import pandas as pd
from sklearn.model_selection import train_test_split

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\ACNusr2\AppData\Local\sagemaker\sagemaker\config.yaml


In [2]:
# SageMaker SDK session; its underlying boto session supplies the AWS region.
sess = sagemaker.Session()
region = sess.boto_session.region_name

# Low-level SageMaker API client, used further down for describe/delete calls.
sm_boto3 = boto3.client("sagemaker")

# Name of the S3 bucket (created beforehand) that receives the train/test CSVs.
bucket = "handtrackbucket"
print("Using bucket", bucket)

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\ACNusr2\AppData\Local\sagemaker\sagemaker\config.yaml
Using bucket handtrackbucket


In [3]:
# Load the hand-landmark dataset from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="handlandmarks.csv")

In [4]:
# Preview the first five rows (feature columns followed by the "class" label).
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,class
0,0,53.338541,99.403219,137.637204,171.819673,129.448832,164.246766,167.886867,162.689889,137.277092,...,259.886514,135.624482,194.177754,229.455878,260.762344,127.318498,174.393234,208.753443,239.883305,okay
1,0,51.0,97.082439,133.962681,165.07574,124.064499,156.66844,160.078106,153.482898,131.734582,...,247.095123,130.980915,189.451841,223.617978,253.568531,124.036285,170.423003,203.482186,233.482333,okay
2,0,63.411355,120.208153,168.291414,211.690812,152.069063,199.248588,204.824315,200.551739,162.249807,...,300.562805,160.252925,225.319773,269.046464,304.041116,148.660687,202.0,239.885389,270.283185,okay
3,0,35.777088,67.119297,95.210294,120.61509,83.815273,111.359777,117.889779,116.846053,93.059121,...,175.114248,97.0,134.134261,158.61904,178.361991,96.509067,126.589889,147.231111,164.778639,okay
4,0,29.154759,56.938563,79.322128,95.131488,69.771054,91.416629,97.200823,96.208108,74.0,...,135.955875,74.330344,102.591423,122.576507,136.795468,71.063352,95.005263,113.004425,126.0,okay


In [5]:
features = df.columns
labels = df["class"].tolist()

# Feature matrix = every column except the label; targets = the label values as a list.
X, y = df.drop(columns=["class"]), labels

# 80/20 train/test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=0,
)

In [7]:
# Re-attach the label as the LAST column of each split; script.py treats the
# final column of the CSV as the target.
#
# DataFrame.assign always returns a new frame, so X_train / X_test are left
# untouched. The previous pd.DataFrame(X_train) wrapper does not copy, and on
# some pandas versions it shares the underlying data manager with X_train, so
# adding "class" to the wrapper could also add it to the feature frame.
trainX = X_train.assign(**{"class": y_train})
testX = X_test.assign(**{"class": y_test})

print(trainX.shape)
print(testX.shape)

(192, 22)
(48, 22)


In [8]:
# Write both splits to local CSV files without the pandas index column,
# so the files contain only the feature columns and the trailing label.
for frame, filename in ((trainX, "train-V-1.csv"), (testX, "test-V-1.csv")):
    frame.to_csv(filename, index=False)

In [5]:
# Key prefix inside the bucket under which both CSV files are stored.
sk_prefix = "sagemaker/hand-gesture-classification/sklearncontainer"

# Upload the train and test files (in that order) and keep the values returned
# by upload_data; they are passed to the estimator as input channels later.
trainpath, testpath = (
    sess.upload_data(path=local_csv, bucket=bucket, key_prefix=sk_prefix)
    for local_csv in ("train-V-1.csv", "test-V-1.csv")
)

In [10]:
%%writefile script.py

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score
import sklearn
import joblib
import boto3
import pathlib
from io import StringIO
import argparse
import os
import numpy as np
import pandas as pd

def model_fn(model_dir):
    """Load the trained classifier from ``model_dir``.

    Args:
        model_dir: Directory that contains the ``model.joblib`` file written
            by the training section of this script.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    artifact_path = os.path.join(model_dir, "model.joblib")
    return joblib.load(artifact_path)

if __name__ =='__main__':
    # Training entry point: parse arguments, read the train/test CSVs, fit a
    # random forest, persist it as model.joblib and print test-set metrics.

    print("[INFO] Extracting arguments")
    parser = argparse.ArgumentParser()

    # hyperparameters sent by the client are passed as command-line arguments to the script.
    parser.add_argument('--n_estimators', type=int, default=100)
    parser.add_argument('--random_state', type=int, default=0)

    # an alternative way to load hyperparameters is via the SM_HPS environment variable.

    # input data and model directories.
    # Each default comes from the matching SM_* environment variable. The lookup
    # uses .get() so that building the parser does not raise KeyError when the
    # variable is missing (e.g. a local run); in that case the flag becomes
    # required and argparse reports a clear usage error instead.
    for flag, env_var in (
        ('--model-dir', 'SM_MODEL_DIR'),
        ('--train', 'SM_CHANNEL_TRAIN'),
        ('--test', 'SM_CHANNEL_TEST'),
    ):
        parser.add_argument(
            flag,
            type=str,
            default=os.environ.get(env_var),
            required=env_var not in os.environ,
        )
    parser.add_argument('--train-file', type=str, default="train-V-1.csv")
    parser.add_argument('--test-file', type=str, default="test-V-1.csv")

    # parse_known_args ignores any extra arguments instead of failing on them.
    args, _ = parser.parse_known_args()
    print("SKLearn Version: ", sklearn.__version__)
    print("joblib Version: ", joblib.__version__)

    print("[INFO] Reading data")

    train_df = pd.read_csv(os.path.join(args.train, args.train_file))
    test_df = pd.read_csv(os.path.join(args.test, args.test_file))

    # The last column of the training CSV is the label; all others are features.
    features = list(train_df.columns)
    label = features.pop(-1)

    print("Building train and test datasets")

    #Train test split
    X_train = train_df[features]
    X_test = test_df[features]
    y_train = train_df[label]
    y_test = test_df[label]

    print('Column order: ')
    print(features)

    print("Training random forest model.....")
    model = RandomForestClassifier(n_estimators=args.n_estimators, random_state = args.random_state)
    model.fit(X_train, y_train)

    # Persist under the file name that model_fn loads.
    model_path = os.path.join(args.model_dir, "model.joblib")
    joblib.dump(model, model_path)
    print("Model persisted at", model_path)

    # Evaluate on the held-out test file.
    y_pred_test = model.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred_test)
    test_rep = classification_report(y_test, y_pred_test)

    print("----Metrics results for testing data -----")
    print("Test accuracy is: ", test_acc)
    print("Classification report: ")
    print(test_rep)
    

Writing script.py


In [3]:
from sagemaker.sklearn.estimator import SKLearn

# scikit-learn framework version requested for the estimator; reused when the
# SKLearnModel is created for deployment.
FRAMEWORK_VERSION = "0.23-1"

# Hyperparameters for script.py, which declares matching --n_estimators and
# --random_state command-line arguments.
rf_hyperparameters = {"n_estimators": 100, "random_state": 0}

# Estimator that runs script.py as a training job on a single ml.m5.large
# instance, using spot instances with the time limits (in seconds) given below.
sklearn_estimator = SKLearn(
    entry_point="script.py",
    role="arn:aws:iam::765477734195:role/AmazonSageMaker-ExecutionRole",
    instance_count=1,
    instance_type="ml.m5.large",
    framework_version=FRAMEWORK_VERSION,
    base_job_name="RF-custom-sklearn",
    hyperparameters=rf_hyperparameters,
    use_spot_instances=True,
    max_wait=7200,
    max_run=3600,
)
    
# arn:aws:iam::765477734195:user/Gowri_Admin

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\ACNusr2\AppData\Local\sagemaker\sagemaker\config.yaml


In [6]:
# Input channels for the training job: "train" and "test" point at the CSVs
# uploaded earlier. wait=True blocks this cell until the job has finished.
training_channels = {"train": trainpath, "test": testpath}
sklearn_estimator.fit(training_channels, wait=True)

Using provided s3_resource


INFO:sagemaker:Creating training-job with name: RF-custom-sklearn-2023-10-30-17-21-10-992


2023-10-30 17:21:14 Starting - Starting the training job...
2023-10-30 17:21:28 Starting - Preparing the instances for training......
2023-10-30 17:22:49 Downloading - Downloading input data...
2023-10-30 17:23:20 Training - Downloading the training image...
2023-10-30 17:23:50 Training - Training image download completed. Training in progress..[34m2023-10-30 17:23:55,694 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2023-10-30 17:23:55,697 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-10-30 17:23:55,737 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2023-10-30 17:23:55,876 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-10-30 17:23:55,888 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-10-30 17:23:55,899 sagemaker-training-toolkit INFO     No GPUs det

In [7]:
# Wait on the most recent training job, then look up where its model artifact
# was stored by describing the job through the SageMaker API client.
latest_job = sklearn_estimator.latest_training_job
latest_job.wait(logs="None")

job_description = sm_boto3.describe_training_job(TrainingJobName=latest_job.name)
artifact = job_description["ModelArtifacts"]["S3ModelArtifacts"]

print("Model artifact persisted as " + artifact)


2023-10-30 17:24:16 Starting - Preparing the instances for training
2023-10-30 17:24:16 Downloading - Downloading input data
2023-10-30 17:24:16 Training - Training image download completed. Training in progress.
2023-10-30 17:24:16 Uploading - Uploading generated training model
2023-10-30 17:24:16 Completed - Training job completed
Model artifact persisted as s3://sagemaker-us-east-2-765477734195/RF-custom-sklearn-2023-10-30-17-21-10-992/output/model.tar.gz


In [8]:
from sagemaker.sklearn.model import SKLearnModel
from time import gmtime, strftime

# A UTC timestamp suffix gives every run its own model name.
model_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model_name = "Custom-sklearn-model-" + model_timestamp

# Model definition built from the trained artifact; script.py is passed as the
# entry point again (it defines model_fn), with the same framework version
# that was used for training.
model = SKLearnModel(
    name=model_name,
    model_data=artifact,
    role="arn:aws:iam::765477734195:role/AmazonSageMaker-ExecutionRole",
    entry_point="script.py",
    framework_version=FRAMEWORK_VERSION,
)


sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\ACNusr2\AppData\Local\sagemaker\sagemaker\config.yaml


In [9]:
# The endpoint name gets its own UTC timestamp suffix, generated when this
# cell runs (so it can differ from the model name's suffix).
endpoint_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_name = "Custom-sklearn-model-" + endpoint_timestamp
print("EndpointName = {}".format(endpoint_name))

# Deploy the model to an endpoint backed by one ml.m4.xlarge instance.
predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.m4.xlarge",
    initial_instance_count=1,
)

EndpointName = Custom-sklearn-model-2023-10-30-17-28-33
sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\ACNusr2\AppData\Local\sagemaker\sagemaker\config.yaml


INFO:sagemaker:Creating model with name: Custom-sklearn-model-2023-10-30-17-28-07
INFO:sagemaker:Creating endpoint-config with name Custom-sklearn-model-2023-10-30-17-28-33
INFO:sagemaker:Creating endpoint with name Custom-sklearn-model-2023-10-30-17-28-33


-----!

In [25]:
# Tear down the endpoint once it is no longer needed.
# NOTE(review): this call names only the endpoint; the endpoint configuration
# and model created during deploy are presumably left in place - confirm and
# remove them separately if a full cleanup is wanted.
sm_boto3.delete_endpoint(
    EndpointName=endpoint_name,
)

{'ResponseMetadata': {'RequestId': '9f64180d-b0e1-4274-913a-e13ac4efe6ba',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '9f64180d-b0e1-4274-913a-e13ac4efe6ba',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 28 Oct 2023 19:59:42 GMT'},
  'RetryAttempts': 0}}