In [1]:
!pip install sagemaker==1.72.0



In [31]:
import sagemaker
import os
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.predictor import csv_serializer

# SageMaker session; used below for the S3 uploads and the default-bucket lookup.
session = sagemaker.Session()
# Execution role returned by the SDK; handed to the estimator as role=role.
role = get_execution_role()
# Default S3 bucket of the session; used to build output_path.
bucket = session.default_bucket()

In [30]:
# S3 key prefix under which this notebook's CSV files are uploaded.
prefix = 'starbucks-offer-benchmark'
# Local directory holding train.csv / test.csv (it is also used as the estimator's
# source_dir). Defaults to the original notebook-instance path; set STARBUCKS_DATA_DIR
# to run the notebook from a different checkout without editing this cell.
data_dir = os.environ.get(
    'STARBUCKS_DATA_DIR',
    '/home/ec2-user/SageMaker/ML_project_starbucksOffer/Data',
)

# Upload both splits; the returned train location is later passed to estimator.fit().
test_location = session.upload_data(os.path.join(data_dir, 'test.csv'), key_prefix=prefix)
train_location = session.upload_data(os.path.join(data_dir, 'train.csv'), key_prefix=prefix)

In [32]:
from sagemaker.sklearn.estimator import SKLearn

# S3 URI for this experiment under the default bucket.
# NOTE(review): the value is computed but not passed to the estimator below, so the
# output location is whatever the SDK chooses by default.
output_path = 's3://{}/{}'.format(bucket, prefix)

# Scikit-learn estimator that runs train_Logistic.py (looked up inside data_dir)
# on a single ml.c4.xlarge training instance.
estimator = SKLearn(
    role=role,
    source_dir=data_dir,
    entry_point='train_Logistic.py',
    framework_version='0.20.0',
    train_instance_type='ml.c4.xlarge',
    train_instance_count=1,
)

This is not the latest supported version. If you would like to use version 0.23-1, please add framework_version=0.23-1 to your constructor.


In [33]:
# Launch the training job; the uploaded train.csv location is supplied under the 'train' key.
estimator.fit({'train': train_location})


's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


2021-08-09 19:11:16 Starting - Starting the training job...
2021-08-09 19:11:18 Starting - Launching requested ML instances......
2021-08-09 19:12:27 Starting - Preparing the instances for training......
2021-08-09 19:13:39 Downloading - Downloading input data...
2021-08-09 19:13:56 Training - Downloading the training image..[34m2021-08-09 19:14:21,500 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-08-09 19:14:21,502 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-08-09 19:14:21,512 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-08-09 19:14:22,283 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-08-09 19:14:25,315 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-08-09 19:14:25,328 sagemaker-training-toolkit INFO     No GPUs detected (normal if no 


2021-08-09 19:14:38 Uploading - Uploading generated training model
2021-08-09 19:14:38 Completed - Training job completed
Training seconds: 59
Billable seconds: 59


In [34]:
%%time

# Deploy the trained model to an endpoint backed by one ml.m4.xlarge instance.
# NOTE: the endpoint stays up until the clean-up cells at the end of the notebook are run.
predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


------!CPU times: user 132 ms, sys: 8.71 ms, total: 140 ms
Wall time: 3min 2s


In [42]:
def get_recall_precision_matrix(y_test, y_test_pred):
    """
    Compare true labels with predicted labels and return recall / precision.

    Labels are treated as binary with 1 as the positive class. Any pair that
    is not a true positive, true negative or false negative is counted as a
    false positive.

    Parameters
    ----------
    y_test : pandas.Series or sequence
        True labels. Objects exposing ``.values`` (e.g. a pandas Series) are
        read through it, so they are matched by position, not by index label.
    y_test_pred : sequence
        Predicted labels, positionally aligned with ``y_test``. Only the
        first ``len(y_test_pred)`` true labels are compared.

    Returns
    -------
    dict
        ``{"recall": tp / (tp + fn), "precision": tp / (tp + fp)}``.
        A score whose denominator is zero is reported as ``0.0`` instead of
        raising ``ZeroDivisionError``.

    Raises
    ------
    IndexError
        If ``y_test`` has fewer entries than ``y_test_pred``.
    """
    # Accept plain sequences as well as pandas objects.
    labels = getattr(y_test, "values", y_test)
    if len(labels) < len(y_test_pred):
        raise IndexError("y_test has fewer entries than y_test_pred")

    tp = fp = fn = 0
    for actual, predicted in zip(labels, y_test_pred):
        if actual == predicted == 1:
            tp += 1
        elif actual == predicted == 0:
            continue  # true negative: contributes to neither score
        elif actual == 1 and predicted == 0:
            fn += 1  # a real positive the model missed
        else:
            fp += 1  # predicted positive for a non-positive sample

    recall = tp / (tp + fn) if (tp + fn) else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0

    return {"recall": recall, "precision": precision}

In [39]:
import pandas as pd
# test.csv is read without a header row: column 0 holds the labels,
# every remaining column is a feature.
test_data = pd.read_csv(os.path.join(data_dir, "test.csv"), names=None, header=None)
test_y, test_x = test_data.iloc[:, 0], test_data.iloc[:, 1:]

In [40]:
# Send the test features to the deployed endpoint and keep its predictions.
# NOTE(review): the whole frame goes out in a single predict() call; presumably it is
# small enough here — confirm before reusing with a larger test set.
test_y_preds = predictor.predict(test_x)

In [43]:
# Get the recall and precision scores.
# Recall is the metric to focus on here: we want to maximize the possibility
# of capturing customers who would accept the offer.
get_recall_precision_matrix(test_y,test_y_preds)

{'recall': 0.8032840012963163, 'precision': 0.7192880634552138}

In [44]:
import boto3
# Take the endpoint name from the predictor returned by estimator.deploy() rather than a
# literal pasted from one particular run, so clean-up always targets the current endpoint.
deployment_name = predictor.endpoint
client = boto3.client('sagemaker')
# The endpoint config is looked up under the same name as the endpoint.
response = client.describe_endpoint_config(EndpointConfigName=deployment_name)
# Model attached to the first production variant; needed to delete the model afterwards.
model_name = response['ProductionVariants'][0]['ModelName']

In [45]:
# Clean up resources
# Delete the model, the endpoint and the endpoint config that belong to this deployment.
client.delete_model(ModelName=model_name)
client.delete_endpoint(EndpointName=deployment_name)
client.delete_endpoint_config(EndpointConfigName=deployment_name)

{'ResponseMetadata': {'RequestId': 'cd1222f5-73c5-463d-8e82-482ce26b5602',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'cd1222f5-73c5-463d-8e82-482ce26b5602',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 09 Aug 2021 19:21:02 GMT'},
  'RetryAttempts': 0}}