In [2]:
# Boto3 is the Amazon Web Services (AWS) Software Development Kit (SDK) for Python
# Boto3 allows Python developer to write software that makes use of services like Amazon S3 and Amazon EC2

import sagemaker
import boto3
import os
from sagemaker import Session

# Let's create a Sagemaker session
sagemaker_session = sagemaker.Session()
bucket = Session().default_bucket() 
prefix = 'data' # prefix is the subfolder within the bucket.

# Let's get the execution role for the notebook instance. 
# This is the IAM role that you created when you created your notebook instance. You pass the role to the training job.
# Note that AWS Identity and Access Management (IAM) role that Amazon SageMaker can assume to perform tasks on your behalf (for example, reading training results, called model artifacts, from the S3 bucket and writing training results to Amazon S3). 
role = sagemaker.get_execution_role()
print(role)

arn:aws:iam::363557075783:role/service-role/AmazonSageMaker-ExecutionRole-20210929T104599


#  Upload the data for training

In [3]:
boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'insurance.csv')).upload_file('insurance.csv')

In [4]:
train_data=s3_train_data = 's3://{}/{}'.format(bucket,prefix)

In [5]:
train_data

's3://sagemaker-us-east-2-363557075783/data'

# Create SageMaker Scikit Estimator

In [3]:
from sagemaker.sklearn.estimator import SKLearn

script_path = 'sagemaker script.py'

sklearn = SKLearn(
    entry_point=script_path,
    instance_type="ml.m4.xlarge",
    framework_version="0.20.0",
    py_version="py3",
    role=role,
    sagemaker_session=sagemaker_session)

# Train SKLearn Estimator on Startup data

In [4]:
sklearn.fit({'train': 's3://sagemaker-us-east-2-363557075783/data'})

2021-10-25 16:57:12 Starting - Starting the training job...
2021-10-25 16:57:35 Starting - Launching requested ML instancesProfilerReport-1635181032: InProgress
...
2021-10-25 16:58:07 Starting - Preparing the instances for training.........
2021-10-25 16:59:39 Downloading - Downloading input data...
2021-10-25 16:59:56 Training - Downloading the training image..[34m2021-10-25 17:00:23,716 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-10-25 17:00:23,718 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-10-25 17:00:23,729 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-10-25 17:00:24,050 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-10-25 17:00:27,110 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-10-25 17:00:27,132 sagemaker-training-toolkit IN

# Deploy the model

In [5]:
deployment = sklearn.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")

------!

In [6]:
deployment.endpoint

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


'sagemaker-scikit-learn-2021-10-25-17-00-54-996'

In [7]:
import pandas as pd
file_name='test.csv'
X=pd.read_csv(file_name)

In [8]:
deployment.predict(X)

array([3358.21612276])

# Testing Model Performance

In [11]:
import pandas as pd

In [12]:
df = pd.read_csv("insurance.csv")

In [17]:
X=df.iloc[:,:-1].values
y=df.iloc[:,-1].values

In [18]:
y

array([16884.924 ,  1725.5523,  4449.462 , ...,  1629.8335,  2007.945 ,
       29141.3603])

In [19]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42)

In [25]:
y_predict=deployment.predict(X_test)

In [22]:
k = X_test.shape[1]
n = len(X_test)
n

268

In [26]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from math import sqrt
import numpy as np

RMSE = float(format(np.sqrt(mean_squared_error(y_test, y_predict)),'.3f'))
MSE = mean_squared_error(y_test, y_predict)
MAE = mean_absolute_error(y_test, y_predict)
r2 = r2_score(y_test, y_predict)
adj_r2 = 1-(1-r2)*(n-1)/(n-k-1)

print('RMSE =',RMSE, '\nMSE =',MSE, '\nMAE =',MAE, '\nR2 =', r2, '\nAdjusted R2 =', adj_r2) 

RMSE = 5801.121 
MSE = 33653002.77665004 
MAE = 4094.4919695278654 
R2 = 0.7832317053203537 
Adjusted R2 = 0.7782485261323159


In [27]:
deployment.delete_endpoint()