In [1]:
!pip install -U sagemaker



In [2]:
import pandas as pd

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [3]:
dataset = pd.read_csv('housing.csv')

In [4]:
print(dataset.shape)
dataset[:5]

(506, 13)


Unnamed: 0,crim,zn,indus,chas,nox,age,rm,dis,rad,tax,ptratio,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,5.33,36.2


In [5]:
# Move 'medv' column to front
dataset = pd.concat([dataset['medv'], dataset.drop(['medv'], axis=1)], axis=1)

In [6]:
from sklearn.model_selection import train_test_split
training_dataset, validation_dataset = train_test_split(dataset, test_size=0.1)

print(training_dataset.shape)
print(validation_dataset.shape)

(455, 13)
(51, 13)


In [8]:
training_dataset.to_csv('training_dataset.csv', index=False, header=False)
validation_dataset.to_csv('validation_dataset.csv', index=False, header=False)

In [9]:
import sagemaker

print(sagemaker.__version__)

sess = sagemaker.Session()
bucket = sess.default_bucket()

prefix = 'boston-housing'
training_data_path = sess.upload_data(path='training_dataset.csv', key_prefix=prefix + '/input/training')
validation_data_path = sess.upload_data(path='validation_dataset.csv', key_prefix=prefix + '/input/validation')

print(training_data_path)
print(validation_data_path)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
2.199.0
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
s3://sagemaker-us-east-1-196871743880/boston-housing/input/training/training_dataset.csv
s3://sagemaker-us-east-1-196871743880/boston-housing/input/validation/validation_dataset.csv


In [11]:
import boto3
from sagemaker import image_uris

region = boto3.Session().region_name    
container = image_uris.retrieve('xgboost', region, version='1.2-1')

print(container)


role = sagemaker.get_execution_role() 

xgb = sagemaker.estimator.Estimator(container,
                                    role,
                                    instance_count=1,
                                    instance_type='ml.m4.xlarge',
                                    output_path='s3://{}/{}/output'.format(bucket, prefix),
                                    #sagemaker_session=sagemaker_session
                                )

# Set hyperparameters
xgb.set_hyperparameters(objective='reg:linear', num_round=100)

683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.2-1
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [12]:
training_data_channel   = sagemaker.TrainingInput(s3_data=training_data_path, content_type='text/csv')
validation_data_channel = sagemaker.TrainingInput(s3_data=validation_data_path, content_type='text/csv')

xgb_data = {'train': training_data_channel, 'validation': validation_data_channel}

In [13]:
xgb.fit(xgb_data)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-12-10-15-32-41-590


2023-12-10 15:32:41 Starting - Starting the training job......
2023-12-10 15:33:27 Starting - Preparing the instances for training.........
2023-12-10 15:34:51 Downloading - Downloading input data...
2023-12-10 15:35:23 Training - Downloading the training image......
2023-12-10 15:36:39 Uploading - Uploading generated training model[34m[2023-12-10 15:36:31.018 ip-10-2-136-229.ec2.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value reg:linear to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determ

In [14]:
%%bash -s "$xgb.output_path"
aws s3 ls --recursive $1

2023-12-10 15:10:04          0 boston-housing/output/linear-learner-2023-12-10-15-05-38-375/debug-output/training_job_end.ts
2023-12-10 15:10:03       1023 boston-housing/output/linear-learner-2023-12-10-15-05-38-375/output/model.tar.gz
2023-12-10 15:10:04          0 boston-housing/output/linear-learner-2023-12-10-15-05-38-375/profiler-output/framework/training_job_end.ts
2023-12-10 15:09:01      87174 boston-housing/output/linear-learner-2023-12-10-15-05-38-375/profiler-output/system/incremental/2023121015/1702220820.algo-1.json
2023-12-10 15:09:00     242208 boston-housing/output/linear-learner-2023-12-10-15-05-38-375/profiler-output/system/incremental/2023121015/1702220880.algo-1.json
2023-12-10 15:10:00     239797 boston-housing/output/linear-learner-2023-12-10-15-05-38-375/profiler-output/system/incremental/2023121015/1702220940.algo-1.json
2023-12-10 15:10:04          0 boston-housing/output/linear-learner-2023-12-10-15-05-38-375/profiler-output/system/training_job_end.ts
2023-12

In [15]:
from time import strftime, gmtime
timestamp = strftime('%d-%H-%M-%S', gmtime())

endpoint_name = 'xgb-demo-'+timestamp
print(endpoint_name)

xgb-demo-10-15-39-33


In [16]:
xgb_predictor = xgb.deploy(endpoint_name=endpoint_name, 
                        initial_instance_count=1, 
                        instance_type='ml.t2.medium')

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2023-12-10-15-39-34-067
INFO:sagemaker:Creating endpoint-config with name xgb-demo-10-15-39-33
INFO:sagemaker:Creating endpoint with name xgb-demo-10-15-39-33


------------!

In [17]:
test_sample = '0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,4.98'

In [18]:
#xgb_predictor.content_type = 'text/csv'
xgb_predictor.serializer = sagemaker.serializers.CSVSerializer()
xgb_predictor.deserializer = sagemaker.deserializers.CSVDeserializer()

response = xgb_predictor.predict(test_sample)
print(response)

[['24.013858795166016']]


In [19]:
test_samples = ['0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,4.98',
                '0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,9.14']

response = xgb_predictor.predict(test_samples)
print(response)
print(xgb_predictor.endpoint_name)

[['24.013858795166016', '21.56511116027832']]
xgb-demo-10-15-39-33


In [20]:
runtime = boto3.Session().client(service_name='runtime.sagemaker') 

response = runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                  ContentType='text/csv', 
                                  Body=test_sample)

print(response['Body'].read())

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


b'24.013858795166016'


In [None]:
xgb_predictor.delete_endpoint()