In [50]:
import sagemaker
import pandas as pd
from sklearn.model_selection import train_test_split
from sagemaker import image_uris
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import CSVDeserializer, JSONDeserializer

In [2]:
session = sagemaker.Session()
bucket = session.default_bucket()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [3]:
df = pd.read_csv("https://raw.githubusercontent.com/sharmaroshan/Heart-UCI-Dataset/master/heart.csv")

In [4]:
target = df.pop("target")

In [5]:
df.insert(0, "target", target)

In [6]:
train, test = train_test_split(df, test_size=0.3)

In [7]:
train.to_csv("train.csv", header=False, index=False)

In [8]:
test.to_csv("test.csv", header=False, index=False)

In [9]:
train_data_uri = session.upload_data("train.csv", bucket, "heart/data/train")

In [10]:
test_data_uri  = session.upload_data("test.csv", bucket, "heart/data/test")

In [11]:
test_data_uri

's3://sagemaker-us-east-1-381492271173/heart/data/test/test.csv'

In [26]:
container = image_uris.retrieve("xgboost", region="us-east-1", version='latest')

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [18]:
estimator = Estimator(image_uri=container, role="LabRole", 
                      instance_count=1, instance_type="ml.m5.xlarge", 
                      hyperparameters={
                         "objective": "binary:logistic",
                         "num_round": "50",
                         "eval_metric": "error"
                     })

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [19]:
training_input = TrainingInput(train_data_uri, content_type="text/csv")

In [20]:
validation_input = TrainingInput(test_data_uri, content_type="text/csv")

In [21]:
estimator.fit({"train": training_input, "validation": validation_input})

INFO:sagemaker:Creating training-job with name: xgboost-2024-03-07-23-35-49-367


2024-03-07 23:35:49 Starting - Starting the training job...
2024-03-07 23:36:05 Starting - Preparing the instances for training...
2024-03-07 23:36:39 Downloading - Downloading input data...
2024-03-07 23:37:08 Downloading - Downloading the training image..
2024-03-07 23:37:39 Training - Training image download completed. Training in progress.
2024-03-07 23:37:39 Uploading - Uploading generated training model[34mArguments: train[0m
[34m[2024-03-07:23:37:34:INFO] Running standalone xgboost training.[0m
[34m[2024-03-07:23:37:34:INFO] File size need to be processed in the node: 0.01mb. Available memory size in the node: 8241.41mb[0m
[34m[2024-03-07:23:37:34:INFO] Determined delimiter of CSV input is ','[0m
[34m[23:37:34] S3DistributionType set as FullyReplicated[0m
[34m[23:37:34] 212x13 matrix with 2756 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2024-03-07:23:37:34:INFO] Determined delimiter of CSV input is ','[0m
[34m[23:37:3

In [29]:
predictor = estimator.deploy(initial_instance_count=1,
                 instance_type='ml.m5.xlarge',
                 serializer=CSVSerializer(),
                 deserializer=CSVDeserializer())

INFO:sagemaker:Creating model with name: xgboost-2024-03-08-00-07-26-961
INFO:sagemaker:Creating endpoint-config with name xgboost-2024-03-08-00-07-26-961
INFO:sagemaker:Creating endpoint with name xgboost-2024-03-08-00-07-26-961


----!

In [66]:
predictor.deserializer = JSONDeserializer()

In [62]:
row = df.iloc[298:300, 1:].to_csv(header=None, index=None)

In [63]:
row

'57,0,0,140,241,0,1,123,1,0.2,1,0,3\n45,1,3,110,264,0,1,132,0,1.2,1,0,3\n'

In [67]:
predictor.predict(row)

JSONDecodeError: Extra data: line 1 column 20 (char 19)

In [46]:
df

Unnamed: 0,target,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,1,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,1,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,1,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,1,57,0,0,120,354,0,1,163,1,0.6,2,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,0,57,0,0,140,241,0,1,123,1,0.2,1,0,3
299,0,45,1,3,110,264,0,1,132,0,1.2,1,0,3
300,0,68,1,0,144,193,1,1,141,0,3.4,1,2,3
301,0,57,1,0,130,131,0,1,115,1,1.2,1,1,3
