In [2]:
import pandas as pd
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
df.sex.unique()

array([1, 0])

In [4]:
df = df.rename(
    columns = {'cp':'chest_pain_type', 
               'trestbps':'resting_blood_pressure', 
               'chol': 'cholesterol',
               'fbs': 'fasting_blood_sugar',
               'restecg' : 'resting_electrocardiogram', 
               'thalach': 'max_heart_rate_achieved', 
               'exang': 'exercise_induced_angina',
               'oldpeak': 'st_depression', 
               'slope': 'st_slope', 
               'ca':'num_major_vessels', 
               'thal': 'thalassemia'}, 
    errors="raise")
df.head()

Unnamed: 0,age,sex,chest_pain_type,resting_blood_pressure,cholesterol,fasting_blood_sugar,resting_electrocardiogram,max_heart_rate_achieved,exercise_induced_angina,st_depression,st_slope,num_major_vessels,thalassemia,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [7]:
df.to_csv('heart.csv', index=False)

In [8]:
import sagemaker
sess = sagemaker.Session()
uri = sess.upload_data(path='heart.csv')
print(uri)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
s3://sagemaker-us-east-1-179201351861/data/heart.csv


In [9]:
import boto3
from sagemaker import get_execution_role

role = get_execution_role()

In [12]:
df.dropna(inplace=True)

In [16]:
df.columns

Index(['age', 'sex', 'chest_pain_type', 'resting_blood_pressure',
       'cholesterol', 'fasting_blood_sugar', 'resting_electrocardiogram',
       'max_heart_rate_achieved', 'exercise_induced_angina', 'st_depression',
       'st_slope', 'num_major_vessels', 'thalassemia', 'target'],
      dtype='object')

In [17]:
y = df['age']
X = df.drop(['age'],axis=1)

In [19]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)

In [31]:
train_df = pd.concat([X_train, y_train], axis=1)
test_df = pd.concat([X_test, y_test], axis=1)
train_df.to_csv('train_data.csv', index=False, header=0)
test_df.to_csv('test_data.csv', index=False, header=0)

In [32]:
bucket = sagemaker.Session().default_bucket()
boto3.Session().resource('s3').Bucket(bucket).Object('train_data.csv').upload_file('train_data.csv')
boto3.Session().resource('s3').Bucket(bucket).Object('test_data.csv').upload_file('test_data.csv')

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


In [33]:
s3_input_train = sagemaker.inputs.TrainingInput(s3_data='s3://{}/train_data.csv'.format(bucket),content_type="text/csv",)

In [34]:
from sagemaker import image_uris
container = image_uris.retrieve('xgboost',region='us-east-1',version='latest')

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [35]:
sess = sagemaker.Session()
xgb = sagemaker.estimator.Estimator(container,
                                    role,
                                    instance_count=1,
                                    instance_type='ml.m4.xlarge',
                                    output_path=f's3://{bucket}/output',
                                    sagemaker_session=sess)

xgb.set_hyperparameters(max_depth=5,
                        eta=0.2,
                        gamma=4,
                        min_child_weight=6,
                        subsample=0.8,
                        silent=0,
                        objective='binary:logistic',
                        num_round=100)


In [36]:
xgb.fit({"train": s3_input_train})

INFO:sagemaker:Creating training-job with name: xgboost-2024-05-31-14-04-53-281


2024-05-31 14:04:53 Starting - Starting the training job...
2024-05-31 14:05:11 Starting - Preparing the instances for training...
2024-05-31 14:05:45 Downloading - Downloading input data...
2024-05-31 14:06:20 Downloading - Downloading the training image...
2024-05-31 14:06:46 Training - Training image download completed. Training in progress...[34mArguments: train[0m
[34m[2024-05-31:14:07:01:INFO] Running standalone xgboost training.[0m
[34m[2024-05-31:14:07:01:INFO] Path /opt/ml/input/data/validation does not exist![0m
[34m[2024-05-31:14:07:01:INFO] File size need to be processed in the node: 0.01mb. Available memory size in the node: 8483.69mb[0m
[34m[2024-05-31:14:07:01:INFO] Determined delimiter of CSV input is ','[0m
[34m[14:07:01] S3DistributionType set as FullyReplicated[0m
[34m[14:07:01] 212x13 matrix with 2756 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[14:07:01] src/tree/updater_prune.cc:74: tree pruning end, 1 r

In [37]:
xgb_predictor = xgb.deploy(initial_instance_count = 1,instance_type='ml.m4.xlarge')
xgb_predictor

INFO:sagemaker:Creating model with name: xgboost-2024-05-31-14-13-27-288
INFO:sagemaker:Creating endpoint-config with name xgboost-2024-05-31-14-13-27-288
INFO:sagemaker:Creating endpoint with name xgboost-2024-05-31-14-13-27-288


------!

<sagemaker.base_predictor.Predictor at 0x7f7124b81ae0>

In [38]:
endpoint_status = xgb_predictor.endpoint_name
print("Endpoint Status:", endpoint_status)


Endpoint Status: xgboost-2024-05-31-14-13-27-288


In [50]:
xgb_predictor.delete_endpoint()

INFO:sagemaker:Deleting endpoint configuration with name: xgboost-2024-05-31-14-13-27-288
INFO:sagemaker:Deleting endpoint with name: xgboost-2024-05-31-14-13-27-288
