In [None]:
pip install sagemaker --upgrade


Note: you may need to restart the kernel to use updated packages.


In [None]:
import boto3
import pandas as pd
import os
import urllib.request
import sagemaker
from sagemaker import image_uris
from sagemaker.inputs import TrainingInput

In [None]:
# Resolve the AWS context used by the rest of the notebook:
#   role    - execution role returned by the SageMaker SDK
#   region  - default region of the boto3 session
#   session - SageMaker SDK session object
role = sagemaker.get_execution_role()
region = boto3.Session().region_name
session = sagemaker.Session()


In [None]:
# S3 bucket and key prefix under which the data and model artifacts are stored.
bucket_name, prefix = 'bankapplicationroshan123', 'bank-marketing-model'

# Echo the configuration so the target region/bucket are visible in the output.
for label, value in (("Region", region), ("Bucket", bucket_name)):
    print(f"{label}: {value}")

Region: ap-south-1
Bucket: bankapplicationroshan123


In [None]:
import boto3

bucket_name = 'bankapplicationroshan123'
region = boto3.Session().region_name
s3 = boto3.client('s3')

# Create the bucket if it doesn't exist.
# For us-east-1 the request is sent without a CreateBucketConfiguration; every
# other region passes itself as the LocationConstraint.
create_bucket_kwargs = {'Bucket': bucket_name}
if region != 'us-east-1':
    create_bucket_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': region}

try:
    s3.create_bucket(**create_bucket_kwargs)
    print(f"‚úÖ Bucket '{bucket_name}' created successfully in region {region}")
except s3.exceptions.BucketAlreadyOwnedByYou:
    # Not an error: the bucket is already there and belongs to this account.
    print(f"‚ÑπÔ∏è Bucket '{bucket_name}' already exists and is owned by you.")
except Exception as err:
    # Any other failure is reported but does not stop the notebook.
    print('‚ùå Error creating bucket:', err)


‚úÖ Bucket 'bankapplicationroshan123' created successfully in region ap-south-1


In [None]:
# Load the dataset into a DataFrame, using the first CSV column as the index.
# NOTE(review): `file_name` is not defined in any visible cell of this notebook,
# so on a fresh kernel this cell fails with a NameError. Define the CSV path
# (and the step that fetches the file) in a cell above this one.
try:
    model_data = pd.read_csv(file_name, index_col=0)
except Exception as e:
    print('‚ùå Data load error:', e)
    # Every later cell needs `model_data`; re-raise so a "Run All" stops here
    # instead of failing further down with a less obvious error.
    raise
else:
    print('‚úÖ Success: Data loaded into DataFrame.')
    print(model_data.shape)


‚úÖ Success: Data loaded into DataFrame.
(41188, 61)


In [None]:
from sklearn.model_selection import train_test_split

# The target is one-hot encoded as `y_no` / `y_yes`; `y_yes` is the label and
# both columns are excluded from the features (matching the inference cells).
LABEL_COLUMN = 'y_yes'
ONE_HOT_LABEL_COLUMNS = ['y_no', 'y_yes']

# Reproducible 80/20 split of the full dataset.
train_data, test_data = train_test_split(model_data, test_size=0.2, random_state=42)

train_path = 'train.csv'
test_path = 'test.csv'


def write_xgboost_csv(frame, path):
    """Write `frame` to `path` in the CSV layout used for XGBoost training.

    The label column is placed first, followed by the feature columns in their
    original order, with no header row and no index column.
    """
    features = frame.drop(columns=ONE_HOT_LABEL_COLUMNS)
    pd.concat([frame[LABEL_COLUMN], features], axis=1).to_csv(
        path, index=False, header=False
    )


# The upload cell reads these files from local disk, so they must be written
# here; previously only the file names were defined.
write_xgboost_csv(train_data, train_path)
write_xgboost_csv(test_data, test_path)

In [None]:
s3 = boto3.Session().resource('s3')

# Map each local file to its destination key. S3 keys always use '/' as the
# separator, so they are built explicitly rather than with os.path.join, whose
# separator depends on the operating system running the notebook.
uploads = {
    train_path: f'{prefix}/train/train.csv',
    test_path: f'{prefix}/test/test.csv',
}

for local_path, object_key in uploads.items():
    s3.Bucket(bucket_name).Object(object_key).upload_file(local_path)

print(f"‚úÖ Uploaded training and test data to s3://{bucket_name}/{prefix}/")


‚úÖ Uploaded training and test data to s3://bankapplicationroshan123/bank-marketing-model/


In [None]:
def _csv_channel(folder):
    """Return a CSV TrainingInput pointing at `folder` under the project prefix."""
    return TrainingInput(
        s3_data=f's3://{bucket_name}/{prefix}/{folder}',
        content_type='csv',
    )


# One input channel per uploaded dataset.
s3_input_train = _csv_channel('train')
s3_input_test = _csv_channel('test')

print("‚úÖ S3 input objects ready")


‚úÖ S3 input objects ready


In [None]:
# Look up the XGBoost container image URI for the current region.
xgboost_image_args = {
    'framework': 'xgboost',
    'region': region,
    'version': '1.0-1',  # or latest available
}
container = image_uris.retrieve(**xgboost_image_args)

In [None]:
# XGBoost training hyperparameters; every value is passed as a string.
hyperparameters = dict(
    max_depth="5",
    eta="0.2",
    gamma="4",
    min_child_weight="6",
    subsample="0.7",
    objective="binary:logistic",
    num_round="50",
)

In [None]:
# S3 location where the training job writes its output.
output_path = '/'.join([f's3://{bucket_name}', prefix, 'output'])

In [None]:
# Collect the training-job settings in one place, then build the estimator.
estimator_settings = {
    'image_uri': container,
    'role': role,
    'instance_count': 1,
    'instance_type': 'ml.m5.2xlarge',
    'volume_size': 5,            # GB
    'max_run': 300,              # seconds
    'use_spot_instances': True,
    'max_wait': 600,             # seconds (for spot)
    'output_path': output_path,
    'hyperparameters': hyperparameters,
}
estimator = sagemaker.estimator.Estimator(**estimator_settings)

print("‚úÖ Estimator created successfully")


‚úÖ Estimator created successfully


In [None]:

print("üöÄ Starting training job...")

# Channel name -> input; the held-out split is supplied as the validation channel.
training_channels = {'train': s3_input_train, 'validation': s3_input_test}
estimator.fit(training_channels)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-11-11-19-49-56-656


üöÄ Starting training job...
2025-11-11 19:49:57 Starting - Starting the training job...
2025-11-11 19:50:29 Downloading - Downloading input data...
2025-11-11 19:50:45 Downloading - Downloading the training image...
2025-11-11 19:51:25 Training - Training image download completed. Training in progress...[34m[2025-11-11 19:51:40.291 ip-10-0-213-61.ap-south-1.compute.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined del

In [None]:
# Deploy the trained model to an endpoint backed by a single ml.m5.large instance.
endpoint_settings = dict(initial_instance_count=1, instance_type='ml.m5.large')
xgb_predictor = estimator.deploy(**endpoint_settings)

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2025-11-11-19-52-13-244
INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2025-11-11-19-52-13-244
INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2025-11-11-19-52-13-244


------!

In [None]:
# Smoke-test the endpoint with the first row of the dataset: drop the two
# target columns and send the remaining values as one comma-separated line.
first_row = model_data.drop(columns=['y_no', 'y_yes']).iloc[0]
payload = ','.join(str(value) for value in first_row.tolist())

response = xgb_predictor.predict(payload, initial_args={'ContentType': 'text/csv'})

print("‚úÖ Prediction response:", response)


‚úÖ Prediction response: b'0.03213543817400932'


In [None]:
import numpy as np
from sagemaker.serializers import CSVSerializer


def parse_prediction_response(raw_response):
    """Convert the endpoint's raw byte response into a 1-D float array.

    Scores are split on commas and on any whitespace, so both comma-separated
    and newline-separated responses are parsed in full. An empty response
    yields an empty array.
    """
    text = raw_response.decode('utf-8')
    return np.array(text.replace(',', ' ').split(), dtype=float)


# Feature matrix for the held-out rows (both target columns removed).
test_data_array = test_data.drop(['y_no', 'y_yes'], axis=1).values

# The CSV serializer is what marks requests as text/csv, so the separate
# `xgb_predictor.content_type = 'text/csv'` assignment was redundant and has
# been dropped.
# NOTE(review): on SageMaker SDK v2 `content_type` appears to be a read-only
# property, in which case that assignment would raise - confirm.
xgb_predictor.serializer = CSVSerializer()

# Send the rows in small batches to keep each request payload small.
batch_size = 100
predictions = []

for start in range(0, len(test_data_array), batch_size):
    batch = test_data_array[start:start + batch_size]
    payload = '\n'.join(','.join(map(str, row)) for row in batch)
    response = xgb_predictor.predict(payload)
    preds = parse_prediction_response(response)
    # Guard against a partially parsed response: one score per row is expected.
    if len(preds) != len(batch):
        raise ValueError(
            f"Expected {len(batch)} predictions for rows starting at {start}, "
            f"got {len(preds)}"
        )
    predictions.extend(preds)

predictions_array = np.array(predictions)
print("‚úÖ Predictions done! Shape:", predictions_array.shape)


‚úÖ Predictions done! Shape: (8238,)


In [None]:
# Display the predicted scores (NumPy abbreviates long arrays in the repr).
predictions_array

array([0.10612737, 0.02720331, 0.08856416, ..., 0.05615539, 0.09910017,
       0.03208514])

In [None]:
import pandas as pd
import numpy as np


def _pct(part, whole):
    """Return `part` as a percentage of `whole`, or NaN when `whole` is zero."""
    return part / whole * 100 if whole else float('nan')


# Build confusion matrix: observed label (rows) vs. rounded prediction (columns).
cm = pd.crosstab(index=test_data['y_yes'],
                 columns=np.round(predictions_array),
                 rownames=['Observed'],
                 colnames=['Predicted'])

print("\nConfusion Matrix:\n", cm)

# Extract cells safely. crosstab omits a row/column for a class that never
# occurs, so positional indexing (iloc) can raise or read the wrong cell.
# Reindex to a full 2x2 table (missing cells become 0) and look up by label.
cm_full = cm.reindex(index=[0, 1], columns=[0, 1], fill_value=0)
tn = int(cm_full.loc[0, 0])
tp = int(cm_full.loc[1, 1])
fp = int(cm_full.loc[0, 1])
fn = int(cm_full.loc[1, 0])

# Overall accuracy
p = _pct(tp + tn, tp + tn + fp + fn)

print(f"\n{'Overall Classification Rate:':<30}{p:>6.2f}%\n")

# Pretty display: each row shows the share of that observed class per prediction.
print(f"{'Predicted':<15}{'No Purchase':<15}{'Purchase':>10}")
print("Observed")
print(f"{'No Purchase':<15}{_pct(tn, tn + fp):>6.1f}% ({tn}){_pct(fp, tn + fp):>10.1f}% ({fp})")
print(f"{'Purchase':<15}{_pct(fn, fn + tp):>6.1f}% ({fn}){_pct(tp, fn + tp):>10.1f}% ({tp})\n")




Confusion Matrix:
 Predicted   0.0  1.0
Observed            
0          7193  110
1           748  187

Overall Classification Rate:   89.58%

Predicted      No Purchase      Purchase
Observed
No Purchase      98.5% (7193)       1.5% (110)
Purchase         80.0% (748)      20.0% (187)



In [None]:
import boto3
import sagemaker


# Tear down the endpoint; failures are reported as warnings, not raised.
try:
    xgb_predictor.delete_endpoint()
except Exception as err:
    print("‚ö†Ô∏è Endpoint deletion warning:", err)
else:
    print("‚úÖ Endpoint deleted successfully.")


bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)

# Empty the bucket first, then remove the bucket itself.
try:
    bucket_to_delete.objects.all().delete()
    bucket_to_delete.delete()
except Exception as err:
    print("‚ö†Ô∏è Bucket deletion warning:", err)
else:
    print(f"‚úÖ Bucket '{bucket_name}' and all its contents deleted successfully.")


INFO:sagemaker:Deleting endpoint configuration with name: sagemaker-xgboost-2025-11-11-19-52-13-244
INFO:sagemaker:Deleting endpoint with name: sagemaker-xgboost-2025-11-11-19-52-13-244


‚úÖ Endpoint deleted successfully.
