In [1]:
import sagemaker
import boto3
from sklearn.model_selection import train_test_split
import pandas as pd

# Name of the S3 bucket the upload cell writes the CSV files to.
bucket = 'sagemaker-ml-example'

# Low-level boto3 SageMaker client; later cells use it to describe the
# training job and to delete the endpoint.
sm_boto3 = boto3.client("sagemaker")

# SageMaker SDK session; its underlying boto session reports the active region.
session = sagemaker.Session()
region = session.boto_session.region_name
print(f'using region {region}')




sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/xdg-ubuntu/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/yuxuan/.config/sagemaker/config.yaml


using region us-east-2


In [13]:
# Load the dataset from a CSV file in the working directory.
df = pd.read_csv("mob.csv")
# Preview the first rows as this cell's displayed output.
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [14]:
# The last column is the target; every column before it is a feature.
columns = list(df.columns)
features, label = columns[:-1], columns[-1]

x = df[features]
y = df[label]

In [15]:
# Hold out 15% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.15,
    random_state=0,
)

In [16]:
# Build the frames that get written to CSV: the feature columns plus the label.
# Explicit copies are used because wrapping an existing DataFrame in
# pd.DataFrame(...) does not copy by default, so adding the label column to
# the wrapper can write through to (or warn about) X_train / X_test.
trainX = X_train.copy()
trainX[label] = y_train

testX = X_test.copy()
testX[label] = y_test

In [None]:
# Persist both splits locally, without the index column.
trainX.to_csv("train-V-1.csv", index=False)
testX.to_csv("test-V-1.csv", index=False)

# Upload the two files (train first, then test) under a common key prefix
# and keep the S3 URIs returned by the SDK.
sk_prefix = "sagemaker/mob/sklearncontainer"
train_path, test_path = (
    session.upload_data(path=csv_name, bucket=bucket, key_prefix=sk_prefix)
    for csv_name in ("train-V-1.csv", "test-V-1.csv")
)


In [19]:
# Show the S3 locations of the uploaded files, one per line.
print(train_path, test_path, sep="\n")

s3://sagemaker-ml-example/sagemaker/mob/sklearncontainer/train-V-1.csv
s3://sagemaker-ml-example/sagemaker/mob/sklearncontainer/test-V-1.csv


In [2]:
from sagemaker.sklearn.estimator import SKLearn
# S3 prefix used as the data location for both input channels below.
prefix = 's3://sagemaker-ml-example/sagemaker/mob/sklearncontainer/'
FRAMEWORK_VERSION = '0.23-1'

sklean_estimator = SKLearn(
    # What to run, and with which container version and extra dependencies.
    entry_point='script.py',
    dependencies=['requirements.txt'],
    framework_version=FRAMEWORK_VERSION,
    hyperparameters={"n_estimators": 100},
    # Job identity.
    base_job_name="RF-custom-sklearn",
    role="arn:aws:iam::915713862826:role/service-role/AmazonSageMaker-ExecutionRole-20250403T165574",
    # Training hardware.
    instance_type="ml.m5.large",
    instance_count=1,
    # Spot training together with the run / wait limits.
    use_spot_instances=True,
    max_run=3600,
    max_wait=7200,
)

# Both the "train" and the "test" channel point at the same S3 prefix.
channels = {channel: prefix for channel in ("train", "test")}
sklean_estimator.fit(channels, wait=True)

2025-04-06 01:23:06 Starting - Starting the training job...
2025-04-06 01:23:40 Downloading - Downloading input data...
2025-04-06 01:24:05 Downloading - Downloading the training image.....2025-04-06 01:24:50,104 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2025-04-06 01:24:50,106 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-06 01:24:50,145 sagemaker_sklearn_container.training INFO     Invoking user training script.
2025-04-06 01:24:50,302 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:
/miniconda3/bin/python -m pip install -r requirements.txt
Collecting sagemaker
  Downloading sagemaker-2.229.0-py3-none-any.whl (1.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.5/1.5 MB 23.6 MB/s eta 0:00:00
Collecting ipykernel
  Downloading ipykernel-6.16.2-py3-none-any.whl (138 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 138.5/138.5 kB 20.6 MB/s eta 0:00:00
Collecting 

In [4]:
# Wait for the most recent training job without streaming its logs.
training_job = sklean_estimator.latest_training_job
training_job.wait(logs="None")

# Look up where the job stored the model artifact in S3.
job_description = sm_boto3.describe_training_job(TrainingJobName=training_job.name)
artifact = job_description["ModelArtifacts"]["S3ModelArtifacts"]

print("Model artifact present at: ", artifact)


2025-04-06 01:25:29 Starting - Preparing the instances for training
2025-04-06 01:25:29 Downloading - Downloading the training image
2025-04-06 01:25:29 Training - Training image download completed. Training in progress.
2025-04-06 01:25:29 Uploading - Uploading generated training model
2025-04-06 01:25:29 Completed - Training job completed
Model artifact present at:  s3://sagemaker-us-east-2-915713862826/RF-custom-sklearn-2025-04-06-01-23-03-519/output/model.tar.gz


In [8]:
# Making a copy of the built model for deployment
from sagemaker.sklearn.model import SKLearnModel
from time import gmtime, strftime

# Append the current UTC timestamp so the model name is time-based.
created_at = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model_name = "Custom-sklearn-model" + created_at

# Wrap the trained artifact in a deployable SKLearn model object.
model = SKLearnModel(
    model_data=artifact,
    entry_point="script.py",
    framework_version=FRAMEWORK_VERSION,
    role="arn:aws:iam::915713862826:role/service-role/AmazonSageMaker-ExecutionRole-20250403T165574",
    name=model_name,
)

In [9]:
# Echo the model object so the cell displays its repr.
model

<sagemaker.sklearn.model.SKLearnModel at 0x7cfc63728220>

In [10]:
# Endpoint Deployment
# Endpoint name: fixed stem plus the current UTC timestamp.
deploy_stamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_name = "Custom-sklearn-model" + deploy_stamp
print("EndpointName={}".format(endpoint_name))

# Deploy the model behind a single-instance endpoint and keep the predictor handle.
predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.m4.xlarge",
    initial_instance_count=1,
)

EndpointName=Custom-sklearn-model2025-04-06-01-45-15


------!

In [11]:
# Echo the endpoint name so the cell displays it.
endpoint_name

'Custom-sklearn-model2025-04-06-01-45-15'

In [17]:
# Feature values of the first five test rows, as nested lists, for use as an
# endpoint prediction payload.
testX[features].iloc[:5].values.tolist()

[[1454.0,
  1.0,
  0.5,
  1.0,
  1.0,
  0.0,
  34.0,
  0.7,
  83.0,
  4.0,
  3.0,
  250.0,
  1033.0,
  3419.0,
  7.0,
  5.0,
  5.0,
  1.0,
  1.0,
  0.0],
 [1092.0,
  1.0,
  0.5,
  1.0,
  10.0,
  0.0,
  11.0,
  0.5,
  167.0,
  3.0,
  14.0,
  468.0,
  571.0,
  737.0,
  14.0,
  4.0,
  11.0,
  0.0,
  1.0,
  0.0],
 [1524.0,
  1.0,
  1.8,
  1.0,
  0.0,
  0.0,
  10.0,
  0.6,
  174.0,
  4.0,
  1.0,
  154.0,
  550.0,
  2678.0,
  16.0,
  5.0,
  13.0,
  1.0,
  0.0,
  1.0],
 [1807.0,
  1.0,
  2.1,
  0.0,
  2.0,
  0.0,
  49.0,
  0.8,
  125.0,
  1.0,
  10.0,
  337.0,
  1384.0,
  1906.0,
  17.0,
  13.0,
  13.0,
  0.0,
  1.0,
  1.0],
 [1086.0,
  1.0,
  1.7,
  1.0,
  0.0,
  1.0,
  43.0,
  0.2,
  111.0,
  6.0,
  1.0,
  56.0,
  1150.0,
  3285.0,
  11.0,
  5.0,
  17.0,
  1.0,
  1.0,
  0.0]]

In [18]:
# Send the first five test rows (features only) to the endpoint and print the result.
sample_rows = testX[features][0:5].values.tolist()
print(predictor.predict(sample_rows))

[3 0 2 1 3]


In [19]:
# Delete the endpoint so it stops incurring costs.
# NOTE(review): only the endpoint is deleted here; the endpoint configuration
# and model created during deployment are presumably left in place — confirm
# whether they should be cleaned up as well.
sm_boto3.delete_endpoint(EndpointName=endpoint_name)

{'ResponseMetadata': {'RequestId': '3de5125d-0845-4876-814f-c6b7ddd9897e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '3de5125d-0845-4876-814f-c6b7ddd9897e',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Sun, 06 Apr 2025 01:51:22 GMT',
   'content-length': '0'},
  'RetryAttempts': 0}}