# Set Up the SageMaker Session

In [3]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput
from sagemaker.estimator import Estimator

# One SageMaker session is created here and handed to the Estimator later on.
session = sagemaker.Session()

# Execution role of the environment this notebook runs in.
role = get_execution_role()

# Read the region from the SageMaker session itself instead of building a
# second, independent boto3.Session(): that keeps the container-image lookup in
# the same region the session talks to, and avoids ending up with a different
# (or missing) region when the default boto3 configuration disagrees.
region = session.boto_region_name

print(f"Region : {region}")
print(f"Role   : {role}")
print("✅ Session ready")

Region : us-east-1
Role   : arn:aws:iam::504649076991:role/Ali-SageMakerExecutionRole
✅ Session ready


# Set Your S3 Bucket Name

In [4]:
# S3 layout used by this notebook: s3://<bucket>/<prefix>/{train,test,output}/
BUCKET_NAME = "ali-car-price-sagemaker"
PREFIX = "car-price"

# Build the three URIs from a single template so they always share one root.
s3_train, s3_test, s3_output = (
    f"s3://{BUCKET_NAME}/{PREFIX}/{folder}/" for folder in ("train", "test", "output")
)

for label, uri in (("Train  :", s3_train), ("Test   :", s3_test), ("Output :", s3_output)):
    print(label, uri)

Train  : s3://ali-car-price-sagemaker/car-price/train/
Test   : s3://ali-car-price-sagemaker/car-price/test/
Output : s3://ali-car-price-sagemaker/car-price/output/


# Verify Files Exist in S3

In [5]:
# Low-level S3 client; check_s3 uses it to list the objects under a key prefix.
s3 = boto3.client('s3')

def check_s3(bucket, prefix, client=None):
    """Return the keys of every object stored under ``prefix`` in ``bucket``.

    A single ``list_objects_v2`` call returns at most 1,000 keys, so the
    listing is walked page by page with a paginator; otherwise large prefixes
    would be silently truncated.

    Args:
        bucket: Name of the S3 bucket to inspect.
        prefix: Key prefix to filter on, e.g. ``"car-price/train/"``.
        client: Optional S3 client exposing ``get_paginator``. When omitted,
            the module-level ``s3`` client is used.

    Returns:
        list[str]: Object keys in the order S3 returned them; an empty list
        when nothing matches. Keys are returned unfiltered, so "folder"
        placeholder keys that end in ``/`` are included.
    """
    s3_client = s3 if client is None else client
    pages = s3_client.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix)
    # Pages without any match carry no 'Contents' entry at all.
    return [obj['Key'] for page in pages for obj in page.get('Contents', [])]

train_files = check_s3(BUCKET_NAME, f"{PREFIX}/train/")
test_files  = check_s3(BUCKET_NAME, f"{PREFIX}/test/")

print("Train files:", train_files)
print("Test  files:", test_files)

# The listings can contain the bare "folder" placeholder key (for example
# 'car-price/train/'), so a plain non-empty check would pass even when no data
# file was uploaded. Check for the exact files the messages talk about.
assert f"{PREFIX}/train/train.csv" in train_files, "❌ train.csv not found in S3!"
assert f"{PREFIX}/test/test.csv" in test_files, "❌ test.csv not found in S3!"
print("\n✅ Both files confirmed in S3!")

Train files: ['car-price/train/', 'car-price/train/train.csv']
Test  files: ['car-price/test/', 'car-price/test/test.csv']

✅ Both files confirmed in S3!


# Get XGBoost Container

In [6]:
# Resolve the image URI of the XGBoost 1.7-1 training container for this region.
container = sagemaker.image_uris.retrieve(framework="xgboost", region=region, version="1.7-1")

print("Container:", container)
print("✅ XGBoost container ready")

Container: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1
✅ XGBoost container ready


# Create the Estimator

In [7]:
# Describe the training job: which image to run, under which role, on what
# hardware, and where the output should be written in S3.
estimator = Estimator(
    image_uri=container,
    role=role,
    sagemaker_session=session,
    base_job_name="car-price-xgb",
    instance_type="ml.m5.large",
    instance_count=1,
    output_path=s3_output,
)

print("✅ Estimator created")

✅ Estimator created


# Set Hyperparameters

In [8]:
# Hyperparameters are collected in one mapping so the full configuration can be
# inspected in a single place before it is handed to the estimator.
xgb_hyperparameters = {
    # Learning task and the metric reported for it
    "objective": "reg:squarederror",  # regression task
    "eval_metric": "rmse",
    # Boosting schedule
    "num_round": 150,
    "eta": 0.1,
    "early_stopping_rounds": 15,
    # Tree shape and sampling
    "max_depth": 6,
    "min_child_weight": 5,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
}
estimator.set_hyperparameters(**xgb_hyperparameters)

print("✅ Hyperparameters set")

✅ Hyperparameters set


# Define Training Inputs

In [9]:
# Wrap the two S3 prefixes as CSV inputs for the training job.
train_input = TrainingInput(s3_data=s3_train, content_type="text/csv")
test_input = TrainingInput(s3_data=s3_test, content_type="text/csv")

print("✅ Inputs defined")

✅ Inputs defined


# Start Training

In [10]:
# Launch the training job (original note: expect about 3-8 minutes).
# The test split is attached as the "validation" channel, and logs=True asks
# the SDK to show job logs in this cell while the job runs.
estimator.fit(inputs={"train": train_input, "validation": test_input}, logs=True)

print("\n✅ TRAINING COMPLETE!")

INFO:sagemaker:Creating training-job with name: car-price-xgb-2026-02-25-19-42-54-568


2026-02-25 19:42:55 Starting - Starting the training job...
2026-02-25 19:43:11 Starting - Preparing the instances for training...
2026-02-25 19:43:37 Downloading - Downloading input data...
2026-02-25 19:44:22 Downloading - Downloading the training image......
2026-02-25 19:45:28 Training - Training image download completed. Training in progress....
2026-02-25 19:45:52 Uploading - Uploading generated training model
2026-02-25 19:45:52 Completed - Training job completed
..Training seconds: 135
Billable seconds: 135

✅ TRAINING COMPLETE!


# View Results

In [11]:
# Identify the job that was just run and where its model artifact landed.
job_name = estimator.latest_training_job.name
print("Job name     :", job_name)
print("Model saved  :", estimator.model_data)

# Fetch the job description from the SageMaker API for status, timing and metrics.
sm = boto3.client('sagemaker')
desc = sm.describe_training_job(TrainingJobName=job_name)

print("Status       :", desc['TrainingJobStatus'])
for label, field in (
    ("Duration     :", 'TrainingTimeInSeconds'),
    ("Billable time:", 'BillableTimeInSeconds'),
):
    # .get() prints None instead of raising if a timing field is missing.
    print(label, desc.get(field), "seconds")

print("\nFinal Metrics:")
for metric in desc.get('FinalMetricDataList', []):
    print(f"  {metric['MetricName']:30s} = {metric['Value']:.4f}")

Job name     : car-price-xgb-2026-02-25-19-42-54-568
Model saved  : s3://ali-car-price-sagemaker/car-price/output/car-price-xgb-2026-02-25-19-42-54-568/output/model.tar.gz
Status       : Completed
Duration     : 135 seconds
Billable time: 135 seconds

Final Metrics:
  train:rmse                     = 9681.7188
  validation:rmse                = 18887.6152
