# ML Pipeline with SageMaker SDK
Complete ML workflow using SageMaker SDK with MLflow integration

In [None]:
import boto3
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.sklearn.model import SKLearnModel
import time
import pandas as pd
import numpy as np

In [None]:
# Configuration shared by every pipeline step below: S3 bucket, IAM execution
# role, and the MLflow tracking server the training container reports to.
# NOTE(review): the bucket name ends in "us-west-2" while the MLflow tracking
# server ARN is in us-east-1 -- confirm this cross-region setup is intended.
bucket = "demo-bucket-smus-ml-us-west-2"
role = (
    "arn:aws:iam::058264284947:role/service-role/"
    "AmazonSageMaker-ExecutionRole-20241115T121975"
)
mlflow_arn = (
    "arn:aws:sagemaker:us-east-1:058264284947:"
    "mlflow-tracking-server/wine-classification-mlflow-v2"
)

# Session built with SDK defaults (no explicit boto session or region passed).
session = sagemaker.Session()

# Echo the key settings so the notebook output records what was used.
print(f"Using bucket: {bucket}", f"Using role: {role}", sep="\n")

In [None]:
# Step 1: Training
# Configure a scikit-learn estimator whose training code is a tarball on S3.
# The MLflow tracking server ARN is handed to the training container through
# an environment variable.
sklearn_estimator = SKLearn(
    entry_point='sagemaker_training_script.py',
    source_dir=f's3://{bucket}/training_code.tar.gz',
    framework_version='1.2-1',
    py_version='py3',
    instance_type='ml.m5.large',
    instance_count=1,
    role=role,
    # Reuse the session created in the configuration cell instead of letting
    # the estimator build a second, implicit one.
    sagemaker_session=session,
    output_path=f's3://{bucket}/model-artifacts/',
    environment={"MLFLOW_TRACKING_SERVER_ARN": mlflow_arn},
    hyperparameters={'n-estimators': 100, 'max-depth': 6, 'random-state': 42},
)

# Epoch-seconds suffix so each run gets a distinct job name.
job_name = f"realistic-model-comparison-{int(time.time())}"
print(f"Starting training job: {job_name}")

sklearn_estimator.fit(
    inputs={'training': f's3://{bucket}/training-data/'},
    job_name=job_name,
)

# The check mark is written as an ASCII escape (U+2705): the literal emoji had
# been corrupted into mojibake by a UTF-8 / cp1252 round trip.
print(f"\u2705 Training completed: {sklearn_estimator.model_data}")

In [None]:
# Step 2: Batch Transform (Inference)
# Build a transformer from the trained estimator; results are written under
# the inference-results/ prefix of the bucket.
transformer = sklearn_estimator.transformer(
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=f's3://{bucket}/inference-results/',
)

# Epoch-seconds suffix so each run gets a distinct job name.
transform_job_name = f"batch-inference-{int(time.time())}"
print(f"Starting batch transform: {transform_job_name}")

transformer.transform(
    data=f's3://{bucket}/inference-data/',
    content_type='text/csv',
    split_type='Line',  # each line of the CSV input is treated as one record
    job_name=transform_job_name,
    # Block until the job finishes so the "completed" message below is
    # accurate regardless of the SDK's default for this flag.
    wait=True,
)

# The check mark is written as an ASCII escape (U+2705): the literal emoji had
# been corrupted into mojibake by a UTF-8 / cp1252 round trip.
print(f"\u2705 Batch inference completed: {transformer.output_path}")

In [None]:
# Step 3: Results Summary
# Recap the identifiers and S3 locations produced by the previous steps.
# The party-popper emoji is written as an ASCII escape (U+1F389): the literal
# character had been corrupted into mojibake by a UTF-8 / cp1252 round trip.
print("\n\U0001F389 ML Pipeline Completed Successfully!")
print(f"Training Job: {job_name}")
print(f"Transform Job: {transform_job_name}")
print(f"Model Artifacts: {sklearn_estimator.model_data}")
print(f"Inference Results: {transformer.output_path}")
print(f"MLflow Tracking: {mlflow_arn}")