In [1]:
import sagemaker
import boto3
import os
from sagemaker.tensorflow import TensorFlow

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\blake\AppData\Local\sagemaker\sagemaker\config.yaml


In [2]:
# IAM execution role ARN handed to the estimator as `role`.
# Set SAGEMAKER_EXECUTION_ROLE_ARN to use a different role/account without
# editing the notebook; when it is unset the original dev role is used.
manual_role = os.environ.get(
    'SAGEMAKER_EXECUTION_ROLE_ARN',
    'arn:aws:iam::270803643207:role/sagemaker-dev-execution-role',
)

In [3]:
# Open a SageMaker session and look up its default S3 bucket.
# Both `sagemaker_session` and `bucket` must exist for the rest of the
# notebook, so a failure here is reported and then re-raised instead of being
# swallowed (which would only surface later as an unrelated NameError).
try:
    sagemaker_session = sagemaker.Session()
    # Use default bucket from session if available
    bucket = sagemaker_session.default_bucket()
except Exception as e:
    print(f"Could not establish SageMaker session: {e}. Ensure AWS credentials and region are configured.")
    # Setting `bucket` by hand is not enough to continue: the estimator cell
    # also needs a working `sagemaker_session`. Stop with the original error.
    raise
else:
    print(f"SageMaker session established. Default bucket: {bucket}")

SageMaker session established. Default bucket: sagemaker-us-east-1-270803643207


In [4]:
# --- Training script location ---
# Handed to the estimator as `source_dir` / `entry_point`; the entry-point
# script is expected to sit inside the source directory.
entry_point_script = "train.py"
source_directory = "./code"

# --- Hyperparameters ---
# Key names are hyphenated; NOTE(review): presumably train.py parses them as
# command-line style options -- confirm against the script.
hyperparameters = {
    # Optimisation settings (epoch count kept small so a run finishes quickly).
    "epochs": 2,
    "batch-size": 64,
    "learning-rate": 0.001,
    "optimizer": "adam",
    # Network size settings.
    "filters-conv1": 32,
    "filters-conv2": 64,
    "dense-units": 128,
    "dropout-rate": 0.5,
}

In [5]:
# Instance type for the training job.
# An 'ml.*' type runs as a managed SageMaker training job; 'local' or
# 'local_gpu' (both need a working Docker setup) run the training container
# on this machine instead.
training_instance_type = 'ml.m5.large'

# Describe the mode the chosen instance type actually selects, so the message
# cannot claim "local mode" while a remote job is being launched.
is_local_mode = training_instance_type.startswith('local')
run_mode = 'in local mode' if is_local_mode else 'as a managed SageMaker training job'
print(f"Running {run_mode} using instance type: {training_instance_type}")

Running in local mode using instance type: ml.m5.large


In [6]:
# Configure the TensorFlow estimator from the values defined in earlier cells
# and launch training. Artifacts are written under the `bucket` resolved in
# the session cell, so a bucket chosen there is honoured here as well.
estimator = TensorFlow(
    entry_point=entry_point_script,
    source_dir=source_directory,
    instance_type=training_instance_type,
    instance_count=1,
    role=manual_role,  # IAM role ARN
    framework_version='2.18',  # Must be compatible with train.py and the container image
    py_version='py310',
    hyperparameters=hyperparameters,
    sagemaker_session=sagemaker_session,
    output_path=f's3://{bucket}/sagemaker/keras-fashion-mnist/output',
)

# fit() is called without `inputs`, so no data channels are configured here.
# NOTE(review): presumably train.py obtains its own dataset -- confirm.
estimator.fit()

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: tensorflow-training-2025-05-13-04-16-14-152


2025-05-13 04:16:16 Starting - Starting the training job...
2025-05-13 04:16:30 Starting - Preparing the instances for training...
2025-05-13 04:16:52 Downloading - Downloading input data...
2025-05-13 04:17:29 Downloading - Downloading the training image......
2025-05-13 04:18:40 Training - Training image download completed. Training in progress.2025-05-13 04:18:43.060133: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-13 04:18:47,273 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-05-13 04:18:47,275 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-05-13 04:18:47,276 sagemaker-training-toolkit INFO     No Neurons detected (n