## Datasets located in an S3 bucket

In [None]:
import sagemaker
from sagemaker.inputs import TrainingInput
# Where the dataset lives: s3://<bucket_name>/<datasets_name>/{train,valid}
bucket_name = "your-s3-bucket-name"
datasets_name = "dataset-folder-name"

# One TrainingInput per dataset split, each pointing at its own S3 prefix.
train_input = TrainingInput(s3_data=f"s3://{bucket_name}/{datasets_name}/train")
valid_input = TrainingInput(s3_data=f"s3://{bucket_name}/{datasets_name}/valid")

## Setting hyperparameters

In [8]:
# Hyperparameters handed to the estimator (and from there to the training script).
hyperparameters = dict(
    batch_size=16,
    epochs=10,
    learning_rate=0.001,
    model_name="DenseNet121",
    num_classes=8,
)

In [4]:
# Set up local mode for SageMaker by creating a LocalSession from the SDK.
# NOTE(review): `sagemaker_local_session` is not referenced by any other cell
# visible here (no estimator is given a `sagemaker_session` argument) —
# confirm whether this cell is still needed.
sagemaker_local_session = sagemaker.local.LocalSession()


In [None]:
from sagemaker.pytorch.estimator import PyTorch
from sagemaker import get_execution_role

# IAM role the training job runs under. If get_execution_role() cannot resolve
# a role in your environment, set it explicitly instead:
# execution_role = "Enter-Your-Execution-Role-Here"
execution_role = get_execution_role()

# Training container image. Because image_uri is passed explicitly, this tag
# (PyTorch 2.5.1, Python 3.11, CUDA 12.4) is what actually runs the job.
image_uri = "763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:2.5.1-gpu-py311-cu124-ubuntu22.04-sagemaker"

# Create the estimator object for PyTorch
estimator = PyTorch(
    source_dir="code",  # local directory with the training code; uploaded for the job
    entry_point="train.py",  # training script, resolved relative to source_dir
    # Keep both version fields in sync with the image_uri tag above so the
    # metadata recorded on the estimator matches the container that runs.
    framework_version="2.5.1",
    py_version="py311",
    image_uri=image_uri,
    instance_count=1,  # number of EC2 instances used for training
    instance_type="ml.g5.xlarge",  # EC2 instance type; use "local" for local mode
    role=execution_role,  # execution role used by the training job
    hyperparameters=hyperparameters,
    # NOTE(review): `dependencies` ships the listed paths alongside the code;
    # a requirements.txt is normally pip-installed only when it sits inside
    # source_dir — confirm the packages really get installed.
    dependencies=["requirements.txt"],
)

# Channel name -> data source. NOTE(review): the validation split is exposed
# on the "test" channel; the name is kept as-is because train.py presumably
# reads that channel — confirm against the script.
inputs = {"train": train_input, "test": valid_input}

# Start the training
estimator.fit(inputs)

2026-01-29 03:59:46 Starting - Starting the training job...

## Deploy the model

In [None]:
# Optional: host the trained model behind a SageMaker endpoint.
# NOTE(review): `estimator` was created with an explicit *training* image_uri
# (pytorch-training, GPU build) while this endpoint targets a CPU instance
# type; confirm which container image deploy() uses for hosting — TODO verify.
predictor = estimator.deploy(
    instance_type="ml.m5.large",
    initial_instance_count=1,
)


INFO:sagemaker:Repacking model artifact (s3://sagemaker-us-east-1-575108919340/pytorch-training-2026-01-24-07-48-04-598/output/model.tar.gz), script artifact (s3://sagemaker-us-east-1-575108919340/pytorch-training-2026-01-24-07-48-04-598/source/sourcedir.tar.gz), and dependencies (['requirements.txt']) into single tar.gz file located at s3://sagemaker-us-east-1-575108919340/pytorch-training-2026-01-24-08-09-11-192/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: pytorch-training-2026-01-24-08-09-11-192
INFO:sagemaker:Creating endpoint-config with name pytorch-training-2026-01-24-08-09-11-192
INFO:sagemaker:Creating endpoint with name pytorch-training-2026-01-24-08-09-11-192


-------------------------------------------------------

## Run and test the code locally 

In [None]:
from sagemaker.pytorch.estimator import PyTorch
from sagemaker import get_execution_role

# Role passed to the estimator for the local run.
# NOTE(review): this is a hard-coded, account-specific role name (not a full
# ARN) — replace it with your own role before running.
execution_role = 'AmazonSageMaker-ExecutionRole-20240907T181142'

# Same training image as the cloud job. Because image_uri is passed
# explicitly, this tag (PyTorch 2.5.1, Python 3.11, CUDA 12.4) is what runs.
image_uri = "763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:2.5.1-gpu-py311-cu124-ubuntu22.04-sagemaker"

# Create the estimator object for PyTorch.
# NOTE: this rebinds `estimator`, replacing any estimator created by an
# earlier cell in the same kernel.
estimator = PyTorch(
    source_dir="code",  # local directory with the training code
    entry_point="train.py",  # training script, resolved relative to source_dir
    # Keep both version fields in sync with the image_uri tag above so the
    # metadata recorded on the estimator matches the container that runs.
    framework_version="2.5.1",
    py_version="py311",
    image_uri=image_uri,
    instance_count=1,  # number of instances used for training
    # "local" selects SageMaker local mode instead of a managed EC2 instance.
    # NOTE(review): the image is a GPU build; the SDK also offers "local_gpu"
    # — confirm which one suits the host.
    instance_type="local",
    disable_profiler=True,  # profiler is not needed for a local test run
    role=execution_role,  # execution role used by the training job
    hyperparameters=hyperparameters,
    # NOTE(review): `dependencies` ships the listed paths alongside the code;
    # a requirements.txt is normally pip-installed only when it sits inside
    # source_dir — confirm the packages really get installed.
    dependencies=["requirements.txt"],
)

# Channel name -> data source. NOTE(review): the validation split is exposed
# on the "test" channel; the name is kept as-is because train.py presumably
# reads that channel — confirm against the script.
inputs = {"train": train_input, "test": valid_input}

# Start the training
estimator.fit(inputs)