# MODEL TRAINING: SageMaker's Image-Classifier (Transfer Learning)

Required Inputs:
* source s3 bucket: images split into train and validation.
* .lst files: train and val

Output:
* output s3 bucket: images from the source bucket are reorganized into a new bucket under subfolder train/ or validation/ according to the split rule
* .lst files for each of the train and validation folders

---
## Permissions and Environment Variables

In [1]:
import boto3
import sagemaker
from sagemaker import get_execution_role

# --- Project locations -------------------------------------------------------
# Project bucket
bucket_name = "aai-540-group4"

# Prefix holding the source images; the channel folders and .lst files used
# for training are addressed relative to this location.
images_prefix = "datasets/cct_resized"
s3_images_location = f"s3://{bucket_name}/{images_prefix}/"

# Prefix under which the training output and model are written
output_prefix = "sg-ic-transfer-learning"

# --- AWS handles -------------------------------------------------------------
# Execution role attached to this notebook environment
role = get_execution_role()
print(role)

# Region of the default boto3 session
region = boto3.Session().region_name

# Low-level service clients and the SageMaker SDK session
s3_client = boto3.client("s3")
sm_client = boto3.client("sagemaker")
sess = sagemaker.Session()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
arn:aws:iam::324183265896:role/service-role/AmazonSageMaker-ExecutionRole-20250604T045982


In [4]:
# Look up the container image URI of SageMaker's built-in image-classification
# algorithm for the region the SDK session is bound to.
from sagemaker import image_uris

training_image = image_uris.retrieve(
    framework="image-classification",
    region=sess.boto_region_name,
    version="latest",
)

print(training_image)

811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:1


In [3]:
# Configure the four input channels: the train/validation image folders and
# their matching .lst files. Every channel is declared with the same content
# type, so the channels differ only in the S3 location they point at.
input_data = {
    channel: sagemaker.inputs.TrainingInput(
        s3_data=s3_images_location + suffix,
        content_type="application/x-image",
    )
    for channel, suffix in (
        ("train", "train/"),
        ("validation", "val/"),
        ("train_lst", "train_lst/train.lst"),
        ("validation_lst", "val_lst/val.lst"),
    )
}

In [5]:
# Build the estimator that runs the image-classification training container.
# Model artifacts are written below the project's output prefix.
s3_output_location = f"s3://{bucket_name}/{output_prefix}/output"
ic_estimator = sagemaker.estimator.Estimator(
    # what to run, and with which identity/session
    image_uri=training_image,
    role=role,
    sagemaker_session=sess,
    # compute resources
    instance_type="ml.g4dn.xlarge",
    instance_count=1,
    volume_size=50,  # attached storage, in GB
    # job limits and data handling
    max_run=360000,  # runtime cap in seconds (100 hours)
    input_mode="File",
    output_path=s3_output_location,
)

In [6]:
# Hyperparameters for the image-classification training job
ic_estimator.set_hyperparameters(
    # network
    num_layers=18,
    use_pretrained_model=1,  # start from pretrained weights (transfer learning)
    # data description
    image_shape="3,224,224",
    num_classes=20,
    # NOTE(review): presumably the number of entries in train.lst — confirm
    # and update whenever the training split changes.
    num_training_samples=39361,
    # optimisation
    mini_batch_size=128,
    epochs=10,
    learning_rate=0.01,
    precision_dtype="float32",
    early_stopping=True,
)


In [12]:
# Display the execution role ARN held in `role` (same value the setup cell prints).
role

'arn:aws:iam::324183265896:role/service-role/AmazonSageMaker-ExecutionRole-20250604T045982'

In [7]:
# Launch the training job on the configured input channels; logs=True streams
# the job's log output into the notebook.
ic_estimator.fit(
    inputs=input_data,
    logs=True,
)

INFO:sagemaker:Creating training-job with name: image-classification-2025-06-12-05-31-48-116


2025-06-12 05:31:51 Starting - Starting the training job...
2025-06-12 05:32:04 Starting - Preparing the instances for training...
2025-06-12 05:32:36 Downloading - Downloading input data......
2025-06-12 05:33:36 Downloading - Downloading the training image............
2025-06-12 05:35:53 Training - Training image download completed. Training in progress...[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34mNvidia gpu devices, drivers and cuda toolkit versions (only available on hosts with GPU):[0m
[34mThu Jun 12 05:36:05 2025       [0m
[34m+-----------------------------------------------------------------------------------------+[0m
[34m| NVIDIA-SMI 550.163.01             Driver Version: 550.163.01     CUDA Version: 12.4     |[0m
[34m|-----------------------------------------+------------------------+----------------------+[0m
[34m| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatil