# Classification Training (MHIST)

In [60]:
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.inputs import FileSystemInput

# Account-specific settings. Every value here must be changed to match your
# own AWS environment before running this cell.
EFS_FILE_SYSTEM_ID = 'fs-0b7a195df6775de4c'  # MODIFY
ROLE_ARN = 'arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79'  # MODIFY
SUBNET_IDS = ['subnet-008fa8aee9db06e83']  # MODIFY
SECURITY_GROUP_IDS = ['sg-09d4640079b19f275']  # MODIFY

# Initialize the SageMaker session. It is handed to the estimator below so the
# job is submitted through this explicitly created session.
sagemaker_session = sagemaker.Session()

# `/MHIST` directory on the EFS file system, mounted read-only.
efs_data_input = FileSystemInput(
    file_system_id=EFS_FILE_SYSTEM_ID,
    file_system_type='EFS',
    directory_path='/MHIST',
    file_system_access_mode='ro'
)

# `/models` directory on the same EFS file system, mounted read-only.
# NOTE(review): presumably holds pretrained weights read by train_mhist.py - confirm.
efs_model_input = FileSystemInput(
    file_system_id=EFS_FILE_SYSTEM_ID,
    file_system_type='EFS',
    directory_path='/models',
    file_system_access_mode='ro'
)

# Configure the PyTorch estimator: runs train/train_mhist.py on a single
# ml.p3.2xlarge instance inside the given subnet / security group.
estimator = PyTorch(
    source_dir='train',
    entry_point='train_mhist.py',
    role=ROLE_ARN,
    sagemaker_session=sagemaker_session,
    instance_count=1,
    instance_type='ml.p3.2xlarge',
    subnets=SUBNET_IDS,
    security_group_ids=SECURITY_GROUP_IDS,
    framework_version='2.3',
    py_version='py311',
    # Forwarded to the entry-point script.
    hyperparameters={
        'epochs': 20,
        'batch-size': 32,
        'learning-rate': 1e-3,
        'weight-decay':1e-4,
    },
    # Each regex captures one numeric value from the training log output.
    metric_definitions=[
        {'Name': 'TrainingLoss', 'Regex': 'Training Loss: ([0-9\\.]+)'},
        {'Name': 'ValidationLoss', 'Regex': 'Validation Loss: ([0-9\\.]+)'},
        {'Name': 'ValidationAccuracy', 'Regex': 'Validation Accuracy: ([0-9\\.]+)'}
    ],
    base_job_name='MHIST-Classification'
)

# Start the training job with two input channels ('training' and 'models').
# wait=False submits the job without blocking the notebook until it finishes.
estimator.fit({'training': efs_data_input, 'models': efs_model_input}, wait=False)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: MHIST-Classification-2024-10-29-23-04-44-207


# Segmentation Training (Lizard)

In [None]:
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.inputs import FileSystemInput

# Account-specific settings. Every value here must be changed to match your
# own AWS environment before running this cell.
EFS_FILE_SYSTEM_ID = 'fs-0b7a195df6775de4c'  # MODIFY
ROLE_ARN = 'arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79'  # MODIFY
SUBNET_IDS = ['subnet-008fa8aee9db06e83']  # MODIFY
SECURITY_GROUP_IDS = ['sg-09d4640079b19f275']  # MODIFY

# Initialize the SageMaker session. It is handed to the estimator below so the
# job is submitted through this explicitly created session.
sagemaker_session = sagemaker.Session()

# `/Lizard` directory on the EFS file system, mounted read-only.
efs_data_input = FileSystemInput(
    file_system_id=EFS_FILE_SYSTEM_ID,
    file_system_type='EFS',
    directory_path='/Lizard',
    file_system_access_mode='ro'
)

# `/models` directory on the same EFS file system, mounted read-only.
# NOTE(review): presumably holds pretrained weights read by train_lizard.py - confirm.
efs_model_input = FileSystemInput(
    file_system_id=EFS_FILE_SYSTEM_ID,
    file_system_type='EFS',
    directory_path='/models',
    file_system_access_mode='ro'
)


# Configure the PyTorch estimator: runs train/train_lizard.py on a single
# ml.g5.2xlarge instance inside the given subnet / security group.
estimator = PyTorch(
    source_dir='train',
    entry_point='train_lizard.py',
    role=ROLE_ARN,
    sagemaker_session=sagemaker_session,
    instance_count=1,
    instance_type='ml.g5.2xlarge',
    subnets=SUBNET_IDS,
    security_group_ids=SECURITY_GROUP_IDS,
    framework_version='2.3',
    py_version='py311',
    # Forwarded to the entry-point script.
    hyperparameters={
        'epochs': 200,
        'batch-size': 128,
        'learning-rate': 1e-5
    },
    # Each regex captures one numeric value from the training log output.
    metric_definitions=[
        {'Name': 'TrainingLoss', 'Regex': 'Training Loss: ([0-9\\.]+)'},
        {'Name': 'ValidationLoss', 'Regex': 'Validation Loss: ([0-9\\.]+)'},
        {'Name': 'ValidationIOU', 'Regex': 'Validation Mean_IOU: ([0-9\\.]+)'},
        {'Name': 'ValidationDice', 'Regex': 'Validation Mean_DICE: ([0-9\\.]+)'}
    ],
    base_job_name='Lizard-Segmentation'
)

# Start the training job with two input channels ('training' and 'models').
# wait=False submits the job without blocking the notebook until it finishes.
estimator.fit({'training': efs_data_input, 'models': efs_model_input}, wait=False)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: Lizard-Segmentation-2024-11-02-17-33-17-908


# Feature extraction WSI

First, build and push the Docker image by running the `build_and_push.sh` script:
* `cd preprocessing`
* `bash ./build_and_push.sh cucim-tiler`

This builds the custom Docker image and pushes it to an ECR repository called `cucim-tiler`; the image can then be used by the custom Estimator below.

In [None]:
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.inputs import FileSystemInput

# Account-specific settings. Every value here must be changed to match your
# own AWS environment before running this cell.
EFS_FILE_SYSTEM_ID = 'fs-0b7a195df6775de4c'  # MODIFY
ROLE_ARN = 'arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79'  # MODIFY
SUBNET_IDS = ['subnet-008fa8aee9db06e83']  # MODIFY
SECURITY_GROUP_IDS = ['sg-09d4640079b19f275']  # MODIFY
# URI of the custom image; it embeds an AWS account ID and region.
IMAGE_URI = "713881812217.dkr.ecr.us-west-2.amazonaws.com/cucim-tiler:latest"  # MODIFY

# Initialize the SageMaker session. It is handed to the estimator below so the
# job is submitted through this explicitly created session.
sagemaker_session = sagemaker.Session()

# `/TCGA-COAD` directory on the EFS file system, mounted read-only.
efs_data_input = FileSystemInput(
    file_system_id=EFS_FILE_SYSTEM_ID,
    file_system_type='EFS',
    directory_path='/TCGA-COAD',
    file_system_access_mode='ro'
)

# `/TCGA-COAD-features` directory on the same file system, mounted read-write.
# NOTE(review): presumably the container writes the extracted features here -
# confirm against the cucim-tiler image's entry point.
efs_data_output = FileSystemInput(
    file_system_id=EFS_FILE_SYSTEM_ID,
    file_system_type='EFS',
    directory_path='/TCGA-COAD-features',
    file_system_access_mode='rw'
)

# Generic estimator that runs the custom container image on a single
# ml.g5.2xlarge instance inside the given subnet / security group.
estimator = Estimator(
    role=ROLE_ARN,
    sagemaker_session=sagemaker_session,
    instance_count=1,
    image_uri=IMAGE_URI,
    instance_type='ml.g5.2xlarge',
    subnets=SUBNET_IDS,
    security_group_ids=SECURITY_GROUP_IDS,
    base_job_name='Tile-Feature-Extraction',
    # The regex captures the number following 'Processing slide #' in the job log.
    metric_definitions=[
        {'Name': 'Slide #', 'Regex': 'Processing slide #([0-9\\.]+)'},
    ],
)

# Start the job with two channels ('dataset' and 'output').
# wait=False submits the job without blocking the notebook until it finishes.
estimator.fit({'dataset': efs_data_input, 'output': efs_data_output}, wait=False)

INFO:sagemaker:Creating training-job with name: Tile-Feature-Extraction-2024-11-18-22-43-40-962


# WSI Prediction (Slide Level)

In [None]:
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.inputs import FileSystemInput

# Account-specific settings. Every value here must be changed to match your
# own AWS environment before running this cell.
EFS_FILE_SYSTEM_ID = 'fs-0b7a195df6775de4c'  # MODIFY
ROLE_ARN = 'arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79'  # MODIFY
SUBNET_IDS = ['subnet-008fa8aee9db06e83']  # MODIFY
SECURITY_GROUP_IDS = ['sg-09d4640079b19f275']  # MODIFY

# Initialize the SageMaker session. It is handed to the estimator below so the
# job is submitted through this explicitly created session.
sagemaker_session = sagemaker.Session()

# `/TCGA-COAD-features` directory on the EFS file system, mounted read-only.
efs_data_input = FileSystemInput(
    file_system_id=EFS_FILE_SYSTEM_ID,
    file_system_type='EFS',
    directory_path='/TCGA-COAD-features',
    file_system_access_mode='ro'
)

# `/models` directory on the same EFS file system, mounted read-only.
# NOTE(review): presumably holds pretrained weights read by train_msi_tcga.py - confirm.
efs_model_input = FileSystemInput(
    file_system_id=EFS_FILE_SYSTEM_ID,
    file_system_type='EFS',
    directory_path='/models',
    file_system_access_mode='ro'
)

# Configure the PyTorch estimator: runs train/train_msi_tcga.py on a single
# ml.g5.2xlarge instance inside the given subnet / security group.
# NOTE(review): this cell pins PyTorch 2.2 / py310 while the MHIST and Lizard
# training cells use 2.3 / py311 - confirm the difference is intentional.
estimator = PyTorch(
    source_dir='train',
    entry_point='train_msi_tcga.py',
    role=ROLE_ARN,
    sagemaker_session=sagemaker_session,
    instance_count=1,
    instance_type='ml.g5.2xlarge',
    subnets=SUBNET_IDS,
    security_group_ids=SECURITY_GROUP_IDS,
    framework_version='2.2',
    py_version='py310',
    # Forwarded to the entry-point script.
    hyperparameters={
        'epochs': 100,
        'batch-size': 128,
        'learning-rate': 1e-5,
        'max-tiles': 6000
    },
    # Each regex captures one numeric value from the training log output.
    metric_definitions=[
        {'Name': 'TrainingLoss', 'Regex': 'Training Loss: ([0-9\\.]+)'},
        {'Name': 'ValidationLoss', 'Regex': 'Validation Loss: ([0-9\\.]+)'},
        {'Name': 'ValidationAccuracy', 'Regex': 'Validation Accuracy: ([0-9\\.]+)'}
    ],
    base_job_name='WSI-Classification',
)

# Start the training job with two input channels ('training' and 'models').
# wait=False submits the job without blocking the notebook until it finishes.
estimator.fit({'training': efs_data_input, 'models': efs_model_input}, wait=False)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: WSI-Classification-2024-11-04-15-54-54-698
