## This notebook contains code for tuning the hyperparameters and then training the model.

In [1]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

# IAM role and SageMaker session that the rest of the notebook works with.
role = get_execution_role()
sess = sagemaker.Session()

# URI of the built-in image-classification algorithm image for the region
# the session is bound to.
training_image = get_image_uri(
    sess.boto_region_name,
    'image-classification',
    repo_version="latest",
)

## We need the training and validation data in RecordIO format.
Note that `s3train_path` and `s3validation_path` contain the S3 locations of the training and validation data in RecordIO format.

In [2]:

def _recordio_channel(s3_prefix):
    """Return a fully replicated RecordIO ``s3_input`` for the given S3 prefix."""
    return sagemaker.session.s3_input(
        s3_prefix,
        distribution='FullyReplicated',
        content_type='application/x-recordio',
        s3_data_type='S3Prefix',
    )


# S3 prefixes of the training and validation data.
s3train_path = 's3://project-completion-udacity/nsfw_dataset/training'
s3validation_path = 's3://project-completion-udacity/nsfw_dataset/validation'

train_data = _recordio_channel(s3train_path)
validation_data = _recordio_channel(s3validation_path)

# Channel name -> input; this mapping is what gets passed as `inputs` to fit().
data_channels = dict(train=train_data, validation=validation_data)


## Specifying the instance for training.

In [3]:
# Bucket and key prefix used to build the S3 location for the training output.
bucket = 'project-completion-udacity'
dataset_name = 'nsfw_dataset'
s3_output_location = 's3://{}/{}/output'.format(bucket, dataset_name)

# Estimator for the algorithm image resolved earlier, configured to train on
# a single ml.p2.xlarge instance and write its output to s3_output_location.
image_classifier = sagemaker.estimator.Estimator(
    training_image,
    role,
    sagemaker_session=sess,
    output_path=s3_output_location,
    train_instance_type='ml.p2.xlarge',
    train_instance_count=1,
)

In [4]:
# Number of classes; passed to the algorithm as the `num_classes` hyperparameter.
num_classes = 5

# Manifest of the training set, read from the notebook's working directory.
# Assumes one training sample per line -- TODO confirm against how the .lst was generated.
TRAIN_LST_PATH = 'nsfw_dataset_train.lst'


def _count_samples(lst_path):
    """Return the number of non-blank lines in the manifest at ``lst_path``.

    Pure-Python replacement for ``! cat <file> | wc -l``: it needs neither
    IPython nor Unix tools, it also counts a final line that lacks a trailing
    newline, and it fails loudly when the file is missing.

    Raises:
        FileNotFoundError: if ``lst_path`` does not exist.
    """
    # Binary mode: only line boundaries matter, so no decoding is needed.
    with open(lst_path, 'rb') as lst_file:
        return sum(1 for line in lst_file if line.strip())


num_training_samples = _count_samples(TRAIN_LST_PATH)
print(num_training_samples , num_classes)

126254 5


## Declaring the base hyperparameters that we don't wish to tune.

In [5]:

# Reference for the SageMaker built-in Image Classifier hyperparameters:
# https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html

# Settings we won't want to change: they define things like the size of the
# input images and the number of training classes.
base_hyperparameters = {
    'use_pretrained_model': 1,
    'image_shape': '3,224,224',
    'num_classes': num_classes,
    'num_training_samples': num_training_samples,
    'augmentation_type': 'crop_color_transform',
    'epochs': 1,
}

# Settings we may want to tune, since they can affect how well training goes;
# they are layered on top of the fixed ones.
hyperparameters = dict(
    base_hyperparameters,
    learning_rate=0.001,
    mini_batch_size=5,
)

image_classifier.set_hyperparameters(**hyperparameters)

# hyperparameters


## Declaring the hyperparameters we wish to tune, and then creating the hyperparameter tuning job.

In [13]:
from sagemaker.tuner import (
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
    IntegerParameter,
)

# Ranges handed to the tuner for each hyperparameter being tuned.
hyperparameter_ranges = dict(
    optimizer=CategoricalParameter(['nag', 'adam']),
    learning_rate=ContinuousParameter(0.0001, 0.01),
    mini_batch_size=IntegerParameter(15, 32),
)

# Name of the objective metric given to the tuner.
objective_metric_name = 'validation:accuracy'

# max_jobs=5 with max_parallel_jobs=1.
tuner = HyperparameterTuner(
    estimator=image_classifier,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=5,
    max_parallel_jobs=1,
)

# NOTE(review): confirm that tuner.fit() actually honours `logs` in the
# installed SDK version; it is kept here unchanged.
tuner.fit(inputs=data_channels, logs=True, include_cls_metadata=False)


In [8]:
# Once the tuning job has produced a best training job, uncomment to attach an estimator to it:
# best_image_classifier = sagemaker.estimator.Estimator.attach(tuner.best_training_job())


In [None]:
# %%time

# import time
# now = str(int(time.time()))
# training_job_name = 'IC-' + dataset_name.replace('_', '-') + '-' + now

# image_classifier.fit(inputs=data_channels, job_name=training_job_name, logs=True)

# job = image_classifier.latest_training_job
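# NOTE(review): `base_dir` is not defined in the visible cells of this notebook -- define it before uncommenting.
# model_path = f"{base_dir}/{job.name}"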

# print(f"\n\n Finished training! The model is available for download at: {image_classifier.output_path}/{job.name}/output/model.tar.gz")
