In [1]:
%%time

import boto3
import sagemaker
from sagemaker import get_execution_role

role = get_execution_role()
print(role)

region = boto3.Session().region_name

s3_client = boto3.client("s3")

sess = sagemaker.Session()
data_bucket_name = 'thesis-bucket-distraction'
dataset_name = 'Distraction-Driver'

arn:aws:iam::957922590568:role/Sagemaker_full_access
CPU times: user 886 ms, sys: 167 ms, total: 1.05 s
Wall time: 30.4 s


In [2]:
from sagemaker.amazon.amazon_estimator import get_image_uri
training_image = sagemaker.image_uris.retrieve("image-classification", sess.boto_region_name)

813361260812.dkr.ecr.eu-central-1.amazonaws.com/image-classification:1


In [3]:
base_dir='/tmp'
%env BASE_DIR = $base_dir
%env S3_DATA_BUCKET_NAME = $data_bucket_name
%env DATASET_NAME = $dataset_name 

import sys,os

suffix='/mxnet/tools/im2rec.py'
im2rec = list(filter((lambda x: os.path.isfile(x+suffix)), sys.path))[0]+suffix
%env IM2REC = $im2rec

!aws s3 sync s3://$S3_DATA_BUCKET_NAME/$DATASET_NAME $BASE_DIR/$DATASET_NAME --quiet

env: BASE_DIR=/tmp
env: S3_DATA_BUCKET_NAME=thesis-bucket-distraction
env: DATASET_NAME=Distraction-Driver
env: IM2REC=/home/ec2-user/anaconda3/envs/amazonei_mxnet_p36/lib/python3.6/site-packages/mxnet/tools/im2rec.py


In [4]:
%%bash

cd $BASE_DIR
rm *.rec
rm *.lst

echo "Creating LST files"

python $IM2REC --list --recursive --pass-through --test-ratio=0.3 --train-ratio=0.7 $DATASET_NAME $DATASET_NAME > ${DATASET_NAME}_classes

echo "Label classes:"
cat ${DATASET_NAME}_classes

echo "Creating RecordIO files"
python $IM2REC --num-thread=4 ${DATASET_NAME}_train.lst $DATASET_NAME
python $IM2REC --num-thread=4 ${DATASET_NAME}_test.lst $DATASET_NAME
ls -lh *.rec

Creating LST files
Label classes:
drinking 0
hair_and_makeup 1
operating_the_radio 2
reaching_behind 3
safe_driving 4
talking_left_hand 5
talking_right_hand 6
talking_to_passenger 7
texting_left_hand 8
texting_right_hand 9
Creating RecordIO files
Creating .rec file from /tmp/Distraction-Driver_train.lst in /tmp
time: 0.6080989837646484  count: 0
time: 4.950589418411255  count: 1000
time: 5.031804800033569  count: 2000
time: 4.985249042510986  count: 3000
time: 4.970013618469238  count: 4000
time: 4.880450963973999  count: 5000
time: 5.009275197982788  count: 6000
time: 4.96434211730957  count: 7000
time: 4.834452390670776  count: 8000
time: 4.961760997772217  count: 9000
time: 4.950659275054932  count: 10000
time: 4.961684226989746  count: 11000
time: 4.970326900482178  count: 12000
time: 4.8817832469940186  count: 13000
time: 5.015281677246094  count: 14000
time: 4.881587028503418  count: 15000
Creating .rec file from /tmp/Distraction-Driver_test.lst in /tmp
time: 0.03490614891052246 

rm: cannot remove ‘*.rec’: No such file or directory
rm: cannot remove ‘*.lst’: No such file or directory


In [5]:
bucket = sess.default_bucket()
s3train_path = 's3://{}/{}/train/'.format(bucket, dataset_name)
s3validation_path = 's3://{}/{}/validation/'.format(bucket, dataset_name)

!aws s3 rm s3://{bucket}/{dataset_name}/train --recursive
!aws s3 rm s3://{bucket}/{dataset_name}/validation --recursive
    
!aws s3 cp /tmp/{dataset_name}_train.rec $s3train_path
!aws s3 cp /tmp/{dataset_name}_test.rec $s3validation_path

delete: s3://sagemaker-eu-central-1-957922590568/Distraction-Driver/train/Distraction-Driver_train.rec
delete: s3://sagemaker-eu-central-1-957922590568/Distraction-Driver/validation/Distraction-Driver_test.rec
upload: ../../../tmp/Distraction-Driver_train.rec to s3://sagemaker-eu-central-1-957922590568/Distraction-Driver/train/Distraction-Driver_train.rec
upload: ../../../tmp/Distraction-Driver_test.rec to s3://sagemaker-eu-central-1-957922590568/Distraction-Driver/validation/Distraction-Driver_test.rec


In [6]:
train_data = sagemaker.inputs.TrainingInput(
    s3train_path,
    distribution='FullyReplicated',
    content_type='application/x-recordio',
    s3_data_type='S3Prefix'
)

validation_data = sagemaker.inputs.TrainingInput(
    s3validation_path,
    distribution='FullyReplicated',
    content_type='application/x-recordio',
    s3_data_type='S3Prefix'
)

data_channels = {'train': train_data, 'validation': validation_data}

In [7]:
s3_output_location = 's3://{}/{}/output'.format(bucket, dataset_name)
image_classifier = sagemaker.estimator.Estimator(
    training_image,
    role,
    instance_count=2, 
    instance_type="ml.p2.xlarge",
    volume_size=50, 
    max_run=360000,
    input_mode="File",
    output_path=s3_output_location,
    sagemaker_session = sess
)

In [8]:
num_classes = ! ls -l {base_dir}/{dataset_name} | wc -l
num_classes = int(num_classes[0]) - 1

num_training_samples = !cat {base_dir}/{dataset_name}_train.lst | wc -l
num_training_samples = int(num_training_samples[0])

image_classifier.set_hyperparameters(
    num_layers=18, 
    use_pretrained_model=1,
    image_shape="3,480,640",
    num_classes=num_classes,
    num_training_samples=num_training_samples,
    early_stopping="True",
    early_stopping_tolerance=0.05, 
    mini_batch_size=128, 
    epochs=8, 
    learning_rate=0.01,
    precision_dtype="float16" 
)

In [10]:
%%time
import time
now = str(int(time.time()))
training_job_name = 'IC-' + dataset_name.replace('_', '-') + '-' + now
image_classifier.fit(inputs=data_channels, job_name=training_job_name, logs=True)
job = image_classifier.latest_training_job
model_path = f"{base_dir}/{job.name}"

2022-02-07 22:12:25 Starting - Starting the training job...
2022-02-07 22:12:48 Starting - Launching requested ML instancesProfilerReport-1644271944: InProgress
.........
2022-02-07 22:14:08 Starting - Preparing the instances for training...
2022-02-07 22:14:49 Downloading - Downloading input data.........
2022-02-07 22:16:13 Training - Downloading the training image.....[34mDocker entrypoint called with argument(s): train[0m
[34m[02/07/2022 22:17:08 INFO 140435680151360] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/image_classification/default-input.json: {'use_pretrained_model': 0, 'num_layers': 152, 'epochs': 30, 'learning_rate': 0.1, 'lr_scheduler_factor': 0.1, 'optimizer': 'sgd', 'momentum': 0, 'weight_decay': 0.0001, 'beta_1': 0.9, 'beta_2': 0.999, 'eps': 1e-08, 'gamma': 0.9, 'mini_batch_size': 32, 'image_shape': '3,224,224', 'precision_dtype': 'float32'}[0m
[34m[02/07/2022 22:17:08 INFO 140435680151360] Merging with provided configuration from 