## SageMaker Experiment with TensorFlow
haimtran 25/05/2023

In [3]:
import sys

In [4]:
# update boto3 and sagemaker to ensure latest SDK version
!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install --upgrade boto3
!{sys.executable} -m pip install --upgrade sagemaker
!{sys.executable} -m pip install --upgrade tensorflow
!{sys.executable} -m pip install --upgrade keras tensorflow

Collecting boto3
  Downloading boto3-1.26.142-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.6/135.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting botocore<1.30.0,>=1.29.142 (from boto3)
  Downloading botocore-1.29.142-py3-none-any.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m48.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: botocore, boto3
  Attempting uninstall: botocore
    Found existing installation: botocore 1.29.132
    Uninstalling botocore-1.29.132:
      Successfully uninstalled botocore-1.29.132
  Attempting uninstall: boto3
    Found existing installation: boto3 1.26.132
    Uninstalling boto3-1.26.132:
      Successfully uninstalled boto3-1.26.132
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency c

In [5]:
import json
import boto3
import sagemaker
from sagemaker.session import Session
from sagemaker import get_execution_role
from sagemaker.experiments.run import Run

In [6]:
# --- SageMaker SDK handles ---
sagemaker_session = Session()
# Execution role as reported by the SageMaker SDK.
role = get_execution_role()
# Default bucket of the SageMaker session (the recorded output below shows it
# being created on first use).
default_bucket = sagemaker_session.default_bucket()

# --- boto3 handles ---
boto_sess = boto3.Session()
region = boto_sess.region_name
# Low-level SageMaker API client built from the boto3 session.
sm = boto_sess.client("sagemaker")

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-413175686616


In [7]:
region

'us-east-1'

In [8]:
default_bucket

'sagemaker-us-east-1-413175686616'

In [9]:
import numpy as np 
from tensorflow import keras 
from tensorflow.keras import layers
import pandas as pd 
import boto3

2023-05-27 05:10:45.697431: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
!mkdir -p datasets

In [11]:
num_classes = 10 
input_shape = (28, 28, 1)

In [12]:
s3 = boto3.client("s3")

In [13]:
train_path = "datasets/input_train.npy"
test_path = "datasets/input_test.npy"
train_labels_path = "datasets/input_train_labels.npy"
test_labels_path = "datasets/input_test_labels.npy"

In [14]:
s3.download_file("sagemaker-sample-files", "datasets/image/MNIST/numpy/input_train.npy", train_path)

In [15]:
s3.download_file("sagemaker-sample-files", "datasets/image/MNIST/numpy/input_test.npy", test_path)

In [16]:
# Download the train and test label arrays from the sagemaker-sample-files
# bucket; each S3 key is paired with the local path it is written to.
for s3_key, local_path in (
    ("datasets/image/MNIST/numpy/input_train_labels.npy", train_labels_path),
    ("datasets/image/MNIST/numpy/input_test_labels.npy", test_labels_path),
):
    s3.download_file("sagemaker-sample-files", s3_key, local_path)

## Load and prepare train and test data

In [17]:
x_train = np.load(train_path)

In [18]:
x_test = np.load(test_path)

In [19]:
y_train = np.load(train_labels_path)

In [20]:
y_train.shape

(60000,)

In [21]:
y_train[0]

5

In [22]:
y_test = np.load(test_labels_path)

In [23]:
# Convert the images to float32 and divide by 255.
# The arrays are re-read from the downloaded files instead of rescaling the
# in-memory variables, so re-running this cell can never divide data that was
# already scaled by 255 a second time.
x_train = np.load(train_path).astype("float32") / 255
x_test = np.load(test_path).astype("float32") / 255

In [24]:
# Append the trailing channel axis so the images match input_shape (28, 28, 1).
# Guarded on rank: re-running the cell must not keep adding axes.
if x_train.ndim == 3:
    x_train = np.expand_dims(x_train, -1)

In [25]:
x_train.shape

(60000, 28, 28, 1)

In [26]:
# Append the trailing channel axis so the images match input_shape (28, 28, 1).
# Guarded on rank: re-running the cell must not keep adding axes.
if x_test.ndim == 3:
    x_test = np.expand_dims(x_test, -1)

In [27]:
print("x_train_shape: ", x_train.shape)

x_train_shape:  (60000, 28, 28, 1)


In [28]:
print(x_train.shape[0], "train samples")

60000 train samples


In [29]:
print(x_test.shape[0], "test sample")

10000 test sample


In [30]:
# One-hot encode the integer class labels into num_classes columns.
# Only applied while the labels are still a 1-D vector, so re-running the cell
# does not encode an already one-hot encoded matrix again.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)

In [31]:
num_classes

10

In [32]:
y_train.shape

(60000, 10)

In [33]:
y_train[0,:]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [34]:
# One-hot encode the integer class labels into num_classes columns.
# Only applied while the labels are still a 1-D vector, so re-running the cell
# does not encode an already one-hot encoded matrix again.
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes)

## Construct the model 

In [35]:
def get_model(dropout=0.5):
    """Build and compile the small convolutional classifier used in this notebook.

    The network consists of two Conv2D (3x3, relu) + MaxPooling2D (2x2) stages
    with 32 and 64 filters, followed by Flatten, Dropout and a softmax Dense
    layer. The input shape and the number of output units are read from the
    notebook-level ``input_shape`` and ``num_classes`` variables.

    Args:
        dropout: Rate passed to the Dropout layer placed before the output layer.

    Returns:
        A ``keras.Sequential`` model compiled with categorical cross-entropy
        loss, the "adam" optimizer and the "accuracy" metric.
    """
    # Convolution / pooling stages.
    conv_stages = [
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
    ]
    # Flatten, regularise, and project onto the class probabilities.
    classifier_head = [
        layers.Flatten(),
        layers.Dropout(dropout),
        layers.Dense(num_classes, activation="softmax"),
    ]

    network = keras.Sequential(
        [keras.Input(shape=input_shape), *conv_stages, *classifier_head]
    )
    network.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return network

## Callback to log training metrics

In [36]:
class ExperimentCallback(keras.callbacks.Callback):
    """Keras callback that forwards the per-epoch metrics to an experiment run.

    At the end of every epoch each entry of the ``logs`` dictionary supplied by
    Keras is logged through ``run.log_metric`` and echoed to stdout.
    """

    def __init__(self, run, model, x_test, y_test):
        """Store the run, the model and the evaluation data.

        Args:
            run: Object exposing ``log_metric(name=..., value=..., step=...)``;
                the notebook passes a ``sagemaker.experiments.run.Run``.
            model: Model the callback is attached to.
            x_test: Evaluation inputs; stored on the instance, not read by this class.
            y_test: Evaluation targets; stored on the instance, not read by this class.
        """
        # Let the base class initialise its own bookkeeping attributes.
        super().__init__()
        self.run = run
        # Register the model through the base-class hook instead of assigning
        # ``self.model`` directly: in Keras 3 ``model`` is a property without a
        # setter, so a plain assignment raises AttributeError there.
        self.set_model(model)
        self.x_test = x_test
        self.y_test = y_test

    def on_epoch_end(self, epoch, logs=None):
        """Log every metric reported for the finished epoch.

        Args:
            epoch: Index of the epoch that just ended; used as the metric step.
            logs: Mapping of metric name to value. ``None`` is treated as empty.
        """
        # ``logs`` defaults to None in the signature; fall back to an empty
        # mapping so the loop below is simply skipped in that case.
        logs = logs or {}
        for key, value in logs.items():
            self.run.log_metric(name=key, value=value, step=epoch)
            print("{} -> {}".format(key, value))

## Create First Experiment 

In [37]:
from sagemaker.experiments.run import Run

In [38]:
batch_size = 256
epochs = 10 
dropout = 0.5 

In [39]:
model = get_model(dropout)

In [40]:
experiment_name = "local-keras-experiment"

In [41]:
# Build a freshly initialised model for this run. Without this, re-running the
# cell would keep training the already fitted model while the run still logs
# `epochs` as the full training budget.
model = get_model(dropout)

with Run(experiment_name=experiment_name, sagemaker_session=sagemaker_session) as run:
    # Hyperparameters of this run.
    run.log_parameter("batch_size", batch_size)
    run.log_parameter("epochs", epochs)
    run.log_parameter("dropout", dropout)

    # Register the input datasets, using the same path variables the data was
    # downloaded to and loaded from.
    for dataset_path in (train_path, test_path, train_labels_path, test_labels_path):
        run.log_file(dataset_path, is_output=False)

    # Train with 10% of the training data held out for validation; the callback
    # logs the per-epoch metrics to the run.
    model.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        callbacks=[ExperimentCallback(run, model, x_test, y_test)],
    )

    # Final evaluation on the test set: score[0] is the loss, score[1] the
    # accuracy metric the model was compiled with.
    score = model.evaluate(x_test, y_test, verbose=0)
    print("test loss: ", score[0])
    print("test accuracy: ", score[1])

    run.log_metric(name="Final test loss: ", value=score[0])
    run.log_metric(name="Final test accuracy: ", value=score[1])


Epoch 1/10


2023-05-27 05:10:53.061020: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 169344000 exceeds 10% of free system memory.


accuracy -> 0.8505555391311646
val_loss -> 0.10339517891407013
val_accuracy -> 0.9710000157356262
Epoch 2/10
accuracy -> 0.9577407240867615
val_loss -> 0.06954538822174072
val_accuracy -> 0.9815000295639038
Epoch 3/10
accuracy -> 0.9677592515945435
val_loss -> 0.054845165461301804
val_accuracy -> 0.9851666688919067
Epoch 4/10
accuracy -> 0.9741851687431335
val_loss -> 0.047853317111730576
val_accuracy -> 0.9854999780654907
Epoch 5/10
accuracy -> 0.9778333306312561
val_loss -> 0.04451417550444603
val_accuracy -> 0.9866666793823242
Epoch 6/10
accuracy -> 0.9799814820289612
val_loss -> 0.039013899862766266
val_accuracy -> 0.9904999732971191
Epoch 7/10
accuracy -> 0.9818888902664185
val_loss -> 0.03460635989904404
val_accuracy -> 0.9901666641235352
Epoch 8/10
accuracy -> 0.9833518266677856
val_loss -> 0.035988859832286835
val_accuracy -> 0.9894999861717224
Epoch 9/10
accuracy -> 0.9845185279846191
val_loss -> 0.03208088502287865
val_accuracy -> 0.9916666746139526
Epoch 10/10
accuracy -> 0.

2023-05-27 05:16:40.145724: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 31360000 exceeds 10% of free system memory.


test loss:  0.02979576773941517
test accuracy:  0.9889000058174133
