In [None]:
import boto3
import io
import os
import time
import pandas as pd
import numpy as np
import sagemaker
import sagemaker.amazon.common as smac
from sagemaker.amazon.amazon_estimator import RecordSet
from sagemaker import get_execution_role

# S3 client used for downloads, plus the location of the Chapter 4 output.
# Replace the bucket name below with the bucket you used in Chapter 4.
s3 = boto3.client('s3')
bucket = 'bucket-name-from-Chapter4'
prefix = 'aiml-book/chapter4/glue-out/'

In [None]:
# Copy the scaled CSV produced in Chapter 4 from S3 into the notebook's working directory
s3.download_file(
    Bucket=bucket,
    Key=prefix + 'wine_scaled.csv',
    Filename='wine_scaled.csv',
)

In [None]:
# Read the downloaded CSV into a pandas DataFrame so it is easy to work with
# (comma-separated; the first row holds the column names)
wine_scaled_df = pd.read_csv(
    './wine_scaled.csv',
    header=0,
    sep=',',
)
# show (rows, columns) as the cell output
wine_scaled_df.shape

In [None]:
# preview the top five rows of the scaled dataset
wine_scaled_df.head(n=5)

In [None]:
# Now that we have our data, keep only the first 1,000 rows and the first six columns
# (this gets rid of the country columns) to make the dataset easier to understand
wine_alg_df = wine_scaled_df.iloc[:1000, :6]
wine_alg_df.head(n=5)

In [None]:
# Desired column order: the label ('price', the value we want to predict) comes first,
# followed by the input features
col_ord = [
    'price',
    'points',
    'last_year_points',
    'designation_freq',
    'winery_freq',
    'variety_transformed',
]
# reindex returns a new DataFrame with the columns arranged as listed above
wine_alg_ord_df = wine_alg_df.reindex(columns=col_ord)
wine_alg_ord_df.head(n=5)

We will now use a couple of methods to **train a regression model to predict the price for a bottle of wine** based on the points, last year's points, winery, designation, and grape variety features that we engineered in Chapters 3 and 4 of the book.

### Algorithm 1 - Linear Learner
#### Algorithm Type = Statistical/Math Function
#### ML use case = Regressor
#### ML topic = Tabular data

In [None]:
# we need the IAM role from our notebook to run our training job
# (the same role is passed to the LinearLearner estimator and to the
# create_training_job request later in this notebook)
role = get_execution_role()
# print the role so we can see which one the training jobs will use
print(role)

Split our dataset into train and validation datasets in prep for model training

In [None]:
# Randomly assign roughly 90% of the rows to training and the remainder to validation.
# A seeded generator keeps the split identical across Restart Kernel -> Run All,
# so results from different runs can be compared.
split_rng = np.random.default_rng(42)
split_list = split_rng.random(wine_alg_ord_df.shape[0])  # one uniform draw in [0, 1) per row
t_list = split_list < 0.9
v_list = ~t_list  # complement of the training mask: every row lands in exactly one set

train_ds = wine_alg_ord_df[t_list]
val_ds = wine_alg_ord_df[v_list]

Now we extract the label and input features for both training and validation

In [None]:
def _label_and_features(frame):
    """Return (label, features) as NumPy arrays: column 0 is the label, the remaining columns are the features."""
    return frame.iloc[:, 0].to_numpy(), frame.iloc[:, 1:].to_numpy()


train_label, train_features = _label_and_features(train_ds)
val_label, val_features = _label_and_features(val_ds)

#### IMPORTANT - We are using an ml.m5.xlarge instance here to run our training; this will incur costs to your AWS account

In [None]:
# Configure the Linear Learner estimator: what to learn first, then where to run it
estimator = sagemaker.LinearLearner(
    # hyperparameters: a regressor trained for 10 epochs with squared loss
    predictor_type="regressor",
    loss="squared_loss",
    epochs=10,
    # execution settings: IAM role and a single ml.m5.xlarge training instance
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
)

Convert our datasets into RecordSets as expected by SageMaker

In [None]:
# Wrap the float32 feature/label arrays as RecordSets, one per input channel
train_records = estimator.record_set(
    train_features.astype("float32"),
    labels=train_label.astype("float32"),
    channel="train",
)
val_records = estimator.record_set(
    val_features.astype("float32"),
    labels=val_label.astype("float32"),
    channel="validation",
)

#### Now start training by executing fit on our estimator object

In [None]:
# Start the training job on both channels; wait=False returns without blocking on the job,
# so check its status in the SageMaker console
estimator.fit(
    records=[train_records, val_records],
    mini_batch_size=50,
    wait=False,
)

#### Navigate to Amazon SageMaker console and click on Training Jobs on the left to check the status of your training

### Algorithm 2 - Multi-layer Perceptron or MLP
#### Algorithm Type = Neural Network
#### ML Framework = Tensorflow
#### ML use case = Regression
#### ML topic = Tabular data

We will reuse the train and validation features from our linear learner example above. We are trying to predict the price of wine based on input features such as points, winery, designation, and grape variety

In [None]:
# Sanity-check the array shapes we are about to feed into the neural network
print(f"training label shape is: {train_label.shape}")
print(f"training features shape is: {train_features.shape}")
print(f"validation label shape is: {val_label.shape}")
print(f"validation features shape is: {val_features.shape}")

### Define the neural network architecture

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Build the network in one go, listing the layers from input to output:
#   hidden layer 1: 5 neurons, ReLU (its input width is the number of feature columns)
#   hidden layer 2: 3 neurons, ReLU
#   output layer:   1 neuron
model = Sequential([
    Dense(5, activation='relu', kernel_initializer='random_normal', input_shape=(train_features.shape[1],)),
    Dense(3, activation='relu', kernel_initializer='random_normal'),
    Dense(1),
])

### Compile and fit the model

In [None]:
# Loss: mean squared error between the label and the predictions, which training tries to minimize.
# Optimizer: stochastic gradient descent.
model.compile(loss='mse', optimizer='sgd')
# Train for 25 epochs in batches of 50 rows
model.fit(
    x=train_features.astype("float32"),
    y=train_label.astype("float32"),
    batch_size=50,
    epochs=25,
)

### Evaluate the model

In [None]:
import math

# The model was compiled with the MSE loss and no extra metrics, so evaluate() gives the
# validation MSE; its square root is the RMSE
val_results = model.evaluate(
    x=val_features.astype("float32"),
    y=val_label.astype("float32"),
)
print(f'Root Mean Squared Error or RMSE is: {math.sqrt(val_results)}')

#### The section below is optional in case you want to learn a different method to set up training in SageMaker

### OPTIONAL - Training using Create Training Job SageMaker API

Create RecordIO protobuf format of our dataset for faster training performance

In [None]:
# Object name and S3 prefix for the low-level Linear Learner training inputs
t_file = "train.data"
linear_prefix = 'aiml-book/chapter5/linear-learner'

# Serialize the training features and labels into an in-memory buffer
t_fil = io.BytesIO()
smac.write_numpy_to_dense_tensor(
    t_fil,
    train_features.astype("float32"),
    labels=train_label.astype("float32"),
)
t_fil.seek(0)  # rewind so the upload reads the buffer from its first byte

# Upload the buffer to s3://<bucket>/<linear_prefix>/train/<t_file>
boto3.Session().resource("s3").Bucket(bucket).Object(
    os.path.join(linear_prefix, "train", t_file)
).upload_fileobj(t_fil)

In [None]:
v_file = "validation.data"

linear_prefix = 'aiml-book/chapter5/linear-learner'

# Serialize the VALIDATION split into an in-memory buffer. Writing the training arrays here
# would make the training job report validation metrics on data it has already trained on.
v_fil = io.BytesIO()
smac.write_numpy_to_dense_tensor(v_fil, val_features.astype("float32"), val_label.astype("float32"))
v_fil.seek(0)  # rewind so the upload reads the buffer from its first byte

# Upload the buffer to s3://<bucket>/<linear_prefix>/validation/<v_file>
boto3.Session().resource("s3").Bucket(bucket).Object(
    os.path.join(linear_prefix, "validation", v_file)
).upload_fileobj(v_fil)

Now get the URI of the linear learner algorithm image, which SageMaker hosts in Amazon Elastic Container Registry (ECR)

In [None]:
from sagemaker import image_uris

# Look up the linear-learner training image for the region of the current boto3 session
container = image_uris.retrieve(
    framework="linear-learner",
    region=boto3.Session().region_name,
)

#### IMPORTANT - We are using an ml.m5.xlarge instance here to run our training; this will incur costs to your AWS account

### Specify training data and hyperparameters 

In [None]:
# Timestamp (UTC) the job name so each run of this cell produces a distinct name
ll_train_job = f"Chapter5-linear-learner-{time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())}"

print("Job name is:", ll_train_job)


def _s3_prefix_channel(channel_name):
    """Build one InputDataConfig entry that reads from s3://<bucket>/<linear_prefix>/<channel_name>/."""
    return {
        "ChannelName": channel_name,
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": f"s3://{bucket}/{linear_prefix}/{channel_name}/",
            }
        },
    }


# Request body for the SageMaker create_training_job API call
ll_training = {
    "RoleArn": role,
    "TrainingJobName": ll_train_job,
    # algorithm container retrieved earlier, reading its input in File mode
    "AlgorithmSpecification": {"TrainingImage": container, "TrainingInputMode": "File"},
    # one ml.m5.xlarge instance with a 10 GB volume
    "ResourceConfig": {"InstanceCount": 1, "InstanceType": "ml.m5.xlarge", "VolumeSizeInGB": 10},
    # the train and validation data uploaded under linear_prefix
    "InputDataConfig": [
        _s3_prefix_channel("train"),
        _s3_prefix_channel("validation"),
    ],
    "OutputDataConfig": {"S3OutputPath": f"s3://{bucket}/{linear_prefix}/"},
    # hyperparameter values are passed as strings
    "HyperParameters": {
        "feature_dim": "auto",
        "mini_batch_size": "50",
        "predictor_type": "regressor",
        "epochs": "10",
        "loss": "auto",
    },
    # stop the job if it runs longer than one hour
    "StoppingCondition": {"MaxRuntimeInSeconds": 3600},
}

### Start Training

In [None]:
%%time

sagemaker = boto3.client("sagemaker")

sagemaker.create_training_job(**ll_training)

status = sagemaker.describe_training_job(TrainingJobName=ll_train_job)["TrainingJobStatus"]
print(status)

#### Navigate to Amazon SageMaker console and click on Training Jobs on the left to check the status of your training

### END OF NOTEBOOK, please return to Chapter 5 in the book to proceed further