## Import Libraries

In [26]:
import re
import boto3
import sagemaker
import numpy as np
from sagemaker import get_execution_role
from sklearn.model_selection import train_test_split


# Work with data
import io # The io module allows for dealing with various types of I/O (text I/O, binary I/O and raw I/O). 
import sagemaker.amazon.common as smac # sagemaker common libary

## Variables configuration

In [27]:
bucket = sess.default_bucket()
prefix = "linear-learner"
output_location = f"s3://{bucket}/{prefix}/output"

RANDOM_SEED = 42
dataset = 'keypoint.csv'

# Define IAM role
role = get_execution_role()

sess = sagemaker.Session()
region = boto3.Session().region_name

## Dataset

In [62]:
X_dataset = np.loadtxt(dataset, delimiter=',', dtype='float32', usecols=list(range(1, (21 * 2) + 1)))
y_dataset = np.loadtxt(dataset, delimiter=',', dtype='float32', usecols=(0))

X_train, X_test, y_train, y_test = train_test_split(X_dataset, y_dataset, train_size=0.75, random_state=RANDOM_SEED)



### Train

In [29]:
# Code below converts the data in numpy array format to RecordIO format
# This is the format required by Sagemaker Linear Learner 

buf = io.BytesIO() # create an in-memory byte array (buf is a buffer I will be writing to)
smac.write_numpy_to_dense_tensor(buf, X_train, y_train)
buf.seek(0) 
# When you write to in-memory byte arrays, it increments 1 every time you write to it
# Let's reset that back to zero 


0

In [30]:
import os

# Code to upload RecordIO data to S3
 
# Key refers to the name of the file    
key = 'linear-train-data'

# The following code uploads the data in record-io format to S3 bucket to be accessed later for training
boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)

# Let's print out the training data location in s3
s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
print('uploaded training data location: {}'.format(s3_train_data))

uploaded training data location: s3://sagemaker-us-east-1-410677554255/linear-learner/train/linear-train-data


### Test

In [32]:
# Code to upload RecordIO data to S3

buf = io.BytesIO() # create an in-memory byte array (buf is a buffer I will be writing to)
smac.write_numpy_to_dense_tensor(buf, X_test, y_test)
buf.seek(0) 
# When you write to in-memory byte arrays, it increments 1 every time you write to it
# Let's reset that back to zero 


0

In [33]:
# Key refers to the name of the file    
key = 'linear-test-data'

# The following code uploads the data in record-io format to S3 bucket to be accessed later for training
boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'test', key)).upload_fileobj(buf)

# Let's print out the testing data location in s3
s3_test_data = 's3://{}/{}/test/{}'.format(bucket, prefix, key)
print('uploaded training data location: {}'.format(s3_test_data))

uploaded training data location: s3://sagemaker-us-east-1-410677554255/linear-learner/test/linear-test-data


In [34]:
# create an output placeholder in S3 bucket to store the linear learner output

output_location = 's3://{}/{}/output'.format(bucket, prefix)
print('Training artifacts will be uploaded to: {}'.format(output_location))

Training artifacts will be uploaded to: s3://sagemaker-us-east-1-410677554255/linear-learner/output


In [36]:
from sagemaker import image_uris
container = image_uris.retrieve(region=boto3.Session().region_name, framework="linear-learner")


In [41]:
# We have pass in the container, the type of instance that we would like to use for training 
# output path and sagemaker session into the Estimator. 
# We can also specify how many instances we would like to use for training
# sagemaker_session = sagemaker.Session()

linear = sagemaker.estimator.Estimator(container,
                                       role, 
                                       instance_count = 1, 
                                       instance_type = 'ml.c4.xlarge',
                                       output_path = output_location,
                                       sagemaker_session = sess)


# We can tune parameters like the number of features that we are passing in, type of predictor like 'regressor' or 'classifier', mini batch size, epochs
# Train 8 different versions of the model and will get the best out of them (built-in parameters optimization!)

linear.set_hyperparameters(feature_dim = 42,
                           predictor_type = 'multiclass_classifier',
                           mini_batch_size = 60,
                           epochs = 5,
                           num_classes = 4,
                           num_models = 8,
                           loss = 'softmax_loss')
# Now we are ready to pass in the training data from S3 to train the linear learner model

linear.fit({'train': s3_train_data})

# Let's see the progress using cloudwatch logs

2022-11-10 15:56:58 Starting - Starting the training job...
2022-11-10 15:57:21 Starting - Preparing the instances for trainingProfilerReport-1668095818: InProgress
.........
2022-11-10 15:58:55 Downloading - Downloading input data
2022-11-10 15:58:55 Training - Downloading the training image.........
2022-11-10 16:00:26 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[11/10/2022 16:00:30 INFO 140133525714752] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_si

In [67]:
linear_regressor = linear.deploy(initial_instance_count = 1,
                                          instance_type = 'ml.m4.xlarge')

--------!

In [80]:
from sagemaker.predictor import csv_serializer, json_deserializer

linear_regressor.content_types = 'text/csv'
linear_regressor.serializer = csv_serializer
linear_regressor.deserializer = json_deserializer

In [95]:
result['predictions']

[{'score': [0.6482167840003967,
   0.11138279736042023,
   0.24028529226779938,
   0.00011509278556331992],
  'predicted_label': 0},
 {'score': [0.972072422504425,
   0.018346454948186874,
   0.00956850778311491,
   1.2633706319320481e-05],
  'predicted_label': 0},
 {'score': [0.28350716829299927,
   0.09271127730607986,
   0.6237677931785583,
   1.3740491340286098e-05],
  'predicted_label': 2},
 {'score': [0.8245282173156738,
   0.034943751990795135,
   0.1405210644006729,
   6.981278147577541e-06],
  'predicted_label': 0},
 {'score': [0.0010490157874301076,
   0.9824925065040588,
   0.01122354157269001,
   0.005234922282397747],
  'predicted_label': 1},
 {'score': [0.009816275909543037,
   0.03653404489159584,
   0.0007362017058767378,
   0.9529134631156921],
  'predicted_label': 3},
 {'score': [0.07809475064277649,
   0.7898159623146057,
   0.13208530843257904,
   3.954525254812324e-06],
  'predicted_label': 1},
 {'score': [0.09362044930458069,
   0.043714623898267746,
   0.86266487

In [83]:
result = linear_regressor.predict(X_test)

The csv_serializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
The json_deserializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [99]:
result['predictions'][1187]
y_pred = [np.argmax(x['score']) for x in result['predictions'] ]

In [102]:
from sklearn.metrics import accuracy_score

accuracy_score(y_test, y_pred)

0.847953216374269

In [103]:
# delete endpoints
linear_regressor.delete_endpoint()


# Load the already trained Model


In [1]:
import os
import mxnet as mx
import boto3

bucket = "sagemaker-us-east-1-410677554255"
key = "linear-learner/output/linear-learner-2022-11-10-15-56-58-055/output/"
boto3.resource('s3').Bucket(bucket).download_file(key, 'model.tar.gz')

os.system('tar -zxvf model.tar.gz')

# Linear learner model is itself a zip file, containing a mxnet model and other metadata.
# First unzip the model.
os.system('unzip model_algo-1') 

# Load the mxnet module
mod = mx.module.Module.load("mx-mod", 0)



ModuleNotFoundError: No module named 'mxnet'