In [2]:
import numpy as np
import pandas as pd
# Load the raw dataset; the path is resolved relative to the working directory.
csv_path = 'data2.csv'
df = pd.read_csv(csv_path)

In [3]:
# Feature matrix: every column except the first one, cast to float32.
# NOTE(review): if 'Total Claim Amount' is not the first column of the CSV it
# is also part of this slice, i.e. the label would be fed to the model as a
# feature -- confirm against the file layout.
feature_frame = df[df.columns[1:]]
modelData = np.asarray(feature_frame).astype('float32')

# Label array for the regression, cast to float32 as well.
label_series = df['Total Claim Amount']
target = np.asarray(label_series).astype('float32')

In [4]:
import boto3
import sagemaker
import io
import os
import sagemaker.amazon.common as smac

# Create new sagemaker session
sess = sagemaker.Session()

# S3 bucket to export results to
bucket = "bucketcostvar"
# Optional key prefix inside the bucket; may be left empty.
prefix = ""

# Use the IO buffer as dataset is small
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, modelData, target)
buf.seek(0)  # rewind so the upload starts at the first byte
key = 'linearlearner'

# Build every object key exactly once and derive the s3:// URIs from it, so
# the advertised location always matches the object that was uploaded.
# Empty parts are skipped: with an empty prefix the old format strings
# produced 's3://bucket//train/...' (a key with a leading slash) while the
# object itself was stored under 'train/...'. S3 keys always use '/',
# regardless of the local OS path separator.
key_prefix = prefix.strip('/')
train_key = '/'.join(part for part in (key_prefix, 'train', key) if part)
output_key = '/'.join(part for part in (key_prefix, 'output') if part)

boto3.resource('s3').Bucket(bucket).Object(train_key).upload_fileobj(buf)
s3_train_data = 's3://{}/{}'.format(bucket, train_key)
print('uploaded training data location: {}'.format(s3_train_data))
output_location = 's3://{}/{}'.format(bucket, output_key)
print('training artifacts will be uploaded to: {}'.format(output_location))

# Linear Learner training image URI for each AWS region this notebook can run
# in; the entry matching the current region is picked when the estimator is
# created.
containers = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/linear-learner:latest',
              'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest',
              'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:latest',
              'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:latest',
              'ap-northeast-1': '351501993468.dkr.ecr.ap-northeast-1.amazonaws.com/linear-learner:latest'}

uploaded training data location: s3://bucketcostvar//train/linearlearner
training artifacts will be uploaded to: s3://bucketcostvar//output


In [5]:
from sagemaker import get_execution_role

# Execution role handed to the estimator for the training job.
role = get_execution_role()

# Pick the training image registered for the region of the default boto3
# session; a region missing from `containers` raises KeyError here.
region = boto3.Session().region_name
training_image = containers[region]

# One ml.m4.xlarge training instance; artifacts are written to output_location.
estimator_options = {
    'train_instance_count': 1,
    'train_instance_type': 'ml.m4.xlarge',
    'output_path': output_location,
    'sagemaker_session': sess,
}
linear = sagemaker.estimator.Estimator(training_image, role, **estimator_options)

In [15]:
# feature_dim has to equal the number of feature columns in the training
# matrix, so take it from modelData instead of hard-coding it; a change in the
# CSV layout then cannot silently desynchronise the two.
linear.set_hyperparameters(feature_dim=int(modelData.shape[1]),
                           predictor_type='regressor',
                           epochs=50,
                           normalize_data=True)

In [16]:
# Start the training job, feeding the uploaded dataset through the 'train'
# channel.
training_channels = {'train': s3_train_data}
linear.fit(training_channels)

2019-08-25 00:33:25 Starting - Starting the training job...
2019-08-25 00:33:26 Starting - Launching requested ML instances...
2019-08-25 00:34:24 Starting - Preparing the instances for training......
2019-08-25 00:35:15 Downloading - Downloading input data...
2019-08-25 00:35:43 Training - Downloading the training image..
[31mDocker entrypoint called with argument(s): train[0m
[31m[08/25/2019 00:36:04 INFO 140382750205760] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'auto', u'lr_scheduler_step': u'auto', u'init_method': u'uniform', u'init_sigma': u'0.01', u'lr_scheduler_minimum_lr': u'auto', u'tar


2019-08-25 00:36:14 Uploading - Uploading generated training model
2019-08-25 00:36:14 Completed - Training job completed
Billable seconds: 59


In [None]:
# Deploy the trained estimator on a single ml.m4.xlarge instance and keep the
# returned predictor for inference calls.
# NOTE(review): no teardown of the deployed endpoint is visible in this
# notebook -- confirm it is deleted elsewhere once it is no longer needed.
endpoint_options = {
    'initial_instance_count': 1,
    'instance_type': 'ml.m4.xlarge',
}
linear_predictor = linear.deploy(**endpoint_options)

--

In [17]:
# Configure how the predictor talks to the deployed model: requests are
# declared as 'text/csv' and serialized with the SDK's csv_serializer, and
# responses are passed through json_deserializer before being returned.
from sagemaker.predictor import csv_serializer, json_deserializer
linear_predictor.content_type = 'text/csv'
linear_predictor.serializer = csv_serializer
linear_predictor.deserializer = json_deserializer

In [14]:
# Smoke-test the predictor with one row of the feature matrix (index 2) and
# show the raw response.
sample_features = modelData[2]
result = linear_predictor.predict(sample_features)
print(result)

{u'predictions': [{u'score': 383.12152099609375}]}
