In [34]:
# Look up the IAM execution role for this notebook session
import boto3
import re
from sagemaker import get_execution_role

# `role` is kept at module level: the train() helper further down reads it as a global
role = get_execution_role()

In [35]:
import numpy as np
import sagemaker.amazon.common as smac
import json
import sagemaker
import io
import os
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.predictor import csv_serializer, json_deserializer

In [38]:
# Load the saved record array and carve out every train/test variant used below
data = np.load('tsla_model_2_record.npy')
fourth_quarter = int(len(data) * 0.75)  # row index where the last 25% (test rows) starts

# Full data set: first 75% of the rows for training, the remainder for testing
train_data, test_data = data[:fourth_quarter], data[fourth_quarter:]

# Control variant: keep only columns 4 onward
ctrl_train_data, ctrl_test_data = train_data[:, 4:], test_data[:, 4:]

# Twitter-only variant: drop column 4 and keep everything else
only_twitter_train_data, only_twitter_test_data = (
    np.delete(split, 4, 1) for split in (train_data, test_data)
)

In [39]:
# Preview the full record array (train and test rows together); NumPy elides the middle rows with "..."
print(data)

[[ 2.2000000e+01  5.0000000e+00  4.4000000e+01  2.0000000e+00
   5.6642392e-03 -1.5619084e-03]
 [ 2.8000000e+01  8.0000000e+00  2.2000000e+01  5.0000000e+00
  -1.5619084e-03 -1.5880540e-03]
 [ 3.9000000e+01  1.1000000e+01  2.8000000e+01  8.0000000e+00
  -1.5880540e-03 -1.8778339e-02]
 ...
 [ 1.7100000e+02  5.5000000e+01  2.1000000e+02  7.4000000e+01
  -1.9030344e-02  3.8025755e-02]
 [ 1.1000000e+02  4.9000000e+01  1.7100000e+02  5.5000000e+01
   3.8025755e-02 -6.7694499e-03]
 [ 1.6200000e+02  3.7000000e+01  1.1000000e+02  4.9000000e+01
  -6.7694499e-03 -3.0882113e-02]]


In [37]:
# Run one experiment end to end: upload the training split, train, then score the test split.
# NOTE(review): uploadToBucket, train and exportResults are defined in cells further down,
# so those cells must be executed before this one.
locations = uploadToBucket(ctrl_train_data, "tsla_model_2_ctrl") # pass ctrl_train_data for the control run, train_data for the full run
linear = train(locations, 1) # feature count excludes the label column: 5 for the full data, 1 for control
# Alternative: set `linear` to the name of an existing training job (a string) and
# exportResults will attach to that job instead of using a freshly trained estimator.
#linear = "linear-learner-2018-09-06-00-06-35-174"
exportResults(linear, ctrl_test_data, "tsla_model_2_ctrl_predictions")

INFO:sagemaker:Creating training-job with name: linear-learner-2018-09-12-18-28-54-095


Uploaded training data location: s3://sagemaker-twitter-project/sagemaker/tsla_model_2_ctrl/train/linearlearner
Training artifacts will be uploaded to: s3://sagemaker-twitter-project/sagemaker/tsla_model_2_ctrl/output
.....................
[31mDocker entrypoint called with argument(s): train[0m
[31m[09/12/2018 18:32:13 INFO 140071137314624] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'auto', u'lr_scheduler_step': u'auto', u'init_method': u'uniform', u'init_sigma': u'0.01', u'lr_scheduler_minimum_lr': u'auto', u'target_recall': u'0.8', u'num_models': u'auto', u'early_stopping_patience': u'3', u'momentum': u'a




INFO:sagemaker:Creating model with name: linear-learner-2018-09-12-18-33-06-104


Billable seconds: 119


INFO:sagemaker:Creating endpoint with name linear-learner-2018-09-12-18-28-54-095


--------------------------------------------------------------------------------------!

INFO:sagemaker:Deleting endpoint with name: linear-learner-2018-09-12-18-28-54-095


In [4]:
# Upload training data to an S3 bucket and return the locations needed for training
def uploadToBucket(data, name, bucket="sagemaker-twitter-project"):
    """Serialize `data` and upload it to S3 as the training input.

    Args:
        data: 2-D array. The last column is used as the label vector and all
            preceding columns as the feature matrix.
        name: Experiment name; every object is stored under "sagemaker/<name>".
        bucket: Name of the destination S3 bucket. Defaults to the bucket the
            notebook has always used.

    Returns:
        A two-element list ``[output_location, s3_train_data]``: the S3 URI
        printed as the destination for training artifacts, followed by the
        S3 URI of the uploaded training data.
    """
    prefix = "sagemaker/" + name
    key = 'linearlearner'
    features, labels = data[:, :-1], data[:, -1]

    # Write the features/labels into an in-memory buffer, then rewind it so
    # the upload reads from the beginning.
    buf = io.BytesIO()
    smac.write_numpy_to_dense_tensor(buf, features, labels)
    buf.seek(0)

    # S3 keys always use '/' as the separator, so build the key explicitly
    # instead of with os.path.join (which is platform dependent). The same
    # key is reused for the URI so the two can never disagree.
    object_key = '{}/train/{}'.format(prefix, key)
    boto3.resource('s3').Bucket(bucket).Object(object_key).upload_fileobj(buf)

    s3_train_data = 's3://{}/{}'.format(bucket, object_key)
    print('Uploaded training data location: {}'.format(s3_train_data))

    output_location = 's3://{}/{}/output'.format(bucket, prefix)
    print('Training artifacts will be uploaded to: {}'.format(output_location))

    return [output_location, s3_train_data]

In [5]:
# Specify model image, initialize estimator, and train our model
def train(output_location, num_features):
    """Train a linear-learner regressor on previously uploaded data.

    Args:
        output_location: Two-element sequence as returned by uploadToBucket:
            index 0 is the S3 URI for training artifacts, index 1 is the
            S3 URI of the training data.
        num_features: Value passed to the algorithm as ``feature_dim``.

    Returns:
        The fitted ``sagemaker.estimator.Estimator``.

    Note:
        Reads the module-level ``role`` for the IAM role.
    """
    # Use the argument that was passed in. The previous version ignored it and
    # read the notebook-global `locations`, which only worked when the caller
    # happened to use that exact variable name.
    output_path = output_location[0]
    s3_train_data = output_location[1]

    sess = sagemaker.Session()
    container = get_image_uri(boto3.Session().region_name, 'linear-learner')

    linear = sagemaker.estimator.Estimator(container,
                                           role = role,
                                           train_instance_count = 1,
                                           train_instance_type = 'ml.m4.xlarge',
                                           output_path = output_path,
                                           sagemaker_session = sess)

    linear.set_hyperparameters(feature_dim = num_features, mini_batch_size = 5, predictor_type = 'regressor')

    linear.fit({'train': s3_train_data})

    return linear

In [6]:
# Compile prediction results
def exportResults(linear, test_data, name):
    """Deploy the model, score every test row, and save the results to disk.

    Args:
        linear: Either a trained estimator, or a string naming an existing
            training job to attach to.
        test_data: Iterable of rows; the last element of each row is the
            actual value and the preceding elements are sent to the endpoint
            as features.
        name: Base name of the output file; results go to ``name + '.npy'``.

    Returns:
        None. Saves a list of ``[predicted_score, actual_value]`` pairs with
        ``np.save``.
    """
    # Deploy an endpoint, attaching to an existing training job first when
    # only its name was given.
    if isinstance(linear, str):
        estimator = sagemaker.estimator.Estimator.attach(linear)
        predictor = estimator.deploy(initial_instance_count = 1, instance_type='ml.t2.medium')
    else:
        predictor = linear.deploy(initial_instance_count = 1, instance_type='ml.t2.medium')

    predictions = []
    try:
        predictor.content_type = 'text/csv'
        predictor.serializer = csv_serializer
        predictor.deserializer = json_deserializer

        for datum in test_data:
            result = predictor.predict(datum[:-1])
            predictions.append([result['predictions'][0]['score'], datum[-1]])
    finally:
        # Always tear the endpoint down, even if a prediction fails, so a
        # failed run does not leave a running endpoint behind.
        sagemaker.Session().delete_endpoint(predictor.endpoint)

    # Save record of model predictions for offline analysis
    np.save(name + '.npy', predictions)