In [1]:
import boto3
import math
import dateutil
import json

In [3]:
# Open the AWS session used by the rest of this notebook.
# Credentials are taken from the named local profile below and the region is
# pinned to us-east-1.
# NOTE(review): the profile is expected to carry only limited (prediction-only)
# privileges - confirm against its IAM policy.
boto_session = boto3.Session(
    profile_name='estellaliu_ml_user',
    region_name='us-east-1',
)

In [4]:
# Build the SageMaker Runtime client (the API used to invoke endpoints)
# from the session above, explicitly targeting us-east-1.
client = boto_session.client(
    service_name='sagemaker-runtime',
    region_name='us-east-1',
)

In [5]:
# Name of the SageMaker endpoint that every invoke_endpoint call below targets.
# Change this to match the endpoint you want to query.
endpoint_name = 'xgboost-v1'

In [6]:
# Raw sample observations: one list of feature values per observation.
# Column order:
#   "Location", "Instance Type", "Operating System", "day", "month", "year"
sample_one = [0, 2, 3, 9, 12, 2020]
sample_two = [1, 3, 2, 3, 4, 2021]
sample_three = [2, 1, 1, 2, 3, 2021]

In [7]:
# Single observation: a request holds a list of instances, and each instance
# carries its feature values under the "features" key.
request = {"instances": [{"features": sample_one}]}

In [8]:
# Pretty-print the request as JSON (2-space indent) to inspect its structure
print(json.dumps(request,indent=2))

{
  "instances": [
    {
      "features": [
        0,
        2,
        3,
        9,
        12,
        2020
      ]
    }
  ]
}


In [9]:
# Multiple observations: one {"features": ...} instance per sample,
# in the order first, second, third.
request = {
    "instances": [
        {"features": sample}
        for sample in (sample_one, sample_two, sample_three)
    ]
}

In [10]:
# Pretty-print the multi-instance request as JSON (2-space indent)
print(json.dumps(request,indent=2))

{
  "instances": [
    {
      "features": [
        0,
        2,
        3,
        9,
        12,
        2020
      ]
    },
    {
      "features": [
        1,
        3,
        2,
        3,
        4,
        2021
      ]
    },
    {
      "features": [
        2,
        1,
        1,
        2,
        3,
        2021
      ]
    }
  ]
}


In [31]:
def transform_data(data):
    """Serialize one observation's feature values into a single CSV row.

    Args:
        data: Iterable (list, tuple, ...) holding the feature values of one
            observation. It is only read, never modified.

    Returns:
        str: Each value converted with ``str`` and joined by commas, in the
        original order, with no trailing newline. An empty iterable yields
        an empty string.

    Raises:
        TypeError: If ``data`` is itself a ``str`` or ``bytes`` object rather
            than a collection of values.
        ValueError: If any value cannot be interpreted as a number.
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(data, (str, bytes)):
        raise TypeError('data must be a sequence of feature values, not a string')

    fields = []
    for position, feature in enumerate(data):
        # float() is used purely as a validity check so malformed input is
        # rejected here instead of being serialized into the CSV row;
        # the original value (not the float) is what gets written out.
        try:
            float(feature)
        except (TypeError, ValueError):
            raise ValueError(
                'Feature at position {0} is not numeric: {1!r}'.format(position, feature))
        fields.append(str(feature))

    return ','.join(fields)

In [32]:
# Show one sample before and after conversion to a CSV row
print('Raw Data:\n', sample_one)
print('Transformed Data:\n', transform_data(sample_one))

Raw Data:
 [0, 2, 3, 9, 12, 2020]
Transformed Data:
 0,2,3,9,12,2020


In [33]:
# Single instance with deliberately malformed features (a text value and
# fewer values than the samples above), used to exercise error handling.
request = {"instances": [{"features": ["hi there", 0, 2]}]}

In [34]:
# Convert every instance of the request into a CSV row.
# Any exception raised during the conversion is caught and reported
# instead of stopping the notebook.
try:
    transformed_data = [
        transform_data(observation['features'])
        for observation in request["instances"]
    ]
except Exception as err:
    print('Error when transforming: {0}'.format(err))

In [35]:
# Single observation: reset the request to one valid instance (sample_one)
request = {"instances": [{"features": sample_one}]}

In [36]:
# Invoke the endpoint for the single observation.
# The first instance's features are serialized to one CSV row and sent as
# UTF-8 bytes with the text/csv content type.
result = client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=transform_data(request['instances'][0]['features']).encode('utf-8'),
    ContentType='text/csv',
)

In [37]:
# Read the response body and decode it from UTF-8 bytes to text.
# Note: this rebinds `result` from the response object to the decoded string,
# so this cell cannot be re-run without first re-running the invoke cell.
result = result['Body'].read().decode('utf-8')

In [38]:
# The response text is parsed as a float and passed through expm1, the inverse
# of log1p, to bring the prediction back to the original scale.
# NOTE(review): this assumes the model was trained on log1p(target). The earlier
# comment here referred to "count" while the value is printed as
# "Price Per Unit" - confirm which target the model was trained on.
print ('Predicted Price Per Unit', math.expm1(float(result)))

Predicted Price Per Unit 13.09348808101376


In [39]:
# Multiple observations: one {"features": ...} instance per sample,
# in the order first, second, third.
request = {
    "instances": [
        {"features": sample}
        for sample in (sample_one, sample_two, sample_three)
    ]
}

In [40]:
# For each instance, show the raw dict followed by its CSV-row form
for instance in request["instances"]:
    print(instance, 'Transformed:', sep='\n')
    print(' ', transform_data(instance['features']))

{'features': [0, 2, 3, 9, 12, 2020]}
Transformed:
  0,2,3,9,12,2020
{'features': [1, 3, 2, 3, 4, 2021]}
Transformed:
  1,3,2,3,4,2021
{'features': [2, 1, 1, 2, 3, 2021]}
Transformed:
  2,1,1,2,3,2021


In [41]:
# Prediction for multiple observations in a single call.
# The request is submitted as text/csv (per the original note, the XGBoost
# endpoint accepts CSV and not JSON): one comma-separated row per instance,
# rows joined by newlines, encoded as UTF-8.
result = client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body='\n'.join(
        transform_data(instance['features'])
        for instance in request["instances"]
    ).encode('utf-8'),
    ContentType='text/csv',
)

In [42]:
# Read the response body and decode it from UTF-8 bytes to text.
# Note: this rebinds `result` from the response object to the decoded string,
# so this cell cannot be re-run without first re-running the invoke cell.
result = result['Body'].read().decode('utf-8')

In [43]:
# Split the response text into one string per prediction.
# The delimiter is not guaranteed: the endpoint used here answered with
# comma-separated values, but a response with one prediction per line is
# accepted as well. Newlines are normalised to commas before splitting, and
# empty fields (e.g. from a trailing newline or an empty body) are dropped.
result = [
    value.strip()
    for value in result.replace('\n', ',').split(',')
    if value.strip()
]

# expm1 is the inverse of log1p and maps each value back to the original scale.
# NOTE(review): assumes the model predicts log1p(target) - confirm.
predictions = [math.expm1(float(value)) for value in result]

In [44]:
# Display the list of back-transformed predictions, one per submitted instance
predictions

[13.09348808101376, 41.016599480572765, 3.9199462258842046]