# Bring Your Own Container

Do this if you need to use GPUs, for example:

* https://xgboost.readthedocs.io/en/latest/gpu/index.html

In [None]:
!pip install -q boto3
!pip install -q xgboost

In [None]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, its default S3 bucket, and the execution role returned by the SDK.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# A single boto3 session supplies both the region name and the SageMaker client.
boto_session = boto3.Session()
region = boto_session.region_name
sm = boto_session.client(service_name='sagemaker', region_name=region)

## TODO:  Create a custom container with XGBoost GPU installed
See https://xgboost.readthedocs.io/en/latest/gpu/index.html

If you are using TensorFlow, PyTorch, or another SageMaker supported framework, the SageMaker Python SDK will automatically upload the model for you.

However, if you need to manually upload your model to S3, then you should create a Docker image per the steps below.

1. Build a custom container with XGBoost (without `train.py`)
2. `pip install sagemaker-containers` 
3. Push to ECR.
4. Create a tar file with your `train.py`
5. Upload to S3
6. Add the ECR `image_uri` to your `Estimator`
7. Add the following 2 hyperparameters to your `Estimator`:
```
{'sagemaker_program': 'train.py',
'sagemaker_submit_directory': 's3://bucket/prefix/training-scripts/sourcedir.tar.gz'}
```
8. Call `estimator.fit()`

In [None]:
from sagemaker.amazon.amazon_estimator import get_image_uri

# Look up the image URI for the 'xgboost' repo at version '0.90-2' in the current region.
# TODO: replace this with the ECR URI of the custom XGBoost GPU container once it is built.
custom_container_uri = get_image_uri(region_name=region,
                                     repo_name='xgboost',
                                     repo_version='0.90-2')

# S3 prefix passed to the estimator as its output location; the model artifact
# is downloaded from underneath this prefix later in the notebook.
model_output_path = 's3://{}/models/amazon-reviews/xgboost-byoc'.format(bucket)

# TODO:  Figure out `requirements.txt`
# The estimator must reference the URI defined above; the previous name
# `builtin_container_uri` is not defined in this notebook and raised a NameError.
# NOTE(review): 'sagemaker_submit_directory' still holds the placeholder
# 's3://bucket/prefix/...' path -- point it at the real sourcedir.tar.gz before running.
xgb_estimator = sagemaker.estimator.Estimator(image_name=custom_container_uri,
                                              role=role,
                                              hyperparameters={'objective': 'binary:logistic',
                                                               'num_round': 1,
                                                               'max_depth': 5,
                                                               'sagemaker_program': 'xgboost_reviews.py',
                                                               'sagemaker_submit_directory': 's3://bucket/prefix/training-scripts/sourcedir.tar.gz'},
                                              train_instance_count=1,
                                              train_instance_type='ml.m4.xlarge',
                                              output_path=model_output_path,
                                              sagemaker_session=sess,
                                              enable_cloudwatch_metrics=True)

In [None]:
# Launch the training job with one input channel each for training and validation data.
# NOTE(review): `s3_input_train_data` and `s3_input_validation_data` are not defined
# anywhere in the visible notebook -- they must be created (or restored) before this cell.
training_channels = {
    'train': s3_input_train_data,
    'validation': s3_input_validation_data,
}

# Optional: pass wait=False as well (left disabled in the original cell).
xgb_estimator.fit(training_channels)

In [None]:
# Remember the name of the most recent training job; the download step uses it
# to build the S3 path of the model artifact.
latest_job = xgb_estimator.latest_training_job
training_job_name = latest_job.name

job_name_message = 'training_job_name:  {}'.format(training_job_name)
print(job_name_message)

In [None]:
# download the model artifact from AWS S3
!aws s3 cp $model_output_path/$training_job_name/output/model.tar.gz ./models/byoc/

In [None]:
import tarfile
import pickle as pkl

# TODO (from the original cell):  extract to ./model/built-in/
# NOTE(review): this notebook works in ./models/byoc/ -- confirm whether that TODO is stale.

# Directory that holds the downloaded tarball and receives its extracted contents.
model_dir = './models/byoc/'

# Open the downloaded model artifact and unpack it next to the tarball.
with tarfile.open(model_dir + 'model.tar.gz') as tar:
    tar.extractall(path=model_dir)

# Load the extracted model from the extraction directory; previously it was opened
# relative to the working directory, where the file was never written.
# NOTE: unpickling can execute arbitrary code -- only load artifacts you trust.
with open(model_dir + 'xgboost-model', 'rb') as model_file:
    model = pkl.load(model_file)

In [None]:
!ls -al ./models/byoc/

In [None]:
# NOTE(review): `bert_model` is not defined anywhere in the visible notebook (the
# model loaded earlier is bound to `model`) -- confirm which object is intended here.
sample_review = """Very funny. A typical mid 50's comedy."""
predictions, raw_outputs = bert_model.predict([sample_review])

print('Predictions: {}'.format(predictions))
print('Raw outputs: {}'.format(raw_outputs))

In [None]:
# NOTE(review): `bert_model` is not defined anywhere in the visible notebook (the
# model loaded earlier is bound to `model`) -- confirm which object is intended here.
sample_review = """That movie was absolutely awful."""
predictions, raw_outputs = bert_model.predict([sample_review])

print('Predictions: {}'.format(predictions))
print('Raw outputs: {}'.format(raw_outputs))