### Imports

In [33]:
import boto3
import sagemaker
import pickle

from time import gmtime, strftime

from sagemaker import get_execution_role
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.amazon.amazon_estimator import get_image_uri

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import KBinsDiscretizer

#from sklearn.ensemble import RandomForestClassifier, VotingClassifier
#from sklearn.svm import SVC
#from sklearn.linear_model import LogisticRegression

### Environment Setup

In [34]:
# Name of the S3 bucket the notebook reads the trained model from.
BUCKET = "bf-titanic-model"
# Path of the featurizer script.
# NOTE(review): not referenced by any visible cell; presumably meant as an
# estimator entry point -- confirm.
SCRIPT_PATH = "/titanic-sagemaker-inference/titanic_featurizer.py"

# SageMaker session object and the execution role used by later cells.
sagemaker_session = sagemaker.Session()
role = get_execution_role()

### Endpoint Functions

In [None]:
def predict_fn(input_data, model):
    """Transform ``input_data`` with ``model`` and optionally prepend the label.

    Args:
        input_data: Container passed to ``model.transform``. It must support
            ``in`` and ``[]`` lookups by column name (presumably a pandas
            DataFrame -- confirm against the caller).
        model: Fitted object exposing ``transform(input_data)``.

    Returns:
        The transformed features. When ``label_column`` is present in
        ``input_data``, the label values are inserted as the first column.
    """
    # numpy is not imported at the top of this file; import it here so the
    # label branch does not fail with NameError.
    import numpy as np

    features = model.transform(input_data)

    # NOTE(review): ``label_column`` is a module-level name that is not
    # defined in the visible part of this file -- it must be set before
    # this function is called.
    if label_column not in input_data:
        # No label present: return only the set of features.
        return features

    # Return the label (as the first column) followed by the features.
    return np.insert(features, 0, input_data[label_column], axis=1)

In [None]:
def model_fn(model_dir):
    """Deserialize the fitted preprocessor stored as ``model.joblib``.

    Args:
        model_dir: Directory that contains the ``model.joblib`` artifact.

    Returns:
        The object loaded from ``<model_dir>/model.joblib``.
    """
    # Neither ``os`` nor ``joblib`` is imported at the top of this file, so
    # import them here; otherwise every call fails with NameError.
    import os

    import joblib

    return joblib.load(os.path.join(model_dir, "model.joblib"))

### Create Inference Pipeline

In [35]:
# Download the pickled model artifact from S3 and deserialize it in-process.
# SECURITY NOTE: pickle.loads can execute arbitrary code contained in the
# payload -- only load objects from a bucket whose writers are trusted.
s3 = boto3.resource("s3")
obj = s3.Object(BUCKET, "v1/model/trained_model.pickle")
body = obj.get()["Body"].read()
inference_model = pickle.loads(body)

In [None]:
# Timestamp suffix shared by the model and endpoint names so repeated runs
# do not collide. It was previously referenced without being defined;
# ``strftime`` and ``gmtime`` are already imported at the top of the file.
timestamp_prefix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

model_name = 'inference-pipeline-' + timestamp_prefix
endpoint_name = 'inference-pipeline-ep-' + timestamp_prefix

# PipelineModel's constructor has no ``train_instance_type`` parameter
# (that is an estimator argument), so passing it raised TypeError; it has
# been removed here.
# NOTE(review): ``models`` is documented as a list of sagemaker Model
# objects, while ``inference_model`` is whatever was unpickled from S3 --
# confirm its type and wrap/convert it as needed.
prod_model = PipelineModel(
    name=model_name,
    role=role,
    models=inference_model,
)

# NOTE(review): ``sklearn_preprocessor`` and ``train_input`` are not defined
# in the visible part of this notebook -- they must exist before this runs.
sklearn_preprocessor.fit({'train': train_input})

### Batch Transform Training Data

In [None]:
# Build a batch transformer from the SKLearn estimator.
transformer = sklearn_preprocessor.transformer(
    instance_count=1,
    instance_type="ml.m4.xlarge",
    assemble_with="Line",
    accept="text/csv",
)

# Submit the training input as CSV, report the job name, then wait on it.
transformer.transform(train_input, content_type="text/csv")
print(f"Waiting for transform job: {transformer.latest_transform_job.job_name}")
transformer.wait()

# Keep the transformer's output path for use as the preprocessed training data.
preprocessed_train = transformer.output_path