# Model GaussianNB

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score
import pandas as pd
from sagemaker.session import Session
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.session import get_execution_role
from sagemaker.tuner import HyperparameterTuner, CategoricalParameter

In [None]:
# Resolve the SageMaker context used by every later cell: the session, the
# session's default S3 bucket, and the execution role.
sagemaker_session = Session()
default_bucket = sagemaker_session.default_bucket()
role = get_execution_role()
print(
    'Current SageMaker session: {}'.format(sagemaker_session),
    'Current SageMaker default bucket: {}'.format(default_bucket),
    sep='\n',
)

In [None]:
# Upload the local train and test paths to the default bucket. Each path is
# stored under a key prefix equal to its local name, and the returned S3
# locations are kept in the same order (index 0 = train, index 1 = test).
# Original note: the data is CSV without header and index.
data_path = ['train-tf-500-1', 'test-tf-500-1']
input_data = [
    sagemaker_session.upload_data(local_path, bucket=default_bucket, key_prefix=local_path)
    for local_path in data_path
]

print(input_data)

In [None]:
# Configure the scikit-learn training job.
estimator = SKLearn(
    # training script and the directory it is packaged from
    entry_point='train-gaussian-nb.py',
    source_dir='source',
    # container runtime
    framework_version='0.23-1',
    py_version='py3',
    # permissions and hardware
    role=role,
    instance_type='ml.c4.xlarge',
    instance_count=1,
)

In [None]:
# Start training; the uploaded S3 locations are passed as the 'train' and
# 'test' input channels (same order as they were uploaded).
channels = {
    'train': input_data[0],
    'test': input_data[1],
}
estimator.fit(channels)

In [None]:
# Wrap the artifacts produced by the training job in a deployable
# scikit-learn model.
# NOTE(review): entry_point here is 'train.py', while the estimator was
# trained with 'train-gaussian-nb.py' -- confirm that 'source/train.py'
# exists and is the intended inference script for this model.
model = SKLearnModel(
    entry_point='train.py',
    source_dir='source',
    model_data=estimator.model_data,
    framework_version='0.23-1',
    py_version='py3',
    role=role,
)

In [None]:
# Deploy the model to an endpoint backed by a single ml.t2.medium instance
# and keep the returned predictor for the cells below.
predictor = model.deploy(instance_type='ml.t2.medium', initial_instance_count=1)

In [None]:
# make predictions based on test data by sending it to the deployed endpoint
# NOTE(review): X_test is not assigned in any cell visible in this notebook;
# it must be loaded (presumably from the local test data) before this cell
# runs, otherwise this line raises NameError -- confirm where it comes from.
y_pred = predictor.predict(X_test)

In [None]:
# Score the endpoint's predictions and report accuracy as a percentage
# rounded to one decimal place.
# NOTE(review): y_test is not assigned in any cell visible in this notebook;
# confirm it is loaded before this cell runs.
accuracy = accuracy_score(y_test, y_pred)
accuracy_pct = round(accuracy * 100, 1)
print('accuracy: {} %'.format(accuracy_pct))