---
# Running scikit-learn in SageMaker - Tutorial
Based on the AWS example notebook: https://github.com/awslabs/amazon-sagemaker-examples/blob/master/sagemaker-python-sdk/scikit_learn_iris/Scikit-learn%20Estimator%20Example%20With%20Batch%20Transform.ipynb

In [23]:
import sagemaker
from sagemaker import get_execution_role

# Key prefix under which this notebook's artifacts are stored in S3.
prefix = 'Scikit-iris'

# Session object used by the later cells to upload data and run training.
sagemaker_session = sagemaker.Session()

# Execution role attached to this notebook environment; handed to the estimator.
role = get_execution_role()


In [24]:
import os
import numpy as np
from sklearn import datasets

# Fetch the Iris dataset and place the class label in column 0,
# followed by the feature columns.
iris = datasets.load_iris()
joined_iris = np.column_stack((iris.target, iris.data))

# Ensure the output folder exists, then write one row per sample.
# Per the NumPy docs, a multi-format `fmt` string is used as the whole row
# template, so the ", " inside it (not `delimiter`) separates the fields.
os.makedirs('./data', exist_ok=True)
np.savetxt(
    './data/iris.csv',
    joined_iris,
    fmt='%1.1f, %1.3f, %1.3f, %1.3f, %1.3f',
    delimiter=',',
)


In [25]:
# Local folder holding the CSV; its name is also reused as the last
# component of the S3 key prefix.
WORK_DIRECTORY = 'data'

# Upload the folder through the SageMaker session. The returned value is
# what the training cell passes as the 'train' channel.
train_input = sagemaker_session.upload_data(
    path=WORK_DIRECTORY,
    key_prefix='/'.join([prefix, WORK_DIRECTORY]),
)


In [26]:
from sagemaker.sklearn.estimator import SKLearn

# Training entry point executed inside the scikit-learn container.
script_path = 'scikit_learn_iris.py'

# Build the estimator that launches the training job.
#
# `framework_version` is pinned explicitly: without it the SDK emits a
# "not the latest supported version" warning and picks a default on its own.
# NOTE(review): '0.20.0' is assumed to be the version SDK v1 falls back to
# when none is given, so this pin should keep the same container image --
# confirm against the installed SDK. Move to '0.23-1' only after checking
# that scikit_learn_iris.py runs on that scikit-learn release.
sklearn = SKLearn(
    entry_point=script_path,
    framework_version='0.20.0',
    train_instance_type="ml.c4.xlarge",
    role=role,
    sagemaker_session=sagemaker_session,
    # Forwarded to the entry script as command-line arguments
    # (the job log shows `--max_leaf_nodes 30`).
    hyperparameters={'max_leaf_nodes': 30})


This is not the latest supported version. If you would like to use version 0.23-1, please add framework_version=0.23-1 to your constructor.


In [29]:
# Start the training job, feeding the uploaded data as the 'train' channel.
sklearn.fit(inputs={'train': train_input})


's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


2020-06-23 20:29:05 Starting - Starting the training job...
2020-06-23 20:29:07 Starting - Launching requested ML instances.........
2020-06-23 20:30:57 Starting - Preparing the instances for training......
2020-06-23 20:31:41 Downloading - Downloading input data...
2020-06-23 20:32:27 Training - Downloading the training image...
2020-06-23 20:32:47 Training - Training image download completed. Training in progress.[34m2020-06-23 20:32:48,431 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-06-23 20:32:48,434 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-06-23 20:32:48,444 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-06-23 20:33:02,765 sagemaker-containers INFO     Module scikit_learn_iris does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-06-23 20:33:02,765 sagemaker-containers INFO     Generating setup.cfg[0m
[34m2020-0

UnexpectedStatusException: Error for Training job sagemaker-scikit-learn-2020-06-23-20-29-04-707: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
Command "/miniconda3/bin/python -m scikit_learn_iris --max_leaf_nodes 30"

---
# Sandbox

In [13]:
# Shell escape: print the kernel's current working directory.
!pwd

/root/CD4ML-AWS-Serverless


In [7]:
# Display every environment variable visible to the kernel.
# Relies on `os` having been imported by an earlier cell in this kernel.
# NOTE(review): this dumps the whole environment -- check the output for
# secrets before sharing the notebook.
os.environ

environ{'PATH': '/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/amazon/bin:/opt/amazon/bin:/tmp/miniconda3/condabin:/tmp/anaconda3/condabin:/tmp/miniconda2/condabin:/tmp/anaconda2/condabin',
        'AWS_DEFAULT_REGION': 'us-east-1',
        'AWS_REGION': 'us-east-1',
        'KERNEL_LAUNCH_TIMEOUT': '40',
        'KERNEL_WORKING_PATH': 'CD4ML-AWS-Serverless',
        'KERNEL_GATEWAY': '1',
        'JPY_PARENT_PID': '7',
        'LC_CTYPE': 'C.UTF-8',
        'TERM': 'xterm-color',
        'CLICOLOR': '1',
        'PAGER': 'cat',
        'GIT_PAGER': 'cat',
        'MPLBACKEND': 'module://ipykernel.pylab.backend_inline',
        'KMP_DUPLICATE_LIB_OK': 'True',
        'KMP_INIT_AT_FORK': 'FALSE',
        'SM_OUTPUT_DATA_DIR': '/data',
        'SM_MODEL_DIR': '/data/model',
        'SM_CHANNEL_TRAIN': '/data/train'}

In [4]:
# Set the SM_OUTPUT_DATA_DIR environment variable for this kernel process.
os.environ.update({'SM_OUTPUT_DATA_DIR': '/data'})

In [5]:
# Display the environment again; the output below now lists SM_OUTPUT_DATA_DIR.
# NOTE(review): this dumps the whole environment -- check the output for
# secrets before sharing the notebook.
os.environ

environ{'PATH': '/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/amazon/bin:/opt/amazon/bin:/tmp/miniconda3/condabin:/tmp/anaconda3/condabin:/tmp/miniconda2/condabin:/tmp/anaconda2/condabin',
        'AWS_DEFAULT_REGION': 'us-east-1',
        'AWS_REGION': 'us-east-1',
        'KERNEL_LAUNCH_TIMEOUT': '40',
        'KERNEL_WORKING_PATH': 'CD4ML-AWS-Serverless',
        'KERNEL_GATEWAY': '1',
        'JPY_PARENT_PID': '7',
        'LC_CTYPE': 'C.UTF-8',
        'TERM': 'xterm-color',
        'CLICOLOR': '1',
        'PAGER': 'cat',
        'GIT_PAGER': 'cat',
        'MPLBACKEND': 'module://ipykernel.pylab.backend_inline',
        'KMP_DUPLICATE_LIB_OK': 'True',
        'KMP_INIT_AT_FORK': 'FALSE',
        'SM_OUTPUT_DATA_DIR': '/data'}

In [19]:
# Shell escape: print the kernel's current working directory.
!pwd

/root/CD4ML-AWS-Serverless
