In [3]:
import requests
import sagemaker
import boto3
import s3fs
import json
import io

import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

from sagemaker.estimator import Estimator
from sagemaker.predictor import Predictor
from sagemaker.serializers import NumpySerializer
from sagemaker.deserializers import NumpyDeserializer
from sagemaker.local import LocalSession

from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns

In [None]:
%matplotlib inline
sns.set()

seed = 42
rand = np.random.RandomState(seed)

local_mode = False # activate to use local mode

with open("config.json") as f:
    configs = json.load(f)
    
default_bucket = configs["default_bucket"] #bucket name here
role = configs["role_arn"] # sagemaker role arn here

boto_session = boto3.Session()
   
if local_mode:
    sagemaker_session = LocalSession(boto_session = boto_session)
    sagemaker_session._default_bucket = default_bucket
else:
    sagemaker_session = sagemaker.Session(
        boto_session = boto_session,
        default_bucket = default_bucket
    )

ecr_image = configs["image_arn"] #image uri from ECR here

prefix = "modeling/sagemaker"

data_name = f"anomaly"
test_name = "adb-poc"   

In [None]:
def get_s3fs():
    """Create an ``s3fs.S3FileSystem`` that uses ``boto_session``'s credentials.

    The credentials are resolved once and frozen, so the access key, secret
    key and session token passed to s3fs always come from the same credential
    set (reading them one at a time from a refreshable credentials object
    could mix values from before and after a refresh).

    Returns:
        s3fs.S3FileSystem: filesystem built from the session's key, secret
        and token.

    Raises:
        RuntimeError: if ``boto_session`` cannot resolve any credentials.
    """
    credentials = boto_session.get_credentials()
    if credentials is None:
        # Without this guard the failure would be an opaque AttributeError
        # on ``None.access_key``.
        raise RuntimeError(
            "boto_session has no credentials; configure AWS credentials "
            "before calling get_s3fs()"
        )

    frozen = credentials.get_frozen_credentials()
    return s3fs.S3FileSystem(
        key=frozen.access_key,
        secret=frozen.secret_key,
        token=frozen.token,
    )

def plot_and_clear():
    """Display the current pyplot figure and then tear down pyplot state.

    Calls, in this order: ``plt.show``, ``plt.clf``, ``plt.cla`` and
    ``plt.close``. Takes no arguments and returns ``None``.
    """
    for step in (plt.show, plt.clf, plt.cla, plt.close):
        step()

In [None]:

# NOTE(review): `url` is not assigned in any cell visible in this notebook, so
# this cell raises NameError on a fresh kernel unless it is defined elsewhere.
# Define the dataset URL in the configuration cell before running.

# A timeout keeps the cell from hanging indefinitely on an unresponsive host.
r = requests.get(url, timeout=30)
# Fail here on an HTTP error rather than later with a confusing
# "substring not found" while searching an error page for the data header.
r.raise_for_status()

# The numeric table starts at the "Data:" header line; everything before it is
# discarded, and skiprows=1 drops the header line itself.
data_header = "Data:   y          x"
data_text = r.text[r.text.index(data_header):]

# unpack=True returns one array per column: first y, then x.
y, x = np.loadtxt(
    io.StringIO(data_text),
    skiprows=1, unpack=True
)

# Turn x into a 2-D array with a single column.
x = x.reshape(-1, 1)

# Hold out 25% of the rows; `rand` is the seeded RandomState from the config cell.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25,
    random_state=rand
)

In [None]:
# Location of the training CSV: `file_fn` is "<bucket>/<key>" (the form passed
# to s3fs below) and `file_path` is the same location as an s3:// URI.
train_key = f"{prefix}/{data_name}/train/data.csv"
file_fn = f"{default_bucket}/{train_key}"
file_path = "s3://" + file_fn

s3 = get_s3fs()

# Write the training rows as comma-separated text: X_train columns first,
# y_train as the last column.
with s3.open(file_fn, mode="wb") as f:
    np.savetxt(f, np.column_stack((X_train, y_train)), delimiter=",")

In [None]:
# Hyperparameters handed to the training image (all values are strings).
hyperparameters = {"train-file": "data.csv", "df": "20"}

# Single input channel named "train", pointing at the uploaded CSV.
data_channels = {"train": file_path}

# Local mode uses the "local" instance type; otherwise a managed ml.m5.large.
if local_mode:
    train_instance_type = "local"
else:
    train_instance_type = "ml.m5.large"

model_output_path = f"s3://{default_bucket}/{prefix}/{data_name}/model"

estimator = Estimator(
    image_uri=ecr_image,
    role=role,
    sagemaker_session=sagemaker_session,
    instance_type=train_instance_type,
    instance_count=1,
    hyperparameters=hyperparameters,
    base_job_name=f'{data_name}-{test_name}',
    output_path=model_output_path,
)

# Block until the job finishes; logs="None" is passed so no job logs are requested.
estimator.fit(data_channels, wait=True, logs="None")

job_name = estimator.latest_training_job.name
print(job_name)