In [None]:
import pandas as pd
import numpy as np
import math
import random
%matplotlib inline
# Seed the stdlib RNG so the generated noise is the same on every run
random.seed(0)
# Scale factor applied to the uniform random noise
random_factor = 0.05
# Number of steps per cycle
steps_per_cycle = 80
# Number of cycles to generate
number_of_cycles = 50


def _noisy_sine(step):
    """Return the sine of the phase at `step`, with scaled uniform noise added to the phase."""
    jitter = random.uniform(-1.0, +1.0) * random_factor
    return math.sin(step * (2 * math.pi / steps_per_cycle) + jitter)


# One row per time step: t = 0 .. steps_per_cycle * number_of_cycles (inclusive)
df = pd.DataFrame({"t": np.arange(steps_per_cycle * number_of_cycles + 1)})
df["sin_t"] = df["t"].map(_noisy_sine)
# Plot the first two cycles of the generated signal
df[["sin_t"]].iloc[: steps_per_cycle * 2].plot()

In [None]:
def _load_data(data, n_prev = 100):  
    """
    data should be pd.DataFrame()
    """

    docX, docY = [], []
    for i in range(len(data)-n_prev):
        docX.append(data.iloc[i:i+n_prev].as_matrix())
        docY.append(data.iloc[i+n_prev].as_matrix())
    alsX = np.array(docX)
    alsY = np.array(docY)

    return alsX, alsY

def train_test_split(df, test_size=0.1, n_prev = 100):
    """
    Split `df` by row position into a leading training part and a trailing
    test part, then window each part with `_load_data`.

    `test_size` is the fraction of rows placed in the test part and `n_prev`
    is forwarded to `_load_data`. Returns
    ((X_train, y_train), (X_test, y_test)).
    """
    # Row index where the test part begins
    split_at = int(round(len(df) * (1 - test_size)))

    train_X, train_y = _load_data(df.iloc[:split_at], n_prev)
    test_X, test_y = _load_data(df.iloc[split_at:], n_prev)

    return (train_X, train_y), (test_X, test_y)

In [None]:
# Number of consecutive samples in each input window
length_of_sequences = 100

# Window only the noisy sine column, then unpack the train and test pairs
train_set, test_set = train_test_split(df.loc[:, ["sin_t"]], n_prev=length_of_sequences)
X_train, y_train = train_set
X_test, y_test = test_set

In [None]:
import os

# Create the local staging directory if it does not exist yet
os.makedirs("./data", exist_ok=True)

# Write the training arrays as .npy files into the staging directory
for npy_path, array in (('./data/X_train.npy', X_train), ('./data/y_train.npy', y_train)):
    np.save(npy_path, array)

In [None]:
import sagemaker
# Session used for the upload; its default bucket is the upload target
sagemaker_session = sagemaker.Session()
bucket_name = sagemaker_session.default_bucket()

# Upload ./data under the given key prefix and keep the returned location,
# which is later passed to estimator.fit()
input_data = sagemaker_session.upload_data(
    path='./data',
    bucket=bucket_name,
    key_prefix='dataset/keras-rnn-sin',
)
print('Training data is uploaded to: {}'.format(input_data))

In [None]:
from sagemaker.tensorflow import TensorFlow
from sagemaker import get_execution_role

# Role handed to the estimator
role = get_execution_role()

# Instance type for this training run
# NOTE(review): 'local' presumably selects the SDK's local mode - confirm
train_instance_type = 'local'

# Values forwarded to the entry-point script
training_hyperparameters = {
    'batch-size': 600,
    'validation_split': 0.05,
    'epochs': 15,
}

estimator = TensorFlow(
    entry_point="./rnn-sin-sagemaker.py",
    role=role,
    train_instance_count=1,
    train_instance_type=train_instance_type,
    framework_version="1.12.0",
    py_version='py3',
    script_mode=True,
    hyperparameters=training_hyperparameters,
)

# Train on the previously uploaded data
estimator.fit(input_data)

In [None]:
# Instance type used to host the trained model
instance_type = 'local'

predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
)

In [None]:
# Run inference on the test windows through the deployed predictor
prediction = predictor.predict(X_test)

In [None]:
# Put the first 200 predictions next to the first 200 targets and plot both
predicted_head = prediction['predictions'][:200]
dataf = pd.DataFrame(predicted_head)
dataf.columns = ["predict"]
dataf = dataf.assign(input=y_test[:200])
dataf.plot(figsize=(15, 5))

In [None]:
from sagemaker.tensorflow import TensorFlow
from sagemaker import get_execution_role

# Role handed to the estimator
role = get_execution_role()

# Instance type for this training run
train_instance_type = 'ml.m5.large'

# Values forwarded to the entry-point script
training_hyperparameters = {
    'batch-size': 600,
    'validation_split': 0.05,
    'epochs': 15,
}

estimator = TensorFlow(
    entry_point="./rnn-sin-sagemaker.py",
    role=role,
    train_instance_count=1,
    train_instance_type=train_instance_type,
    framework_version="1.12.0",
    py_version='py3',
    script_mode=True,
    hyperparameters=training_hyperparameters,
)

# Train on the previously uploaded data
estimator.fit(input_data)

In [None]:
# Instance type used to host the trained model
instance_type = 'ml.t2.large'

predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
)

In [None]:
# Run inference on the test windows through the deployed predictor
prediction = predictor.predict(X_test)

In [None]:
# Put the first 200 predictions next to the first 200 targets and plot both
predicted_head = prediction['predictions'][:200]
dataf = pd.DataFrame(predicted_head)
dataf.columns = ["predict"]
dataf = dataf.assign(input=y_test[:200])
dataf.plot(figsize=(15, 5))

In [None]:
# Delete the endpoint behind the current predictor once it is no longer needed
cleanup_session = sagemaker.Session()
cleanup_session.delete_endpoint(predictor.endpoint)