# Forecast Model

In this notebook we'll train a deep learning model that learns if the target price or the stop loss would be hit for a long/short trade in the next hour based on the last two hours of price data.

Model:
* Multilayer Perceptron (MLP) (Feedforward neural network)
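* 3 layers: input (13), hidden (6), output (2) — sizes that result from the default data preparation settings below
* Binary Classification
* `Input`: Close, SMA(5 to 30 bars), ROC(5 to 30 bars) — the 13 feature columns written by the data preparation step (time is used only as the row index; open/low/high are not used)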
* `Output`: Does a long or short trade hit the profit target (20 points) without hitting a stop loss (10 points) in the next hour? No=0,Yes=1 

In [None]:
# Name shared by the data-prep script, the Docker image tags and the saved model file.
model_name="long_short_predict"

# Persist the name so later shell cells can read it back with $(cat model/model_name).
with open("model/model_name", "w") as text_file:
    text_file.write(model_name)
# Bare expression: the notebook displays the value as the cell output.
model_name

# Step 1) Get Data

In [None]:
import pandas as pd

# Load the raw price history, parsing the 'dt' column as dates and using it as the index.
df = pd.read_csv("../0_data/INTC.csv",infer_datetime_format=True, parse_dates=['dt'], index_col=['dt'])
# Copy it into the directory that the Docker cells below mount into the container as /opt/ml,
# where the data-prep script reads it as input/data/training/data_orig.csv.
df.to_csv('local_test/test_dir/input/data/training/data_orig.csv')
print("count=%s" % len(df))
# Bare expression: show the first rows as the cell output.
df.head()

In [None]:
# Select the interactive 'notebook' matplotlib backend for this kernel.
# NOTE(review): this backend may not render in JupyterLab-based front ends — confirm,
# or switch to `%matplotlib inline` if no plot appears.
%matplotlib notebook
# Plot the close column against the 'dt' index.
df["close"].plot()

# Step 2) Run Data Preparation Locally

## Modify Data Preparation Code

In the following cell, you can modify the data preparation code or leave it as is.

In [None]:
%%writefile model/data_prep_long_short_predict.py
#!/usr/bin/env python

import numpy as np
import pandas as pd
import talib as ta
from talib.abstract import *
import math

# Paths inside the container; the notebook mounts local_test/test_dir at /opt/ml.
prefix = '/opt/ml/'
input_path = prefix + 'input/data/training'

data_orig_file = input_path+'/data_orig.csv'
data_file = input_path+'/data.csv'

# Indicator configuration: repeatCount SMA and ROC series with periods
# repeatStep, 2*repeatStep, ..., repeatCount*repeatStep bars.
repeatCount=6
repeatStep=5
# Rows before index lookBack-1 are skipped so the longest SMA is defined.
lookBack=repeatCount*repeatStep
# Number of bars, starting at the current bar, scanned for target/stop hits.
forwardWindow=5

# Profit target and stop loss, expressed as fractions of the current close.
profitTarget=0.02
stopTarget=0.01


def build_header():
    """Return the output column names: dt, close, sma*, roc*, long, short."""
    periods = [(a+1)*repeatStep for a in range(repeatCount)]
    hData = ["dt", "close"]
    hData.extend("sma"+str(p) for p in periods)
    hData.extend("roc"+str(p) for p in periods)
    hData.extend(["long", "short"])
    return hData


def scale_min_max(values):
    """Rescale `values` to [0, 1] by their own min/max; all zeros if they are equal."""
    lo = min(values)
    hi = max(values)
    span = hi-lo
    if span == 0:
        return [0]*len(values)
    return [(v-lo)/span for v in values]


def label_trade(cl, maxClose, minClose, profit=profitTarget, stop=stopTarget):
    """Return (long, short) 0/1 labels for a trade entered at close `cl`.

    A side is labelled 1 when the favourable move inside the window reaches
    `profit` (fraction of `cl`) and the adverse move stays below `stop`.
    The order in which the two levels are touched is not considered.
    """
    upMove = maxClose-cl
    downMove = cl-minClose
    # Compare moves with moves. The previous code compared the move with the
    # target *price* cl*(1+target), which could never be satisfied.
    long = 1 if upMove >= cl*profit and not downMove >= cl*stop else 0
    short = 1 if downMove >= cl*profit and not upMove >= cl*stop else 0
    return long, short


def build_rows(index, closes, sma, roc):
    """Build one output record per bar from position lookBack-1 onwards.

    index  -- row labels (timestamps), positionally aligned with `closes`
    closes -- close prices
    sma    -- repeatCount sequences of SMA values aligned with `closes`
    roc    -- repeatCount sequences of ROC values aligned with `closes`

    Returns (rows, lCount, sCount, nCount): the records plus the number of
    rows labelled long, short and neither.
    """
    rows = []
    lCount = 0
    sCount = 0
    nCount = 0
    dLen = len(closes)

    # Iterate by position. The previous loop never advanced its row counter,
    # so the look-back condition was never met and no rows were produced.
    for n in range(lookBack-1, dLen):
        cl = closes[n]

        # close + SMAs, scaled together so they share one [0, 1] range;
        # an undefined SMA falls back to the close itself.
        levels = [cl]
        for a in range(repeatCount):
            levels.append(cl if math.isnan(sma[a][n]) else sma[a][n])

        inputRec = [index[n]]
        inputRec.extend(scale_min_max(levels))

        # ROC values are used as-is; an undefined ROC becomes 0.
        for a in range(repeatCount):
            inputRec.append(0 if math.isnan(roc[a][n]) else roc[a][n])

        # The window starts at the current bar and is truncated at the end of data.
        rClose = closes[n:min(dLen, n+forwardWindow)]
        long, short = label_trade(cl, max(rClose), min(rClose))
        lCount = lCount+long
        sCount = sCount+short
        if long == 0 and short == 0:
            nCount = nCount+1

        inputRec.append(long)
        inputRec.append(short)
        rows.append(inputRec)

    return rows, lCount, sCount, nCount


def main():
    """Read data_orig.csv, derive features and labels, and write data.csv."""
    d = pd.read_csv(data_orig_file,infer_datetime_format=True, parse_dates=['dt'], index_col=['dt'])
    print(d.head())

    # Plain array so that all later indexing is positional.
    closes = np.array(d["close"])
    inputs = {
        'close': closes
    }
    sma = [SMA(inputs,timeperiod=(a+1)*repeatStep) for a in range(repeatCount)]
    roc = [ROC(inputs,timeperiod=(a+1)*repeatStep) for a in range(repeatCount)]

    tData, lCount, sCount, nCount = build_rows(d.index, closes, sma, roc)

    print("lCount=%s,sCount=%s,nCount=%s" % (lCount,sCount,nCount))
    df1=pd.DataFrame(tData,columns=build_header())
    df1.set_index(pd.DatetimeIndex(df1['dt']), inplace=True)
    del df1['dt']

    df1.to_csv(data_file)
    print(df1.head(5))
    print("count=%s" % (len(df1)))


if __name__ == '__main__':
    main()

## Run Data Preparation Locally in a Docker Container

In [None]:
# Copy the data-prep script written above to model/train, the file the container is asked to run.
!cp model/data_prep_$(cat model/model_name).py model/train
# NOTE(review): 777 is broader than needed just to make the script executable; 755 would presumably suffice — confirm.
!chmod 777 model/train
# Build an image from the Dockerfile in the current directory, then run its `train` command with
# local_test/test_dir mounted at /opt/ml, which is where the script reads data_orig.csv and writes data.csv.
!docker build -t data_prep_$(cat model/model_name) .
!docker run -v $(pwd)/local_test/test_dir:/opt/ml --rm data_prep_$(cat model/model_name) train

## Create Training and Test Data

In [None]:
# Load the prepared features/labels that the data-prep container wrote (this rebinds `df`).
df = pd.read_csv("local_test/test_dir/input/data/training/data.csv",infer_datetime_format=True, parse_dates=['dt'], index_col=['dt'])
print("totalCount=%s" % len(df))

# Split by row order, without shuffling: the first 30% of rows form the training set.
# NOTE(review): 30/70 is an unusually small training share — confirm it is intentional.
trainCount=int(len(df)*0.3)
dfTrain = df.iloc[:trainCount]
dfTrain.to_csv('local_test/test_dir/input/data/training/data_train.csv')
print("trainCount=%s" % len(dfTrain))

# The remaining rows form the test set; both files are read by the training script.
dfTest = df.iloc[trainCount:]
dfTest.to_csv('local_test/test_dir/input/data/training/data_test.csv')
print("testCount=%s" % len(dfTest))
dfTest.head()

# Step 3) Train the Model

In the following cell, you can modify the model training code or leave it as is.

In [None]:
%%writefile model/model_long_short_predict.py
#!/usr/bin/env python
from __future__ import print_function

import os
import sys
import traceback
import math
import numpy as np
import pandas as pd
import tensorflow as tf

from keras.layers import Dropout, Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasRegressor

# yLen: number of trailing label columns in each CSV row.
# b: number of feature columns; starts at 0 and is set by data_process().
yLen, b = 2, 0

# Optional: sets TensorFlow's TF_CPP_MIN_LOG_LEVEL environment variable.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Root of the directory tree that SageMaker mounts into the container.
prefix = '/opt/ml/'

# Training and test CSV files inside the mounted tree.
input_path = '{}input/data/training/data_train.csv'.format(prefix)
test_path = '{}input/data/training/data_test.csv'.format(prefix)

# Where the failure report and the trained model are written.
output_path, model_path = (os.path.join(prefix, sub) for sub in ('output', 'model'))

# Process and prepare the data
# Process and prepare the data
def data_process(df):
    """Split a frame into feature and label arrays.

    Column 0 is skipped (the CSV is read without an index column, so it holds
    the 'dt' values), the last `yLen` columns are the labels and everything in
    between is a feature.

    Returns (dataX, dataY, b): two 2-D float arrays and the feature count.
    The module-level `b` is updated as well because build_classifier() reads it.
    Raises ValueError if a feature or label column cannot be converted to float.
    """
    global yLen
    global b
    split = len(df.columns) - yLen
    # Slice whole columns at once instead of looping over rows. This also
    # yields float arrays; the row-wise version returned object-dtype labels.
    dataX = df.iloc[:, 1:split].values.astype(float)
    dataY = df.iloc[:, split:].values.astype(float)
    # Taken from the array shape so it is set even when the frame has no rows.
    b = dataX.shape[1]
    return dataX,dataY,b

def build_classifier():
    """Assemble and compile the feed-forward network.

    Layer widths come from the module-level `b` (feature count) and `yLen`
    (label count): Dense(b) -> Dropout -> Dense(b/2) -> Dropout -> Dense(yLen).
    """
    print("build_classifier:b=%s,yLen=%s" % (b,yLen))
    hidden_units = int(b/2)
    layers = [
        Dense(b, input_dim=b, kernel_initializer='normal', activation='relu'),
        Dropout(0.1),
        Dense(hidden_units, kernel_initializer='normal', activation='relu'),
        Dropout(0.1),
        # One sigmoid unit per label column.
        Dense(yLen,kernel_initializer='normal', activation='sigmoid'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def generate_model(dataX, dataY, b):
    """Build the classifier, fit it on the given data and report its training score.

    `b` is accepted for call compatibility; the network width is taken from
    the module-level value inside build_classifier().
    Returns the fitted model.
    """
    classifier = build_classifier()
    classifier.fit(dataX, dataY, epochs=100, batch_size=15)
    # Score on the same data that was used for fitting.
    metric_values = classifier.evaluate(dataX, dataY, verbose=0)
    metric_name = classifier.metrics_names[1]
    print("Training Data %s: %.2f%%" % (metric_name, metric_values[1]*100))
    return classifier
        
def train():
    """Fit the model on the training CSV, save it, then score it on the test CSV.

    On any exception the message and traceback are written to
    <output_path>/failure and to stderr, and the process exits with code 255.
    """
    print('Starting the training.')
    try:
        train_frame = pd.read_csv(input_path)
        X, y, b = data_process(train_frame)
        model = generate_model(X, y, b)
        model.save(os.path.join(model_path, 'model.h5'))

        print('Training is complete. Model saved.')

        test_frame = pd.read_csv(test_path)
        testX, testY, b = data_process(test_frame)
        test_scores = model.evaluate(testX, testY, verbose=0)
        print("Test Data %s: %.2f%%" % (model.metrics_names[1], test_scores[1]*100))

    except Exception as e:
        trc = traceback.format_exc()
        message = 'Exception during training: ' + str(e) + '\n' + trc
        # The failure file is returned as the failure reason in the
        # DescribeTrainingJob result.
        failure_file = os.path.join(output_path, 'failure')
        with open(failure_file, 'w') as s:
            s.write(message)
        # Echo to stderr so the exception also appears in the training job logs.
        print(message, file=sys.stderr)
        # A non-zero exit code causes the training job to be marked as Failed.
        sys.exit(255)

if __name__ == '__main__':
    # train() exits with code 255 itself when training raises an exception.
    train()

    # Reaching this line means training succeeded; a zero exit code causes
    # the job to be marked as Succeeded.
    sys.exit(0)

### Option 1: Train Locally

You can choose whether to run the training locally (Option 1) or remotely via SageMaker (Option 2).

In [None]:
# Build Local ML Image
# Print the model name that the following commands substitute via $(cat model/model_name).
!echo $(cat model/model_name) 
# Copy the training script written above to model/train, the file the container is asked to run.
!cp model/model_$(cat model/model_name).py model/train
# NOTE(review): 777 is broader than needed just to make the script executable; 755 would presumably suffice — confirm.
!chmod 777 model/train
# Build the image from the Dockerfile in the current directory, then run its `train` command with
# local_test/test_dir mounted at /opt/ml (the script reads its CSVs from and saves model.h5 under that path).
!docker build -t model_$(cat model/model_name) .
!docker run -v $(pwd)/local_test/test_dir:/opt/ml --rm model_$(cat model/model_name) train

#### Copy Model Artifact to Strategies Folder

In [None]:
# Copy the model saved by the local training run into the strategies folder,
# naming it after the model name stored in model/model_name.
!cp local_test/test_dir/model/model.h5 ../strategies/model/model_$(cat model/model_name).h5
# List the model files now present in the strategies folder.
!ls -la ../strategies/model/model_*.h5

### Option 2: Remote Training via SageMaker

You can choose whether to run the training locally (Option 1) or remotely via SageMaker (Option 2).

In [None]:
# Push the ML image to Amazon ECR (the estimator cell below pulls it from a *.dkr.ecr.* URI).
# NOTE(review): build_and_push.sh is not shown here — presumably it builds and pushes the image; confirm.
!./build_and_push.sh

In [None]:
import os
import sagemaker as sage
from sagemaker import get_execution_role
import datetime
from sagemaker.tensorflow import TensorFlow

# Read back the model name that the first cell stored in model/model_name.
model_name=''
with open('model/model_name', 'r') as file:
    model_name = file.read().replace('\n', '')

role = get_execution_role()
sess = sage.Session()

# Local directory holding the CSV files to upload.
WORK_DIRECTORY = 'local_test/test_dir/input/data/training'
prefix = 'model_'+model_name
# Job names use hyphens instead of underscores.
# NOTE(review): presumably because SageMaker job names do not allow underscores — confirm.
job_name=prefix.replace('_','-')

# Upload the training directory under the 'data' key prefix; the returned location is passed to fit().
data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')
print(data_location)

# URI of the image in this account's and region's ECR registry, tagged 'latest'.
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'

# NOTE(review): image_name / train_instance_count / train_instance_type are SageMaker Python SDK v1
# parameter names; SDK v2 renamed them (image_uri / instance_count / instance_type) — confirm the installed version.
classifier = sage.estimator.Estimator(
    image_name=image,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path="s3://{}/output".format(sess.default_bucket()),
    sagemaker_session=sess,
    base_job_name=job_name)
# Start the training job using the uploaded data.
classifier.fit(data_location)

#### Download Model Artifact from Amazon S3 and copy it to Strategies Folder

In [None]:
#Get Model from S3
import boto3

# classifier.model_data is an s3:// URI; stripping the bucket prefix leaves the object key.
# The key gets its own name so the `model_name` variable (the model's name) is not overwritten.
model_key=classifier.model_data.replace('s3://'+sess.default_bucket()+'/','')
s3 = boto3.resource('s3')
my_bucket = s3.Bucket(sess.default_bucket())
my_bucket.download_file(model_key,'model.tar.gz')
# Unpack the archive, which is expected to contain model.h5, then remove it.
!tar -xzf model.tar.gz
!rm model.tar.gz
# The model name is stored in model/model_name (written by the first cell); reading a
# non-existent ./model_name would yield an empty name and a file called model_.h5.
!cp model.h5 ../strategies/model/model_$(cat model/model_name).h5
!ls -la ../strategies/model/model_*.h5