# Reinforcement Learning Model

In this notebook we'll train a reinforcement learning model.

In [None]:
# Short identifier for this model; the shell cells below read it back with
# `cat model/model_name` to derive script, image, and artifact names.
model_name = "rl"

# Store the bare name (no trailing newline) so it can be spliced into file names.
with open("model/model_name", mode="w") as name_file:
    name_file.write(model_name)

# Echo the value as the cell output.
model_name

# Step 1) Get Data

In [None]:
import pandas as pd

# Load the INTC CSV, using the parsed `dt` column as the index.
df = pd.read_csv(
    "../0_data/INTC.csv",
    index_col=['dt'],
    parse_dates=['dt'],
    infer_datetime_format=True,
)

# Stage an unmodified copy in the directory that is later mounted into the
# data-preparation container as /opt/ml/input/data/training.
df.to_csv('local_test/test_dir/input/data/training/data_orig.csv')

print(f"count={len(df)}")
df.head()

In [None]:
%matplotlib notebook
df["close"].plot()

# Step 2) Run Data Preparation Locally

## Modify Data Preparation Code

In the following cell, you can modify the data preparation code or leave it as is.

In [None]:
%%writefile model/data_prep_rl.py
#!/usr/bin/env python

import pandas as pd
from tensortrade.exchanges.simulated import SimulatedExchange
from tensortrade.features import FeaturePipeline
from tensortrade.features.scalers import MinMaxNormalizer
from tensortrade.features.stationarity import FractionalDifference

# Paths inside the container; the notebook mounts local_test/test_dir at /opt/ml.
prefix = '/opt/ml/'
input_path = prefix + 'input/data/training'

# Raw input staged by the notebook, and the prepared output it reads back.
data_orig_file = input_path + '/data_orig.csv'
data_file = input_path + '/data.csv'

# Load the raw data with `dt` parsed and used as the index.
raw_df = pd.read_csv(
    data_orig_file,
    index_col=['dt'],
    parse_dates=['dt'],
    infer_datetime_format=True,
)
print(raw_df.head())

exchange = SimulatedExchange(data_frame=raw_df, base_instrument='USD', pretransform=True)

# Feature pipeline: a min-max normalizer over the open/high/low/close columns
# followed by a fractional difference of order 0.6.
price_scaler = MinMaxNormalizer(["open", "high", "low", "close"])
frac_diff = FractionalDifference(difference_order=0.6)
pipeline = FeaturePipeline(steps=[price_scaler, frac_diff])
exchange.feature_pipeline = pipeline

# Run the pipeline over the exchange's frame. To bypass the transformation,
# use exchange.data_frame directly instead.
prepared_df = pipeline.transform(exchange.data_frame)

prepared_df.to_csv(data_file)
print(prepared_df.head())
print("count={}".format(len(prepared_df)))

## Run Data Preparation Locally in a Docker Container

In [None]:
# Stage the data-preparation script as model/train
# (presumably the file Dockerfile-RL installs as the `train` command -- confirm).
!cp model/data_prep_$(cat model/model_name).py model/train
# Grant read/write/execute on the staged script to every user.
!chmod 777 model/train
# Build the image from Dockerfile-RL, tagged data_prep_<model name>.
!docker build -t data_prep_$(cat model/model_name) -f Dockerfile-RL .
# Run `train` in the container with local_test/test_dir mounted at /opt/ml;
# --rm removes the container once it exits.
!docker run -v $(pwd)/local_test/test_dir:/opt/ml --rm data_prep_$(cat model/model_name) train

## Create Training and Test Data

In [None]:
# Load the prepared data written by the data-preparation container.
df = pd.read_csv(
    "local_test/test_dir/input/data/training/data.csv",
    index_col=['dt'],
    parse_dates=['dt'],
    infer_datetime_format=True,
)
print(f"totalCount={len(df)}")

# Positional split: the first 30% of rows become the training set,
# all remaining rows the test set.
TRAIN_FRACTION = 0.3
trainCount = int(len(df) * TRAIN_FRACTION)
dfTrain, dfTest = df.iloc[:trainCount], df.iloc[trainCount:]

dfTrain.to_csv('local_test/test_dir/input/data/training/data_train.csv')
print(f"trainCount={len(dfTrain)}")

dfTest.to_csv('local_test/test_dir/input/data/training/data_test.csv')
print(f"testCount={len(dfTest)}")
dfTest.head()

# Step 3) Train the Model

In the following cell, you can modify the model training code or leave it as is.

In [None]:
%%writefile model/model_rl.py
#!/usr/bin/env python

import os
import sys
import traceback

import tensorflow as tf
import pandas as pd

from stable_baselines.common.policies import MlpLnLstmPolicy
from stable_baselines import PPO2

from tensortrade.strategies import StableBaselinesTradingStrategy
from tensortrade.environments import TradingEnvironment
from tensortrade.rewards import RiskAdjustedReturns
from tensortrade.actions import ManagedRiskOrders
from tensortrade.instruments import Quantity, TradingPair, EUR, USD, BTC
from tensortrade.wallets import Wallet, Portfolio
from tensortrade.exchanges.simulated import SimulatedExchange
from tensortrade.features.stationarity import LogDifference
from tensortrade.features.scalers import MinMaxNormalizer
from tensortrade.features import FeaturePipeline
        
def train():
    """Train a PPO2 agent in a TensorTrade environment and save it to /opt/ml/model.

    Reads the input CSV from the SageMaker-style input directory, builds the
    simulated exchange, portfolio and trading environment, runs the
    Stable-Baselines strategy for 10000 steps and saves the agent as
    ``model.h5`` in the model directory.

    On any exception the message and traceback are printed to stderr and
    written to ``/opt/ml/output/failure``, and the process exits with
    status 255.
    """
    print('Starting the training.')

    # These are the paths to where SageMaker mounts interesting things in your
    # container. They are resolved before the ``try`` block so the error
    # handler can always reference ``output_path``.
    prefix = '/opt/ml/'

    # NOTE(review): this reads data_orig.csv, not the data_train.csv split
    # produced by the notebook -- confirm that is intended.
    input_path = prefix + 'input/data/training/data_orig.csv'

    output_path = os.path.join(prefix, 'output')
    model_path = os.path.join(prefix, 'model')

    try:
        df = pd.read_csv(input_path,infer_datetime_format=True, parse_dates=['dt'], index_col=['dt'])
        print(df)

        WINDOW_SIZE = 1
        PRICE_COLUMN = 'close'

        normalize = MinMaxNormalizer(inplace=True)
        feature_pipeline = FeaturePipeline(steps=[normalize])

        action_scheme = ManagedRiskOrders(pairs=[USD/BTC])
        reward_scheme = RiskAdjustedReturns(return_algorithm="sortino")

        exchange = SimulatedExchange(data_frame=df, price_column=PRICE_COLUMN, randomize_time_slices=True)
        wallets = [(exchange, USD, 100000.0), (exchange, BTC, 0)]

        portfolio = Portfolio(base_instrument=USD,wallets=wallets)

        environment = TradingEnvironment(exchange=exchange,
                                         portfolio=portfolio,
                                         action_scheme=action_scheme,
                                         reward_scheme=reward_scheme,
                                         feature_pipeline=feature_pipeline,
                                         window_size=WINDOW_SIZE,
                                         observe_wallets=[USD, BTC])

        print('Observation Data:')
        print(environment.observation_columns)

        model = PPO2
        policy = MlpLnLstmPolicy
        params = { "learning_rate": 1e-5, 'nminibatches': 1 }

        strategy = StableBaselinesTradingStrategy(environment=environment,
                                                  model=model,
                                                  policy=policy,
                                                  model_kwargs=params)

        perf=strategy.run(steps=10000)
        print("perf=%s" % (perf))

        # Save the model. Create the target directory first so a finished
        # training run is not lost because the directory is missing.
        os.makedirs(model_path, exist_ok=True)
        strategy.save_agent(path=os.path.join(model_path, 'model.h5'))
        print('Training is complete. Model saved.')

    except Exception as e:
        trc = traceback.format_exc()
        message = 'Exception during training: ' + str(e) + '\n' + trc

        # Printing this causes the exception to be in the training job logs.
        # Done first so the log is written even if the failure file cannot be.
        print(message, file=sys.stderr)

        # Write out an error file. This will be returned as the failure
        # Reason in the DescribeTrainingJob result.
        os.makedirs(output_path, exist_ok=True)
        with open(os.path.join(output_path, 'failure'), 'w') as s:
            s.write(message)

        # A non-zero exit code causes the training job to be marked as Failed.
        sys.exit(255)

# Script entry point: run training only when executed directly, not on import.
if __name__ == '__main__':
    train()

    # A zero exit code causes the job to be marked as Succeeded.
    sys.exit(0)

### Option 1: Train Locally

You can choose whether to train locally (Option 1) or remotely via SageMaker (Option 2).

In [None]:
# Build Local ML Image
# Show which model name the commands below will use.
!echo $(cat model/model_name) 
# Stage the training script as model/train
# (presumably the file Dockerfile-RL installs as the `train` command -- confirm).
!cp model/model_$(cat model/model_name).py model/train
# Grant read/write/execute on the staged script to every user.
!chmod 777 model/train
# Build the image from Dockerfile-RL, tagged model_<model name>.
!docker build -t model_$(cat model/model_name) -f Dockerfile-RL .
# Run `train` in the container with local_test/test_dir mounted at /opt/ml;
# --rm removes the container once it exits.
!docker run -v $(pwd)/local_test/test_dir:/opt/ml --rm model_$(cat model/model_name) train

#### Copy Model Artifact to Strategies Folder

In [None]:
# Copy the model saved by the local training run (the container's /opt/ml/model
# is local_test/test_dir/model on the host) to the strategies folder,
# named model_<model name>.h5.
!cp local_test/test_dir/model/model.h5 ../strategies/model/model_$(cat model/model_name).h5
# List the model artifacts now present in the strategies folder.
!ls -la ../strategies/model/model_*.h5

### Option 2: Remote Training via SageMaker

You can choose whether to train locally (Option 1) or remotely via SageMaker (Option 2).

In [None]:
# Build the ML image and push it to the container registry.
# NOTE(review): the training cell below pulls the image from an ECR URI
# (<account>.dkr.ecr.<region>.amazonaws.com/model_<model name>:latest), so the
# registry is presumably Amazon ECR rather than ECS -- confirm in build_and_push.sh.
!./build_and_push.sh

In [None]:
import os
import sagemaker as sage
from sagemaker import get_execution_role
import datetime
from sagemaker.tensorflow import TensorFlow

# Recover the model name persisted by the first cell, with newlines removed.
with open('model/model_name', 'r') as name_file:
    model_name = name_file.read().replace('\n', '')

# Execution role and SageMaker session used for the upload and the training job.
role = get_execution_role()
sess = sage.Session()

# Image/repository name, and the same name with hyphens instead of
# underscores for use as the base job name.
prefix = f'model_{model_name}'
job_name = prefix.replace('_', '-')

# Upload the local training directory to S3 under the `data` key prefix.
WORK_DIRECTORY = 'local_test/test_dir/input/data/training'
data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')
print(data_location)

# Compose the ECR URI of the training image from the caller's account and region.
boto_sess = sess.boto_session
account = boto_sess.client('sts').get_caller_identity()['Account']
region = boto_sess.region_name
image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'

# Configure a single-instance training job that writes its artifacts to the
# session's default bucket, then start it on the uploaded data.
estimator_args = dict(
    image_name=image,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path=f"s3://{sess.default_bucket()}/output",
    sagemaker_session=sess,
    base_job_name=job_name,
)
classifier = sage.estimator.Estimator(**estimator_args)
classifier.fit(data_location)

#### Download Model Artifact from Amazon S3 and copy it to Strategies Folder

In [None]:
#Get Model from S3
model_name=classifier.model_data.replace('s3://'+sess.default_bucket()+'/','')
import boto3
s3 = boto3.resource('s3')
my_bucket = s3.Bucket(sess.default_bucket())
my_bucket.download_file(model_name,'model.tar.gz')
!tar -xzf model.tar.gz
!rm model.tar.gz
!cp model.h5 ../strategies/model/model_$(cat model_name).h5
!ls -la ../strategies/model/model_*.h5