In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
# Restrict TensorFlow's logger to records of level ERROR.
tf.get_logger().setLevel('ERROR')
# Switch on memory growth for every GPU that TensorFlow lists.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

from keras import layers, models

from finrock.data_feeder import PdDataFeeder
from finrock.trading_env import TradingEnv
from finrock.scalers import MinMaxScaler, ZScoreScaler
from finrock.reward import SimpleReward, AccountValueChangeReward
from finrock.metrics import DifferentActions, AccountValue, MaxDrawdown, SharpeRatio
from finrock.indicators import BolingerBands, RSI, PSAR, SMA, MACD

from rockrl.utils.misc import MeanAverage
from rockrl.utils.memory import MemoryManager
from rockrl.tensorflow import PPOAgent
from rockrl.utils.vectorizedEnv import VectorizedEnv

# Render inline matplotlib figures in SVG format.
import matplotlib_inline
%matplotlib inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')




In [2]:
# Read the sinusoid dataset and drop its final 1000 rows
# (presumably held back for later evaluation — TODO confirm).
df = pd.read_csv('Datasets/random_sinusoid.csv')[:-1000]

# Indicators handed to the data feeder; every one is built from the same frame.
feeder_indicators = [
    BolingerBands(data=df, period=20, std=2),
    RSI(data=df, period=14),
    PSAR(data=df),
    MACD(data=df),
    SMA(data=df, period=7),
]
pd_data_feeder = PdDataFeeder(df, indicators=feeder_indicators)

# Metric objects passed to the trading environments.
episode_metrics = [
    DifferentActions(),
    AccountValue(),
    MaxDrawdown(),
    SharpeRatio(),
]

# Ten TradingEnv instances wrapped in one vectorized environment.
num_envs = 10
env = VectorizedEnv(
    env_object=TradingEnv,
    num_envs=num_envs,
    data_feeder=pd_data_feeder,
    output_transformer=ZScoreScaler(),
    initial_balance=1000.0,
    max_episode_steps=1000,
    window_size=50,
    reward_function=AccountValueChangeReward(),
    metrics=episode_metrics,
)

# Values the model builders below are called with.
action_space, input_shape = env.action_space, env.observation_space.shape

def actor_model(input_shape, action_space):
    """Build the actor (policy) network for a discrete action space.

    The input is flattened, run through three ELU dense layers
    (512, 256 and 64 units), then a dropout layer with rate 0.2, and
    finally a softmax dense layer with ``action_space`` units.

    Args:
        input_shape: Shape given to ``layers.Input``.
        action_space: Number of units in the softmax output layer.

    Returns:
        A ``models.Model`` mapping the input tensor to the softmax output.
        The model is not compiled here.
    """
    observation = layers.Input(shape=input_shape, dtype=tf.float32)
    hidden = layers.Flatten()(observation)
    for units in (512, 256, 64):
        hidden = layers.Dense(units, activation='elu')(hidden)
    hidden = layers.Dropout(0.2)(hidden)
    # Softmax head: discrete action space.
    action_probs = layers.Dense(action_space, activation='softmax')(hidden)
    return models.Model(inputs=observation, outputs=action_probs)

def critic_model(input_shape):
    """Build the critic network that outputs a single unactivated value.

    The input is flattened, run through three ELU dense layers
    (512, 256 and 64 units), then a dropout layer with rate 0.2, and
    finally a one-unit dense layer with no activation.

    Args:
        input_shape: Shape given to ``layers.Input``.

    Returns:
        A ``models.Model`` mapping the input tensor to the one-unit output.
        The model is not compiled here.
    """
    observation = layers.Input(shape=input_shape, dtype=tf.float32)
    hidden = layers.Flatten()(observation)
    for units in (512, 256, 64):
        hidden = layers.Dense(units, activation='elu')(hidden)
    hidden = layers.Dropout(0.2)(hidden)
    # Linear head: one output unit, no activation.
    value = layers.Dense(1, activation=None)(hidden)
    return models.Model(inputs=observation, outputs=value)

# Build both networks first, then hand them to the PPO agent.
policy_network = actor_model(input_shape, action_space)
value_network = critic_model(input_shape)

agent = PPOAgent(
    actor=policy_network,
    critic=value_network,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    batch_size=128,
    lamda=0.95,
    kl_coeff=0.5,
    c2=0.01,
    writer_comment='ppo_sinusoid_discrete',
)

# Ask the data feeder and the wrapped environment to save their config
# into the agent's log directory.
# NOTE(review): the recorded run printed "Failed to save config to file",
# presumably from one of these calls — verify that agent.logdir is usable here.
for configurable in (pd_data_feeder, env.env):
    configurable.save_config(agent.logdir)

# Training-loop settings (previously inline literals in the loop body).
MAX_EPOCHS = 10000            # leave the loop once agent.epoch reaches this value
LR_DECAY_START_EPOCH = 1000   # learning-rate reduction is only considered after this epoch
KL_DIV_THRESHOLD = 0.05       # reduce the learning rate when history['kl_div'] exceeds this
LR_DECAY_FACTOR = 0.995       # first argument passed to agent.reduce_learning_rate

memory = MemoryManager(num_envs=num_envs)
meanAverage = MeanAverage(best_mean_score_episode=1000)
rewards = 0.0  # not read anywhere in this cell; kept so the notebook namespace is unchanged

# The loop runs for a long time and is often stopped by hand (Kernel -> Interrupt).
# try/finally guarantees env.close() runs on interrupt or error as well as on
# normal completion, so the vectorized environments are always released.
try:
    states, infos = env.reset()
    while True:
        action, prob = agent.act(states)

        next_states, reward, terminated, truncated, infos = env.step(action)
        memory.append(states, action, reward, prob, terminated, truncated, next_states, infos)
        states = next_states

        # Train once per environment whose episode has just finished.
        for index in memory.done_indices():
            env_memory = memory[index]
            history = agent.train(env_memory)
            episode_reward = np.sum(env_memory.rewards)
            mean_reward = meanAverage(episode_reward)

            if meanAverage.is_best(agent.epoch):
                agent.save_models('ppo_sinusoid')

            if history['kl_div'] > KL_DIV_THRESHOLD and agent.epoch > LR_DECAY_START_EPOCH:
                agent.reduce_learning_rate(LR_DECAY_FACTOR, verbose=False)

            info = env_memory.infos[-1]
            print(agent.epoch, episode_reward, mean_reward, info["metrics"]['account_value'], history['kl_div'])
            agent.log_to_writer(info['metrics'])
            # Restart only the finished environment and patch its slot in the batch.
            states[index], infos[index] = env.reset(index=index)

        if agent.epoch >= MAX_EPOCHS:
            break
finally:
    env.close()


Failed to save config to file
1 0.9164874249761428 0.9164874249761428 1580.4072396698893 0.14606008
2 0.4693926243640617 0.6929400246701023 977.7273044393779 0.14177135
3 0.35145719800818154 0.5791124157827954 908.9537990910005 0.1454596
4 1.2185625381959961 0.7389749463860955 1976.6283444896912 0.14702967
5 2.3636529817758443 1.0639105534640454 6623.457462763493 0.15420376
6 1.2452137751730974 1.0941277570822205 2200.146481363065 0.14429587
7 2.357666692628405 1.274633319303104 6348.647936419401 0.15825523
8 1.2563146618321173 1.2723434871192307 2081.6628418224436 0.16954371
9 0.024923623074151524 1.1337412800031108 610.0617918660519 0.15658888
10 0.4205200987780938 1.0624191618806091 955.4624757266583 0.1510733
11 1.8667116021675487 1.135536656452149 3985.6149595087054 0.0919168
12 1.9908222192848568 1.2068104533548747 4588.266148785496 0.11079005
13 1.1826741051195886 1.2049538111829294 1970.1233751309344 0.109118484
14 2.9562132524728675 1.3300437712750681 11932.749370356967 0.1086

  saving_api.save_model(


1001 13.679265732011178 13.401371070747734 3.746438017213261e+21 0.07635428
1002 12.557457311714462 13.386605497141009 1271716498.5647473 0.12534583
1003 13.737019696639463 13.392446233813532 3568174460953703.0 0.11143734
1004 14.303438090759437 13.382255164899583 1.429968127776758e+29 0.0911325
1005 13.484809315559753 13.370058119933764 2.595506364167013e+18 0.10850555
1006 12.818332049357023 13.34785222016804 55329447931.176796 0.13505378
1007 13.611588715443293 13.3446838005528 7166706337771.575 0.099731125
1008 14.570283846638588 13.348504934382664 463376326680309.56 0.13978466
1009 13.72656277391741 13.351888234192499 92764808414653.44 0.120793715
1010 14.549608036236144 13.391895745344838 631595878480211.8 0.12855111
1011 13.231042718285298 13.379985344914289 24392288430.628613 0.11422763
1012 13.819336494828558 13.402756300542737 401670036479732.0 0.09663583
1013 13.838150161335978 13.409553196403612 604846232.341243 0.14020796
1014 13.435410725565575 13.412029473932659 1.944405