In [18]:
import gym
from tqdm.notebook import tqdm
# NOTE(review): presumably marks a Google Colab run; when False the CUDA override below is applied.
COLAB = False
if not COLAB:
    import os
    # Hide all CUDA devices from this process. This is set before tensorflow is
    # imported below so that the setting is in place when TensorFlow initialises.
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import *
from tensorflow.keras import Input
from environment_ddpg import StockEnv, create_stock_env
from utils import ReplayBuffer, OrnsteinUhlenbeckActionNoise
# Prefix prepended to every weight file name by the save_weights/load_weights methods.
path_base = "models/"
# When True, Agent.__init__ loads previously saved network weights on construction.
RESUME = True

In [19]:
class Actor:
    """Deterministic policy network: maps a state to an action.

    The network is a stack of ReLU ``Dense`` layers followed by a ``tanh``
    output layer whose result is multiplied by ``output_range``.
    """

    def __init__(self, params):
        """Read the network configuration from ``params`` and build the model.

        Keys read: ``"output_range"``, ``"actor_hidden_layers"``,
        ``"state_dimensions"`` and ``"action_dimensions"``.
        """
        self.output_range = params["output_range"]
        self.hidden_layers = params["actor_hidden_layers"]
        self.state_dimensions = params["state_dimensions"]
        self.action_dimensions = params["action_dimensions"]
        self.actor = self.model()

    def model(self):
        """Build the Keras model; per-sample input shape is ``(1, state_dimensions)``."""
        inputs = Input(shape=(1, self.state_dimensions))
        # NOTE(review): identity pass-through, appears to be a no-op. Kept so the
        # model's layer list stays exactly as it was.
        x = Lambda(lambda x: x)(inputs)
        for layer in self.hidden_layers:
            x = Dense(layer, activation='relu')(x)
        x = Dense(self.action_dimensions, activation='tanh')(x)
        # tanh is bounded to [-1, 1]; scale it by the configured output range.
        x = Lambda(lambda x: x*self.output_range)(x)
        return tf.keras.Model(inputs=inputs, outputs=x)

    def get_action(self, state):
        """Run the policy on ``state`` and return the result as a NumPy array.

        The input is converted to a float32 tensor. A plain tensor is enough
        for inference, so no ``tf.Variable`` is allocated per call.
        """
        state_tensor = tf.convert_to_tensor(np.asarray(state, dtype=np.float32))
        return self.actor(state_tensor).numpy()

    def save_weights(self):
        """Write the actor weights to ``path_base + "actor.h5"``."""
        self.actor.save_weights(path_base + "actor.h5")

    def load_weights(self):
        """Load the actor weights from ``path_base + "actor.h5"``."""
        self.actor.load_weights(path_base + "actor.h5")
        
    
class Critic:
    """Action-value (Q) estimator with an online network and a target network.

    Both networks share the same architecture. Only ``critic_online`` is
    trained by callers; ``critic_target`` follows it through
    :meth:`merge_networks`.
    """

    def __init__(self, params):
        """Read the configuration from ``params`` and build both networks.

        Keys read: ``"critic_hidden_layers"``, ``"state_dimensions"``,
        ``"action_dimensions"`` and ``"critic_optimizer"``.
        """
        self.hidden_layers = params["critic_hidden_layers"]
        self.state_dimensions = params["state_dimensions"]
        self.action_dimensions = params["action_dimensions"]
        self.optimizer = params["critic_optimizer"]
        self.critic_online = self.model()
        self.critic_target = self.model()

    def model(self):
        """Build and compile one Q-network.

        Inputs are a state of per-sample shape ``(1, state_dimensions)`` and an
        action of per-sample shape ``(1, action_dimensions)``, concatenated on
        the last axis. The output is a single linear unit, trained with MSE.
        """
        input_a = Input(shape=(1, self.state_dimensions))
        input_b = Input(shape=(1, self.action_dimensions))
        x = concatenate([input_a, input_b], axis=-1)
        for layer in self.hidden_layers:
            x = Dense(layer, activation='relu')(x)
        x = Dense(1, activation='linear')(x)
        model = tf.keras.Model(inputs=[input_a, input_b], outputs=x)
        model.compile(loss='mse', optimizer=self.optimizer)
        return model

    def save_weights(self):
        """Write both networks' weights under ``path_base``."""
        self.critic_online.save_weights(path_base + "critic_online.h5")
        self.critic_target.save_weights(path_base + "critic_target.h5")

    def load_weights(self):
        """Load both networks' weights from ``path_base``."""
        self.critic_online.load_weights(path_base + "critic_online.h5")
        self.critic_target.load_weights(path_base + "critic_target.h5")

    def get_qvalues(self, state_array, action_array, online=True):
        """Evaluate Q(state, action) and return the result as a NumPy array.

        Uses the online network when ``online`` is true, otherwise the target
        network. Inputs are converted to float32 tensors; no ``tf.Variable``
        is allocated per call.
        """
        state_tensor = tf.convert_to_tensor(np.asarray(state_array, dtype=np.float32))
        action_tensor = tf.convert_to_tensor(np.asarray(action_array, dtype=np.float32))
        return self.call(state_tensor, action_tensor, online).numpy()

    def call(self, state_tensor, action_tensor, online=True):
        """Evaluate Q on tensors and return a tensor (usable under a GradientTape)."""
        network = self.critic_online if online else self.critic_target
        return network([state_tensor, action_tensor])

    def merge_networks(self, tau):
        """Soft-update the target network: ``target = tau*online + (1-tau)*target``.

        The blend is computed one weight tensor at a time. Kernels and biases
        have different shapes, so the weight list cannot be packed into a
        single NumPy array (NumPy >= 1.24 raises ``ValueError`` for that).
        """
        online_weights = self.critic_online.get_weights()
        target_weights = self.critic_target.get_weights()
        blended = [
            tau * online_w + (1 - tau) * target_w
            for online_w, target_w in zip(online_weights, target_weights)
        ]
        self.critic_target.set_weights(blended)
        
    

In [20]:
class Agent:
    """DDPG-style actor-critic agent driven through ``agent_start``/``agent_step``.

    Transitions are stored in the replay buffer as
    ``(prev_state, prev_action, reward, observation)`` tuples. When ``test`` is
    true the agent acts without exploration noise, stores nothing in the
    buffer and never trains.

    Known limitations (left as-is to keep the stored tuple and the weight files
    unchanged): transitions carry no terminal flag, and there is no target
    actor, so the online actor supplies the next-state action for the critic
    target.
    """

    def __init__(self, params, test=False):
        """Build the networks, the replay buffer and the exploration noise.

        Besides the keys read by ``Actor`` and ``Critic``, ``params`` must
        provide ``"buffer_size"``, ``"discount"``, ``"save_frequency"``,
        ``"batch_size"``, ``"actor_optimizer"`` and ``"tau"``. If the
        module-level ``RESUME`` flag is true, saved weights are loaded.
        """
        self.test = test
        self.actor = Actor(params)
        self.critic = Critic(params)
        self.buffer = ReplayBuffer(params["buffer_size"])
        self.state_dimensions = params["state_dimensions"]
        self.action_dimensions = params["action_dimensions"]
        self.discount = params["discount"]
        self.action_range = params["output_range"]
        self.save_frequency = params["save_frequency"]
        self.batch_size = params["batch_size"]
        self.optimizer = params["actor_optimizer"]
        self.tau = params["tau"]
        self.step = 0
        self.noise_func = OrnsteinUhlenbeckActionNoise(mu=np.zeros(params["action_dimensions"]))
        if RESUME:
            self.load_networks()

    def agent_start(self, observation):
        """Begin an episode: pick the first action and remember state/action."""
        observation = np.reshape(observation, (1, self.state_dimensions))
        action = self._select_action(observation)
        self.prev_state = observation
        self.prev_action = action
        return action

    def clip_action(self, action):
        """Limit ``action`` element-wise to ``[-|action_range|, |action_range|]``.

        Returns a new value and never modifies ``action`` in place. Works for
        scalars as well as multi-dimensional actions.
        """
        limit = abs(self.action_range)
        return np.clip(action, -limit, limit)

    def agent_step(self, reward, observation):
        """Record the last transition, choose the next action and, unless testing, train."""
        observation = np.reshape(observation, (1, self.state_dimensions))
        if not self.test:
            self.buffer.add((self.prev_state, self.prev_action, reward, observation))
        self.prev_state = observation
        self.prev_action = self._select_action(observation)
        if not self.test:
            self.train(self.batch_size)
        return self.prev_action

    def save_networks(self):
        """Persist actor and critic weights."""
        self.actor.save_weights()
        self.critic.save_weights()

    def load_networks(self):
        """Restore actor and critic weights."""
        self.actor.load_weights()
        self.critic.load_weights()

    def train(self, sample_size):
        """Run one update of critic, actor and target critic from a sampled batch.

        Weights are saved every ``save_frequency`` calls.
        """
        self.step += 1
        batch, _ = self.buffer.sample(sample_size)

        if len(batch) > 0:
            prev_states, prev_actions, rewards, next_states = self._unpack_batch(batch)
            self._update_critic(prev_states, prev_actions, rewards, next_states)
            self._update_actor(prev_states)
            self.critic.merge_networks(self.tau)

        if self.step % self.save_frequency == 0:
            self.save_networks()

    def _select_action(self, observation):
        """Return the action for ``observation``: noisy when training, wrapped in a list when testing."""
        # The actor model declares inputs of shape (batch, 1, state_dimensions),
        # so a single (1, state_dimensions) observation needs a batch axis.
        act = np.squeeze(self.actor.get_action(observation[np.newaxis]))
        if self.test:
            return [self.clip_action(act)]
        return self.clip_action(act + self.noise_func())

    def _unpack_batch(self, batch):
        """Split transitions into float32 arrays shaped for the networks.

        Returns ``(prev_states, prev_actions, rewards, next_states)`` with
        shapes ``(n, 1, state_dimensions)``, ``(n, 1, action_dimensions)``,
        ``(n, 1, 1)`` and ``(n, 1, state_dimensions)``.
        """
        prev_states = np.asarray([t[0] for t in batch], dtype=np.float32)
        # Index 1 is the action that was taken; index 2 is the reward.
        prev_actions = np.asarray([t[1] for t in batch], dtype=np.float32)
        rewards = np.asarray([t[2] for t in batch], dtype=np.float32)
        next_states = np.asarray([t[3] for t in batch], dtype=np.float32)
        return (
            prev_states.reshape(-1, 1, self.state_dimensions),
            prev_actions.reshape(-1, 1, self.action_dimensions),
            rewards.reshape(-1, 1, 1),
            next_states.reshape(-1, 1, self.state_dimensions),
        )

    def _update_critic(self, prev_states, prev_actions, rewards, next_states):
        """Regress Q(s, a) of the online critic towards r + discount * Q_target(s', mu(s'))."""
        next_actions = self.actor.get_action(next_states)
        next_q = self.critic.get_qvalues(next_states, next_actions, False)
        targets = rewards + self.discount * np.reshape(next_q, (-1, 1, 1))
        # One gradient step on the whole minibatch, evaluated at the stored (s, a).
        self.critic.critic_online.train_on_batch([prev_states, prev_actions], targets)

    def _update_actor(self, states):
        """Move the policy along the gradient of the mean of Q(s, mu(s))."""
        state_tensor = tf.convert_to_tensor(states)
        actor_weights = self.actor.actor.trainable_weights
        with tf.GradientTape() as tape:
            actions = self.actor.actor(state_tensor)
            q_values = self.critic.call(state_tensor, actions)
            # Minimising -mean(Q) maximises the critic's value of the policy's actions.
            actor_loss = -tf.reduce_mean(q_values)
        gradients = tape.gradient(actor_loss, actor_weights)
        self.optimizer.apply_gradients(zip(gradients, actor_weights))



In [21]:
# Hyperparameters read by Actor, Critic and Agent.
AGENT_PARAMS = {
    # Network shapes
    "output_range": 1,                  # factor applied to the actor's tanh output
    "actor_hidden_layers": [30, 8],
    "critic_hidden_layers": [30, 8],
    "state_dimensions": 5,
    "action_dimensions": 1,
    # Optimisers
    "critic_optimizer": tf.keras.optimizers.Adam(learning_rate=0.001),
    "actor_optimizer": tf.keras.optimizers.Adam(learning_rate=0.0001),
    # Replay and training schedule
    "batch_size": 64,
    "buffer_size": 1000000,
    "discount": 0.99,
    "tau": 0.001,                       # soft-update rate of the target critic
    "save_frequency": 100,              # save weights every N training steps
}

# test=True: no exploration noise, no replay-buffer writes, no training.
agent = Agent(AGENT_PARAMS, test=True)

In [None]:
import matplotlib.pyplot as plt
import os

ITERATIONS = 2000     # maximum number of environment steps evaluated per data file
PLOT_PROFIT = False   # set to True to draw the cumulative-profit curve of every file

# Sorted so that the evaluation order is the same on every run and machine.
files = sorted(os.listdir("data/"))
for num_iter, file in enumerate(files):
    env = create_stock_env("data/{0}".format(file))
    profit = np.zeros(ITERATIONS)   # profit[i] = cumulative reward after loop step i
    cumulative_profit = 0
    steps_taken = 0

    # Reset exactly once and hand that first observation to the agent.
    action = agent.agent_start(env.reset())
    # NOTE(review): the reward of this first step is not added to the profit,
    # which matches the previous behaviour. Confirm this is intended.
    observation, reward, done, info = env.step(action[0])

    # The context manager closes the progress bar even on an early exit.
    with tqdm(desc="Iteration: ", total=ITERATIONS) as pbar:
        for i in range(ITERATIONS):
            if done:
                # The episode is over; stepping a finished environment is undefined.
                break
            action = agent.agent_step(reward, observation)
            observation, reward, done, info = env.step(action[0])
            cumulative_profit += reward
            profit[i] = cumulative_profit
            steps_taken = i + 1
            pbar.update(1)

    if PLOT_PROFIT:
        plt.figure()
        plt.plot(profit[:steps_taken])

    # Equals profit[-1] whenever the episode ran for all ITERATIONS steps.
    final_profit = profit[steps_taken - 1] if steps_taken else 0.0
    print(file + " profit: "+str(final_profit))

HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FGB.csv profit: 82931.5111144795


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FDS.csv profit: 162327.35198382623


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FLL.csv profit: 26112.9602270568


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FNJN.csv profit: 90263.0808829175


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FHB.csv profit: 1556806.9152948647


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FET.csv profit: 353110.3005525867


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FNKO.csv profit: 65056.323906801845


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FIV.csv profit: 182788502.69724643


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FFBC.csv profit: 151097.5730914848


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FIT.csv profit: 49930.43772191338


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FHI.csv profit: 44755.165865581235


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FLDM.csv profit: 119025.61493109872


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…


ACHV.csv profit: 52015.98387952722


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FFBW.csv profit: 56607.26454563081


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

MSFT.csv profit: 42877.80486768806


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FLEX.csv profit: 97745.56776235967


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FEUL.csv profit: 75958.49734130038


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FIVE.csv profit: 34409.443285117086


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FLC.csv profit: 79313.04587942468


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FISI.csv profit: 81107.29187728508


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FENG.csv profit: 35844.32364711852


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FNGO.csv profit: 64288.422152185


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FMCI.csv profit: 62639.72611186329


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FEYE.csv profit: 259166.51396169973


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FMC.csv profit: 470553.4249767228


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FFC.csv profit: 53016.84863647903


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FLIR.csv profit: 113397.95630557471


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FF.csv profit: 66714.4449458259


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FEO.csv profit: 57120.44333529245


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FITB.csv profit: 1059903.2740246893


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FDEF.csv profit: 35510.078179984994


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…


FLXS.csv profit: 1577.701270931051


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

ACH.csv profit: 139864.11381168835


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FLXN.csv profit: 88726.64071318007


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

ACGLO.csv profit: 64466.30034281839


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FISV.csv profit: 204586.91112362288


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…

FITBO.csv profit: 63599.537244119536


HBox(children=(IntProgress(value=0, description='Iteration: ', max=2000, style=ProgressStyle(description_width…