In [4]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import read_csv
from numpy.random import choice
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from torch import nn
from collections import deque
import torch

device = "cpu" if not torch.cuda.is_available() else 'cuda'
#Disable the warnings
import warnings
warnings.filterwarnings('ignore')

In [5]:


train_path = "lib/data/IVV_1m_training.csv"
validation_path = "lib/data/IVV_1m_validation.csv"
# dataset = AIFinanceDataloader(file_csv = train_path)
# dataloader_train = DataLoader(dataset, batch_size=1, shuffle=False)

#dataset = AIFinanceDataloader(file_csv = validation_path)
#dataloader_val = DataLoader(dataset, batch_size=2, shuffle=False)

In [6]:
# Day zero
# print(dataset[0].shape)
# print(dataset[1].shape)

# feature_size = dataset[0].shape[1]

In [10]:
from tqdm import tqdm
from lib.AgentNetworks import AgentCNNNetwork, AgentLSTMNetwork, AgentGRUNetwork
import time

class Agent():
    def __init__(self, feature_size, window_size, is_eval=False, model_name=""):
        super(Agent, self).__init__()
        self.feature_size = feature_size
        self.window_size = window_size
        self.action_size = 3
        self.memory = deque(maxlen=1000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995

        self.model = AgentGRUNetwork(self.feature_size, self.window_size, self.action_size, device, is_eval)
        self.model.to(device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)

    def act(self, state): 
        #If it is test and self.epsilon is still very high, once the epsilon become low, there are no random
        #actions suggested.
        if not self.is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size) 
        self.model.eval()
        with torch.no_grad():
            options = self.model(state.float()).reshape(-1).cpu().numpy()   
        
        #set_trace()
        #action is based on the action that has the highest value from the q-value function.
        return np.argmax(options)

    def expReplay(self, batch_size):
        mini_batch = []
        l = len(self.memory)

        self.model.train()
        for i in range(l - batch_size + 1, l):
            mini_batch.append(self.memory[i])
        
        exp_repl_mean_loss = 0
        for state, action, reward, next_state, done in mini_batch:
            
            self.optimizer.zero_grad()
            output = self.model(state.float()).reshape(-1)
            # Target does not need gradients
            target_f = output.detach().clone()

            if done: 
                target_f[action] = reward
            else:
                with torch.no_grad():
                    target_f[action] = reward + self.gamma * torch.max(self.model(next_state.float()).reshape(-1)).cpu().numpy()   

            loss = nn.MSELoss()
            output = loss(target_f, output)
            output.backward()
            self.optimizer.step()

            exp_repl_mean_loss += output.item()

        exp_repl_mean_loss /= batch_size
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        return exp_repl_mean_loss

In [11]:
import numpy as np

def seed_everything(seed: int):
    import random, os
    import numpy as np
    import torch
    
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


# prints formatted price
def formatPrice(n):
    return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))

# Plots the behavior of the output
def plot_behavior(data_input, states_buy, states_sell, profit):
    fig = plt.figure(figsize = (15,5))
    plt.plot(data_input, color='r', lw=2.)
    plt.plot(data_input, '^', markersize=10, color='m', label = 'Buying signal', markevery = states_buy)
    plt.plot(data_input, 'v', markersize=10, color='k', label = 'Selling signal', markevery = states_sell)
    plt.title('Total gains: %f'%(profit))
    plt.legend()
    #plt.savefig('output/'+name+'.png')
    plt.show()

In [14]:
from IPython.core.debugger import set_trace
from lib.IVVEnvironment import IVVEnvironment

window_size = 10
batch_size = 16
feature_size = 3
seed = 9
seed_everything(9)

agent = Agent(feature_size, window_size)
train_environment = IVVEnvironment(train_path, seed=seed, device=device, trading_cost=1e-3)

episode_count = 0
rewards_list = []
while(train_environment.there_is_another_episode()):
    print("Running episode " + str(episode_count) + "/" + str(train_environment.num_of_ep()))

    # Reset the environment and obtain the initial observation
    observation = train_environment.reset()
    info = {}

    episode_loss = 0
    start_time = time.time()
    while True:

        action = agent.act(observation)
        next_observation, reward, done, info = train_environment.step(action)

        agent.memory.append((observation, action, reward, next_observation, done))
        rewards_list.append(reward)

        if done: break

        if len(agent.memory) > batch_size:
            episode_loss += agent.expReplay(batch_size) 

        observation = next_observation

    # plot_behavior(day_episode, states_buy, states_sell, total_profit)
    print(f" >>> Reward: {np.mean(rewards_list):3.2f} Loss: {str(episode_loss)}, \n >>> Profit: {info['total_profit']}, BUY trades: {len(info['when_bought'])}, SELL trades: {len(info['when_sold'])}, \n >>> Time : {str(time.time() - start_time)}")
    print(info['buy_sell_order'])

    episode_count += 1


Running episode 0/2518
>>> Reward: 0.05 Loss: 6.60227970005969, Profit: 19.67689999999996, BUY trades: 119, SELL trades: 112, Time : 11.127171993255615
['BUY', 'BUY', 'SELL', 'SELL', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'SELL', 'SELL', 'BUY', 'SELL', 'SELL', 'SELL', 'BUY', 'BUY', 'SELL', 'SELL', 'BUY', 'SELL', 'SELL', 'BUY', 'SELL', 'SELL', 'BUY', 'BUY', 'BUY', 'BUY', 'SELL', 'SELL', 'SELL', 'SELL', 'BUY', 'SELL', 'BUY', 'SELL', 'BUY', 'SELL', 'BUY', 'BUY', 'SELL', 'BUY', 'SELL', 'BUY', 'SELL', 'SELL', 'BUY', 'SELL', 'BUY', 'SELL', 'BUY', 'BUY', 'SELL', 'SELL', 'BUY', 'BUY', 'SELL', 'SELL', 'BUY', 'BUY', 'BUY', 'SELL', 'BUY', 'BUY', 'SELL', 'SELL', 'SELL', 'SELL', 'BUY', 'BUY', 'SELL', 'SELL', 'BUY', 'SELL', 'BUY', 'SELL', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'BUY', 'SELL', 'BUY', 'BUY', 'BUY', 'SELL', 'BUY', 'BUY', 'SELL', 'SELL', 'BUY', 'BUY', 'SELL', 'BUY', 'SELL', 'BUY', 'BUY', 'SELL', 'SELL', 'BUY', 'BUY', 'SELL', '