In [73]:
import os
# Ask TensorFlow's native (C++) layer to log as little as possible.
# NOTE(review): presumably this has to be set before TensorFlow is first imported to take effect - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from absl import logging
# Restrict absl logging output to messages of ERROR severity.
logging.set_verbosity(logging.ERROR)

In [74]:
# Load Python Libraries
import math
import sys

# O Keras é uma biblioteca de alto nível para construção e treinamento de redes neurais em Python. Ela é integrada ao TensorFlow como seu backend padrão desde o TensorFlow 2.0.
import keras

import random
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# (Double-Ended Queue) é uma estrutura de dados da biblioteca padrão do Python que permite a manipulação eficiente de dados em ambas as extremidades
from collections import deque

# uma biblioteca usada para criar barras de progresso em loops, útil para acompanhar o progresso de tarefas demoradas.
from tqdm.notebook import tqdm

from IPython.display import display, HTML

# DataFrame display settings: do not cap the number of rows pandas renders
pd.set_option("display.max_rows", None)


def display_df(df):
    """Show ``df`` as an HTML table inside a fixed-height, scrollable container.

    The wrapper is 200px high and only as wide as its content, so the
    scrollbar sits right next to the table.
    """
    table_html = df.to_html()
    wrapped = f"<div style='height: 200px; overflow: auto; width: fit-content'>{table_html}</div>"
    display(HTML(wrapped))


# Seed the random number generators so runs are reproducible
keras.utils.set_random_seed(42)

In [75]:
import tensorflow as tf

# Ask TensorFlow which GPUs it can see
gpus = tf.config.list_physical_devices('GPU')

# Without a GPU there is nothing to configure: stop right away
if not gpus:
    sys.exit("Nenhuma GPU foi detectada.")

try:
    # Make only the first detected GPU visible to TensorFlow
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
    with tf.device('/GPU:0'):
        print("Usando GPU:0")
except RuntimeError as gpu_error:
    # Raised when the GPU configuration can no longer be changed
    # (e.g. the device was already configured); abort with the reason
    sys.exit(f"Erro ao configurar a GPU: {gpu_error}")

Usando GPU:0


In [76]:
# Alternative input file, kept for reference:
# data = pd.read_csv('GOOG_2009-2010_6m_all_features_1d.csv')
data = pd.read_csv('google_2008_2009.csv')
# display_df(data)

# Keep the date plus the five features used by the agent
# (close price, 5- and 20-day moving averages, Bollinger bands)
dataset = data.reset_index().loc[:, ['Date', 'Close', 'MA5', 'MA20', 'BB-upper', 'BB-lower']]
# display_df(dataset)

In [77]:
# DQN model architecture
@keras.saving.register_keras_serializable()
class DQN(keras.Model):
    """Fully connected Q-network: state vector in, one Q-value per action out.

    The compiled ``keras.Sequential`` network is exposed as ``self.model``;
    that attribute is what the agent trains and saves. Calling a ``DQN``
    instance directly forwards to the same network.

    Args:
        state_size: length of the flattened state vector fed to the network.
        action_size: number of actions, i.e. number of output units.
    """

    def __init__(self, state_size, action_size):
        # keras.Model has to be initialised before any attribute is assigned
        # to the instance, otherwise sub-layer tracking is not set up properly.
        super().__init__()

        model = keras.models.Sequential()
        model.add(keras.layers.Input(shape=(state_size,)))  # explicit input layer
        model.add(keras.layers.Dense(units=32, activation="relu"))
        model.add(keras.layers.Dense(units=8, activation="relu"))
        # Linear output: Q-values are unbounded regression targets
        model.add(keras.layers.Dense(action_size, activation="linear"))

        model.compile(loss="mse", optimizer=keras.optimizers.Adam(learning_rate=0.001))

        self.model = model

    def call(self, inputs):
        """Forward pass: delegate to the wrapped Sequential network."""
        return self.model(inputs)


In [78]:
class Agent:
    """Epsilon-greedy Deep Q-Learning trading agent with a bounded replay memory.

    Actions are encoded as integers: 0 = hold, 1 = buy, 2 = sell.
    """

    def __init__(self, window_size, num_features, test_mode=False, model_name=''):
        """Configure the agent's hyper-parameters and build or load its network.

        Args:
            window_size: number of lookback days included in a state.
            num_features: number of features available per day.
            test_mode: when True, the network is loaded from ``model_name``
                and ``act`` never explores.
            model_name: path of the saved model loaded in test mode.
        """
        self.window_size = window_size
        self.num_features = num_features
        # A state is the flattened (window_size x num_features) block
        self.state_size = window_size*num_features
        self.action_size = 3 # 0=hold, 1=buy, 2=sell
        # Bounded memory: once 1000 experiences are stored, the oldest one is
        # dropped for every new one appended
        self.memory = deque(maxlen=1000)
        self.inventory = [] # open positions, maintained by the caller
        self.model_name = model_name
        self.test_mode = test_mode

        # Discount factor in [0, 1]: how much future rewards count compared
        # with immediate ones
        self.gamma = 0.95
        # Exploration rate: starts at 1.0 (always random) and is multiplied by
        # epsilon_decay after every replay until it drops to epsilon_min
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995

        self.model = keras.models.load_model(model_name) if test_mode else self._model()

    def _model(self):
        """Build a fresh Q-network sized for this agent's states and actions."""
        return DQN(self.state_size, self.action_size).model

    def get_q_values_for_state(self, state):
        """Predict the Q-values of every action for ``state``.

        The state is flattened and reshaped to ``(1, state_size)`` before it is
        passed to the network; the prediction is returned unchanged.
        """
        return self.model.predict(state.flatten().reshape(1, self.state_size))

    def fit_model(self, input_state, target_output):
        """Run one training epoch on a single (state, target Q-values) pair.

        Returns whatever ``model.fit`` returns (the training history).
        """
        return self.model.fit(input_state.flatten().reshape(1, self.state_size), target_output, epochs=1, verbose=0)

    def act(self, state):
        """Select an action for ``state`` with an epsilon-greedy policy.

        In training mode a random action is chosen with probability
        ``epsilon``; otherwise (and always in test mode) the action with the
        highest predicted Q-value is chosen.

        Returns:
            int: the selected action (0, 1 or 2).
        """
        if not self.test_mode and random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        options = self.get_q_values_for_state(state)
        # Plain int so both branches return the same type
        return int(np.argmax(options[0]))

    def exp_replay(self, batch_size):
        """Fit the network on the ``batch_size`` most recent experiences.

        Each experience is a ``(state, action, reward, next_state, done)``
        tuple. For every one, the Q-value of the action taken is replaced by
        the Bellman target and the network is fitted on the corrected
        Q-values. Afterwards epsilon is decayed once (until ``epsilon_min``).

        Args:
            batch_size: number of experiences to replay; if the memory holds
                fewer, all of them are used.

        Returns:
            list: one loss value per replayed experience.
        """
        losses = []
        # Take exactly the last `batch_size` experiences. The start index is
        # clamped at 0 so a short memory never wraps around to negative
        # indices and replays the same experience twice.
        start = max(len(self.memory) - batch_size, 0)
        mini_batch = list(self.memory)[start:]

        for state, action, reward, next_state, done in mini_batch:
            if done:
                # Terminal transition: there is no next state to bootstrap from
                optimal_q_for_action = reward
            else:
                # Bellman target: reward + gamma * max Q(next_state, a)
                optimal_q_for_action = reward + self.gamma * np.max(self.get_q_values_for_state(next_state))
            # Start from the current predictions and overwrite only the entry
            # of the action that was actually taken
            target_q_table = self.get_q_values_for_state(state)
            target_q_table[0][action] = optimal_q_for_action
            # Fit with the state as input and the corrected Q-values as target
            history = self.fit_model(state, target_q_table)
            losses += history.history['loss']

        # Epsilon decay: explore a little less after every replay
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        return losses

In [79]:
# Chronological split: the first 80% of the rows train the agent, the rest test it
training_rows = int(len(dataset) * 0.8)
train_df, test_df = (
    part.set_index("Date")
    for part in (dataset.iloc[:training_rows], dataset.iloc[training_rows:])
)

# Report both sizes (they must add up to the full dataset, with no overlap)
print(f"Tamanho do train_df: {len(train_df)}")
print(f"Tamanho do test_df: {len(test_df)}")

# display_df(train_df)
# display_df(test_df)

Tamanho do train_df: 388
Tamanho do test_df: 97


In [80]:
# Turn both frames into float NumPy arrays (one row per day, one column per feature)
X_train, X_test = (frame.values.astype(float) for frame in (train_df, test_df))

# Show (n_samples, n_features) of the training array as the cell output
X_train.shape

(388, 5)

In [81]:
# Turn off Keras' interactive logging before the agent starts calling predict/fit.
keras.utils.disable_interactive_logging()

# Lookback window passed to the Agent; get_state is later called with window_size + 1.
window_size = 1
# One input feature per column of X_train.
agent = Agent(window_size, num_features=X_train.shape[1])

In [82]:
# for key, value in agent.__dict__.items():
#     if isinstance(value, deque):
#         print(f"{key}: {list(value)[:5]}... (showing first 5 items)")  # Mostra os primeiros 5 itens do deque
#     elif key == "model":
#         print(f"{key}: Model object with summary below:")
#         value.summary()
#     else:
#         print(f"{key}: {value}")
        
# print("-> ",agent.model.summary())  # Mostra a arquitetura do modelo

# for item in dir(agent):
#     print(item)
    
# print(f"\nMemory contents: {list(agent.memory)}\n")

# for item in vars(agent):
#     print(item)
    
# for layer in agent.model.layers:
#     print(f"Pesos da camada {layer.name}: {layer.get_weights()}")


In [83]:
def format_price(n):
    """Format ``n`` as a dollar amount with two decimals, e.g. ``-$1.50``.

    The minus sign, when present, is placed in front of the dollar sign.
    """
    sign = '-' if n < 0 else ''
    return f"{sign}${abs(n):.2f}"

def sigmoid(x):
    """Logistic function ``1 / (1 + e**-x)``.

    Uses ``np.exp`` (rather than ``math.exp``), so NumPy arrays are handled
    element-wise.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def plot_behavior(data_input, bb_upper_data, bb_lower_data, states_buy, states_sell, profit, train=True):
    """Plot the close price with Bollinger bands and the agent's trade markers.

    Args:
        data_input: close prices, one value per time step.
        bb_upper_data: upper Bollinger band, aligned with ``data_input``.
        bb_lower_data: lower Bollinger band, aligned with ``data_input``.
        states_buy: time-step indices drawn as buying signals.
        states_sell: time-step indices drawn as selling signals.
        profit: total gains shown in the title.
        train: when True the x-axis is labelled with the dates of the global
            ``train_df`` (about 15 ticks), otherwise with those of the global
            ``test_df`` (about 2 ticks).
    """
    plt.figure(figsize=(15, 5))
    plt.plot(data_input, color='k', lw=2., label='Close Price')
    plt.plot(bb_upper_data, color='b', lw=2., label='Bollinger Bands')
    plt.plot(bb_lower_data, color='b', lw=2.)
    plt.plot(data_input, '^', markersize=10, color='r', label='Buying signal', markevery=states_buy)
    plt.plot(data_input, 'v', markersize=10, color='g', label='Selling signal', markevery=states_sell)
    plt.title('Total gains: %f'%(profit))
    plt.legend()

    # Date labels come from the frame that matches the plotted split
    dates = (train_df if train else test_df).index.values
    target_ticks = 15 if train else 2
    # Never let the step reach 0: range() rejects a zero step, which happened
    # whenever the frame had fewer rows than the requested number of ticks
    step = max(1, len(dates) // target_ticks)
    plt.xticks(range(0, len(dates), step), dates[::step], rotation=45, fontsize='small')

    plt.show()

def plot_losses(losses, title):
    """Draw the training-loss curve: one point per batch, MSE on the y-axis."""
    plt.plot(losses)
    plt.xlabel('batch')
    plt.ylabel('MSE Loss Value')
    plt.title(title)
    plt.show()

def get_state(data, t, n):
    """Return the n-day state representation ending at (and including) time t.

    The state is the sigmoid of the day-over-day change of every feature
    across the window ``data[t-n+1 .. t]``. When there is not enough history,
    the window is left-padded with copies of the first row, so the padded
    positions contribute a delta of 0 (i.e. 0.5 after the sigmoid).

    Args:
        data: array-like with shape (days, features).
        t: index of the current time step (the last row of the window).
        n: number of days in the window.

    Returns:
        np.ndarray with shape (1, n-1, features).
    """
    data = np.asarray(data)

    # First row of the window; negative when the history is too short
    start = t - n + 1
    if start >= 0:
        block = data[start:t + 1]
    else:
        # Repeat the first row as many times as there are missing days
        padding = [data[0]] * (-start)
        block = np.vstack(padding + [data[0:t + 1]])

    # Day-over-day differences inside the window
    deltas = block[1:] - block[:-1]

    # Squash the differences into (0, 1) with the logistic function
    res = 1 / (1 + np.exp(-deltas))

    # Add a leading batch axis
    return np.expand_dims(res, axis=0)

In [None]:
# Show complete, unfiltered tracebacks if Keras raises an error during training
keras.config.disable_traceback_filtering()

# Column positions inside X_train / X_test
idx_close = 0      # close price
idx_ma5 = 1        # 5-day moving average
idx_ma20 = 2       # 20-day moving average
idx_bb_upper = 3   # upper Bollinger band
idx_bb_lower = 4   # lower Bollinger band

# Number of time steps per episode: every day except the last one,
# because each step also needs the state of the following day
l = X_train[:,0].shape[0] - 1

# Number of experiences replayed per training step; replay only starts once
# the agent's memory holds more than this many experiences
batch_size = 32

# An episode represents a complete pass over the data.
episode_count = 1

batch_losses = []
num_batches_trained = 0

for e in range(episode_count):
    state = get_state(X_train, 0, window_size + 1)

    # Per-episode bookkeeping
    total_profit = 0
    total_winners = 0
    total_losers = 0
    agent.inventory = []
    states_sell = []
    states_buy = []

    for t in tqdm(range(l), desc=f'Running episode {e}/{episode_count}'):
        action = agent.act(state)
        next_state = get_state(X_train, t + 1, window_size + 1)

        # Reward stays 0 unless a position is closed at this step
        reward = 0

        if action == 0:
            # Hold: nothing to trade
            print(f"Id: {t} | Action: {action} | Memory size: {len(agent.memory)}")

        elif action == 1:
            # Buy: cover the oldest short position if there is one,
            # otherwise open a new long position.
            # Short positions are stored in the inventory as negative prices.
            short_positions = []

            for i, pos in enumerate(agent.inventory):
                if pos < 0:
                    short_positions.append(i)
                    print(f"Id: {t} | Action: {action}  Índice:: {i}, Valor: {pos:.2f}, short_positions (até agora): {short_positions}")

            if short_positions:
                # Close the oldest short position (FIFO = lowest index)
                idx_short = short_positions[0]
                short_sell_price = abs(agent.inventory.pop(idx_short))
                # Profit of covering a short: sold price minus repurchase price
                trade_profit = short_sell_price - X_train[t, idx_close]
                reward = trade_profit
                total_profit += trade_profit
                if trade_profit >= 0:
                    total_winners += trade_profit
                else:
                    total_losers += trade_profit
                print(f"Id: {t} | Action: {action} "
                      f"| Memory size: {len(agent.memory)} "
                      f"| Repurchase (Cover Short): {format_price(X_train[t, idx_close])} "
                      f"| Profit: {format_price(trade_profit)} | L/P: {format_price(total_profit)}")

            else:
                # Regular buy: store the purchase price as a long position
                buy_price = X_train[t, idx_close]
                agent.inventory.append(buy_price)
                states_buy.append(t)
                print(f"Id: {t} | Action: {action} | Memory size: {len(agent.memory)} | Buy: {format_price(buy_price)} | L/P: {format_price(total_profit)}")

        elif action == 2:
            # Sell: close the oldest long position if there is one,
            # otherwise open a new short position.
            buy_positions = []

            for i, pos in enumerate(agent.inventory):
                if pos > 0:
                    buy_positions.append(i)
                    print(f"Id: {t} | Action: {action}  Índice: {i}, Valor: {pos:.2f}, buy_positions (até agora): {buy_positions}")

            if buy_positions:
                # Close the oldest long position (FIFO = lowest index)
                idx_buy = buy_positions[0]
                bought_price = agent.inventory.pop(idx_buy)
                sell_price = X_train[t, idx_close]
                trade_profit = sell_price - bought_price
                reward = trade_profit
                total_profit += trade_profit
                if trade_profit >= 0:
                    total_winners += trade_profit
                else:
                    total_losers += trade_profit
                # Log every sell, winning or losing (same as the cover-short branch)
                print(f"Id: {t} | Action: {action} "
                      f"| Memory size: {len(agent.memory)} "
                      f"| Sell: {format_price(sell_price)} "
                      f"| Profit: {format_price(trade_profit)} | L/P: {format_price(total_profit)}")

            else:
                # Short sell: store the sale price as a negative inventory entry
                short_sell_price = X_train[t, idx_close]
                agent.inventory.append(-short_sell_price)
                states_sell.append(t)
                print(f"Id: {t} | Action: {action} | Memory size: {len(agent.memory)} | Short Sell: {format_price(short_sell_price)} | L/P: {format_price(total_profit)}")

        # Flag the final step of the episode
        done = t == l - 1
        # Store the transition so exp_replay can learn from it
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if len(agent.memory) > batch_size:
            # Fit the model on the latest experiences and record the summed loss.
            # This runs before the end-of-episode report so the loss plot of an
            # episode also contains its final batch.
            losses = agent.exp_replay(batch_size)
            batch_losses.append(sum(losses))

        # End of episode: liquidate open positions, then report and plot
        if done:
            while len(agent.inventory) > 0:
                position = agent.inventory.pop(0)
                if position > 0:
                    # Close a pending long position at the last step's close price
                    sell_price = X_train[t, idx_close]
                    trade_profit = sell_price - position
                else:
                    # Close a pending short position at the last step's close price
                    short_sell_price = abs(position)
                    trade_profit = short_sell_price - X_train[t, idx_close]

                total_profit += trade_profit

                if trade_profit >= 0:
                    total_winners += trade_profit
                else:
                    total_losers += trade_profit

                states_sell.append(t)
                print(f'Forced Close: {format_price(X_train[t, idx_close])} | Profit: {format_price(trade_profit)}')

            print('--------------------------------')
            print(f'Episode {e}')
            print(f'Total Profit: {format_price(total_profit)}')
            print(f'Total Winners: {format_price(total_winners)}')
            print(f'Total Losers: {format_price(total_losers)}')
            print('--------------------------------')
            plot_behavior(X_train[:, idx_close].flatten(), X_train[:, idx_bb_upper].flatten(), X_train[:, idx_bb_lower].flatten(), states_buy, states_sell, total_profit)
            # Plot only the losses recorded during this episode
            plot_losses(batch_losses[num_batches_trained:len(batch_losses)], f'Episode {e} DQN model loss')
            num_batches_trained = len(batch_losses)

    # Checkpoint the model after every episode
    agent.model.save(f'model_ep{e}.keras')

Running episode 0/1:   0%|          | 0/387 [00:00<?, ?it/s]

Id: 0 | Action: 0 | Memory size: 0
Id: 1 | Action: 2 | Memory size: 1 | Short Sell: $14.05 | L/P: $0.00
Id: 2 | Action: 2 | Memory size: 2 | Short Sell: $12.85 | L/P: $0.00
Id: 3 | Action: 0 | Memory size: 3
Id: 4 | Action: 0 | Memory size: 4
Id: 5 | Action: 0 | Memory size: 5
Id: 6 | Action: 2 | Memory size: 6 | Short Sell: $12.58 | L/P: $0.00
Id: 7 | Action: 0 | Memory size: 7
Id: 8 | Action: 2 | Memory size: 8 | Short Sell: $12.98 | L/P: $0.00
Id: 9 | Action: 0 | Memory size: 9
Id: 10 | Action: 1  Índice:: 0, Valor: -14.05, short_positions (até agora): [0]
Id: 10 | Action: 1  Índice:: 1, Valor: -12.85, short_positions (até agora): [0, 1]
Id: 10 | Action: 1  Índice:: 2, Valor: -12.58, short_positions (até agora): [0, 1, 2]
Id: 10 | Action: 1  Índice:: 3, Valor: -12.98, short_positions (até agora): [0, 1, 2, 3]
Id: 10 | Action: 1 | Memory size: 10 | Repurchase (Cover Short): $13.32 | Profit: $0.74 | L/P: $0.74
Id: 11 | Action: 0 | Memory size: 11
Id: 12 | Action: 0 | Memory size: 12
I

I0000 00:00:1735085666.527490 2166347 service.cc:146] XLA service 0x7f33ac004b80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1735085666.527578 2166347 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 2060, Compute Capability 7.5
I0000 00:00:1735085666.911657 2166347 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Id: 33 | Action: 2 | Memory size: 33 | Short Sell: $10.94 | L/P: -$0.71
Id: 34 | Action: 2 | Memory size: 34 | Short Sell: $10.76 | L/P: -$0.71
Id: 35 | Action: 2 | Memory size: 35 | Short Sell: $10.80 | L/P: -$0.71
Id: 36 | Action: 1  Índice:: 0, Valor: -10.94, short_positions (até agora): [0]
Id: 36 | Action: 1  Índice:: 1, Valor: -10.76, short_positions (até agora): [0, 1]
Id: 36 | Action: 1  Índice:: 2, Valor: -10.80, short_positions (até agora): [0, 1, 2]
Id: 36 | Action: 1 | Memory size: 36 | Repurchase (Cover Short): $11.47 | Profit: -$0.53 | L/P: -$1.24
Id: 37 | Action: 2 | Memory size: 37 | Short Sell: $11.23 | L/P: -$1.24


In [13]:
# l_test = len(X_test) - 1
# done = False
# states_sell_test = []
# states_buy_test = []

# agent = Agent(window_size, num_features=X_test.shape[1], test_mode=True, model_name=f'model_ep{episode_count}.keras')

In [14]:
# state = get_state(X_test, 0, window_size + 1)

# # initialize variables
# total_profit = 0
# total_winners = 0
# total_losers = 0
# agent.inventory = []

# for t in tqdm(range(l_test)):
#     action = agent.act(state)   
#     next_state = get_state(X_test, t + 1, window_size + 1)

#     # initialize reward for the current time step
#     reward = 0

#     if action == 1: # buy
#         # inverse transform to get true buy price in dollars
#         buy_price = X_test[t, idx_close]
#         agent.inventory.append(buy_price)
#         states_buy.append(t)
#         print(f'Buy: {format_price(buy_price)}')

#     elif action == 2 and len(agent.inventory) > 0: # sell
#         bought_price = agent.inventory.pop(0)  
#         # inverse transform to get true sell price in dollars
#         sell_price = X_test[t, idx_close]

#         # define reward as max of profit (close price at time of sell - close price at time of buy) and 0 
#         trade_profit = sell_price - bought_price
#         reward = max(trade_profit, 0)
#         total_profit += trade_profit

#         if trade_profit >=0:
#             total_winners += trade_profit
#         else:
#             total_losers += trade_profit
            
#         states_sell_test.append(t)
#         print(f'Sell: {format_price(sell_price)} | Profit: {format_price(trade_profit)}')
    
#     # flag for final training iteration
#     done = True if t == l_test - 1 else False
#     # append the details of the state action etc in the memory, to be used by the exp_replay function        
#     agent.memory.append((state, action, reward, next_state, done))
#     state = next_state
#     # print total profit and plot behaviour of the current episode when the episode is finished
#     if done:
#         print('--------------------------------')
#         print(f'Total Profit: {format_price(total_profit)}')
#         print(f'Total Winners: {format_price(total_winners)}')
#         print(f'Total Losers: {format_price(total_losers)}')
#         print('--------------------------------')
#         plot_behavior(X_test[:, idx_close].flatten(), X_test[:, idx_bb_upper].flatten(), X_test[:, idx_bb_lower].flatten(), states_buy_test, states_sell_test, total_profit, train=False)