In [None]:
# ==============================================================================
# PROJETO FINAL MBA IA & DS: QUANTUMFINANCE - AGENTE DE TRADING COM RL
# ==============================================================================

# --- 1. IMPORTAÇÃO DAS BIBLIOTECAS ---
print("Carregando bibliotecas...")
import numpy as np
import pandas as pd
import yfinance as yf
from collections import deque
import random
import itertools
from datetime import date

# Bibliotecas para a Rede Neural (DQN)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Biblioteca para visualização de dados
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
print("Bibliotecas carregadas com sucesso.")

# --- 2. DEFINIÇÃO DO AMBIENTE DE NEGOCIAÇÃO ---
class TradingEnv:
    """
    Simulated stock-market environment for the RL agent.

    Closing prices for ``tickers`` between ``start_date`` and ``end_date`` are
    downloaded once at construction time. At every step the agent picks, for
    each ticker, one of hold / buy one share / sell one share; the reward is
    the change in total portfolio value (cash + holdings) from the current
    day to the next one.

    Args:
        tickers: Iterable of ticker symbols understood by yfinance.
        start_date: First day of the price history (passed to yfinance).
        end_date: End of the price history (passed to yfinance).
        initial_balance: Cash available at the start of each episode.
        window_size: Number of past closing prices per ticker in the state.

    Raises:
        ValueError: If nothing was downloaded, or if fewer than
            ``window_size + 2`` complete rows remain after dropping rows
            with missing prices.
    """
    def __init__(self, tickers, start_date, end_date, initial_balance=10000, window_size=10):
        self.tickers = tickers
        self.start_date = start_date
        self.end_date = end_date
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.data = self._load_data()
        self.action_map = self._create_action_map()
        self.action_size = len(self.action_map)

    def _load_data(self):
        """Download closing prices and keep only rows complete for all tickers."""
        print(f"Baixando dados históricos de {self.start_date} até {self.end_date}...")
        # yfinance now adjusts 'Close' automatically (auto_adjust=True by default).
        full_data = yf.download(self.tickers, start=self.start_date, end=self.end_date)
        if full_data.empty:
            raise ValueError("Nenhum dado baixado. Verifique os tickers e o período.")
        df_close = full_data['Close'].dropna()
        # Validate AFTER dropna(): a single ticker without data would otherwise
        # leave an empty frame that only fails later, inside reset()/step().
        self._validate_data(df_close)
        print("Dados baixados com sucesso!")
        return df_close

    def _validate_data(self, df_close):
        """Raise ValueError when ``df_close`` is too short for a single step."""
        # reset() reads row `window_size`; the first step() reads the next row.
        min_rows = self.window_size + 2
        if len(df_close) < min_rows:
            raise ValueError(
                f"Dados insuficientes após remover valores ausentes: "
                f"{len(df_close)} linhas disponíveis, mínimo de {min_rows} "
                f"para window_size={self.window_size}."
            )

    def _create_action_map(self):
        """Map each action index to a per-ticker tuple of elementary actions."""
        # 0: Hold, 1: Buy, 2: Sell
        actions_per_asset = (0, 1, 2)
        combos = itertools.product(actions_per_asset, repeat=len(self.tickers))
        return dict(enumerate(combos))

    def _get_state(self, t):
        """
        Build the observation for time index ``t``.

        Layout: ``[balance, shares per ticker..., price windows per ticker...]``
        where each window holds the ``window_size`` closes before ``t`` divided
        by the close at ``t``. Returned with shape ``(1, n_features)``.
        """
        holdings = [self.shares_held[ticker] for ticker in self.tickers]

        price_history = []
        for ticker in self.tickers:
            series = self.data[ticker].values
            price_history.extend(series[t - self.window_size:t] / series[t])

        return np.array([[self.balance, *holdings, *price_history]])

    def _portfolio_value(self, step):
        """Return cash plus the value of all holdings priced at row ``step``."""
        holdings_value = sum(
            self.shares_held[ticker] * self.data[ticker].iloc[step]
            for ticker in self.tickers
        )
        return holdings_value + self.balance

    def reset(self):
        """Start a new episode and return the initial state."""
        self.balance = self.initial_balance
        self.shares_held = {ticker: 0 for ticker in self.tickers}
        self.current_step = self.window_size
        self.portfolio_value_history = [self.initial_balance]
        return self._get_state(self.current_step)

    def step(self, action_index):
        """
        Apply one action and advance one trading day.

        Args:
            action_index: Key of ``self.action_map``.

        Returns:
            Tuple ``(next_state, reward, done, info)``; ``info`` is an empty dict.

        Raises:
            IndexError: If the episode is already over (no next row to price
                the portfolio with). The environment is left untouched.
        """
        # Fail before mutating anything; otherwise balance, holdings and
        # current_step would be changed and then the price lookup would fail.
        if self.current_step >= len(self.data) - 1:
            raise IndexError("Episódio encerrado: chame reset() antes de um novo step().")

        old_portfolio_value = self._portfolio_value(self.current_step)

        action_tuple = self.action_map[action_index]
        for ticker, action in zip(self.tickers, action_tuple):
            price = self.data[ticker].iloc[self.current_step]

            if action == 1 and self.balance >= price:  # Buy one share
                self.balance -= price
                self.shares_held[ticker] += 1
            elif action == 2 and self.shares_held[ticker] > 0:  # Sell one share
                self.balance += price
                self.shares_held[ticker] -= 1

        self.current_step += 1

        new_portfolio_value = self._portfolio_value(self.current_step)
        self.portfolio_value_history.append(new_portfolio_value)

        reward = new_portfolio_value - old_portfolio_value
        next_state = self._get_state(self.current_step)
        done = self.current_step >= len(self.data) - 1

        return next_state, reward, done, {}

# --- 3. DEFINIÇÃO DO AGENTE DQN ---
class DQNAgent:
    """
    Agent that learns to trade using a Deep Q-Network.

    Args:
        state_size: Number of features in one state vector.
        action_size: Number of discrete actions (outputs of the network).
    """
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # replay buffer; oldest entries are evicted
        self.gamma = 0.95                 # discount applied to the next state's best Q-value
        self.epsilon = 1.0                # probability of taking a random action
        self.epsilon_min = 0.01           # epsilon stops decaying once it is not above this
        self.epsilon_decay = 0.995        # multiplicative decay applied after each replay
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (two hidden layers, linear output)."""
        model = Sequential([
            # Explicit Input object instead of `input_dim=` on the first Dense
            # layer, which recent Keras versions warn about.
            tf.keras.Input(shape=(self.state_size,)),
            Dense(64, activation='relu'),
            Dense(32, activation='relu'),
            Dense(self.action_size, activation='linear'),
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """
        Choose an action with an epsilon-greedy policy.

        Args:
            state: Array of shape ``(1, state_size)``.

        Returns:
            Action index as a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))

    def replay(self, batch_size):
        """
        Train the network on a random minibatch from the replay buffer.

        The whole minibatch is evaluated and fitted in one pass (two
        ``predict`` calls and one ``fit``) instead of three Keras calls per
        sampled transition, so each call performs a single gradient update
        over ``batch_size`` samples.

        Does nothing when the buffer holds fewer than ``batch_size``
        transitions. Epsilon is decayed after every training pass.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        # Each stored state has shape (1, state_size); stack into (batch, state_size).
        states = np.vstack(states)
        next_states = np.vstack(next_states)
        actions = np.asarray(actions, dtype=int)
        rewards = np.asarray(rewards, dtype=float)
        dones = np.asarray(dones, dtype=bool)

        # Bellman target: r for terminal transitions, r + gamma * max_a' Q(s', a') otherwise.
        best_next_q = self.model.predict(next_states, verbose=0).max(axis=1)
        targets = np.where(dones, rewards, rewards + self.gamma * best_next_q)

        # Only the Q-value of the action actually taken is moved towards its target.
        target_q = self.model.predict(states, verbose=0)
        target_q[np.arange(batch_size), actions] = targets
        self.model.fit(states, target_q, batch_size=batch_size, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# --- 4. BLOCO DE EXECUÇÃO PRINCIPAL ---
# Script entry point: train the DQN agent on 2021-2024 prices, backtest it on
# data from 2025-01-01 up to today, then plot the agent against an
# equal-weight buy-and-hold benchmark.
if __name__ == "__main__":

    tickers = ["VALE3.SA", "PETR4.SA", "BRFS3.SA"]

    # --- PHASE 1: TRAINING ---
    print("\n=========================================")
    print("===      INICIANDO FASE DE TREINO     ===")
    print("=========================================")

    train_env = TradingEnv(tickers, start_date="2021-01-01", end_date="2024-12-31")

    # State layout: balance + shares per ticker + one price window per ticker.
    state_size = train_env.window_size * len(tickers) + 1 + len(tickers)
    action_size = train_env.action_size
    agent = DQNAgent(state_size, action_size)

    episodes = 10
    batch_size = 32

    for e in range(episodes):
        print(f"\n--- Treino: Episódio {e+1}/{episodes} ---")
        state = train_env.reset()
        # Upper bound on the number of steps; the loop normally ends on `done`.
        for _ in range(train_env.window_size, len(train_env.data)):
            action = agent.act(state)
            next_state, reward, done, _info = train_env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                print(f"Episódio {e+1} CONCLUÍDO. Valor Final: {train_env.portfolio_value_history[-1]:.2f}")
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

    print("\nTreinamento Finalizado. O agente está pronto para ser avaliado.")

    # --- PHASE 2: EVALUATION (BACKTESTING) ---
    print("\n============================================")
    print("===   INICIANDO FASE DE AVALIAÇÃO (TESTE)  ===")
    print("============================================")

    today = date.today().strftime("%Y-%m-%d")
    test_env = TradingEnv(tickers, start_date="2025-01-01", end_date=today)

    # No exploration during evaluation: always take the greedy action.
    agent.epsilon = 0.0

    state = test_env.reset()
    for day in range(test_env.window_size, len(test_env.data)):
        action = agent.act(state)
        next_state, reward, done, _info = test_env.step(action)
        state = next_state
        if day % 20 == 0:
            print(f"  Avaliando... Dia {day}/{len(test_env.data)}, Valor do Portfólio: {test_env.portfolio_value_history[-1]:.2f}")
        if done:
            break

    final_test_value = test_env.portfolio_value_history[-1]
    initial_test_value = test_env.initial_balance
    test_profit = final_test_value - initial_test_value
    test_return = (test_profit / initial_test_value) * 100

    print("\n--------------------------------------------")
    print("         RESULTADO DA AVALIAÇÃO")
    print("--------------------------------------------")
    print(f"Capital Inicial: R$ {initial_test_value:.2f}")
    print(f"Valor Final do Portfólio: R$ {final_test_value:.2f}")
    print(f"Lucro/Prejuízo: R$ {test_profit:.2f}")
    print(f"Rentabilidade no Período: {test_return:.2f}%")
    print("--------------------------------------------")

    # --- PHASE 3: RESULT VISUALISATION ---
    print("\n============================================")
    print("===   GERANDO GRÁFICO DE PERFORMANCE   ===")
    print("============================================")

    # Benchmark: split the initial capital equally across tickers on the first
    # day of the test data and hold until the end.
    df_test = test_env.data
    initial_balance = test_env.initial_balance
    num_tickers = len(test_env.tickers)
    capital_per_ticker = initial_balance / num_tickers
    initial_prices = df_test.iloc[0]
    shares_held = capital_per_ticker / initial_prices
    benchmark_portfolio_value = df_test.mul(shares_held, axis='columns').sum(axis=1)

    # portfolio_value_history[0] is the balance at row `window_size` (where
    # reset() starts) and every step appends the value at the following row,
    # so the history lines up with index[window_size:]. Starting the index at
    # window_size - 1 yields one label too many and the column assignment
    # fails with a length mismatch.
    history = test_env.portfolio_value_history
    first_row = test_env.window_size
    last_row = first_row + len(history)
    plot_df = pd.DataFrame({'Agente DQN': history}, index=df_test.index[first_row:last_row])
    benchmark_to_plot = benchmark_portfolio_value.iloc[first_row:last_row].copy()
    # Rescale so both curves start from the same capital on the first plotted day.
    benchmark_to_plot = (benchmark_to_plot / benchmark_to_plot.iloc[0]) * initial_balance
    plot_df['Benchmark (Buy and Hold)'] = benchmark_to_plot

    plt.style.use('seaborn-v0_8-darkgrid')
    fig, ax = plt.subplots(figsize=(14, 8))

    ax.plot(plot_df.index, plot_df['Agente DQN'], label='Agente DQN', color='royalblue', linewidth=2)
    ax.plot(plot_df.index, plot_df['Benchmark (Buy and Hold)'], label='Benchmark (Buy and Hold)', color='gray', linestyle='--')

    ax.set_title('Performance do Agente DQN vs. Benchmark (2025)', fontsize=16, pad=20)
    ax.set_xlabel('Data', fontsize=12)
    ax.set_ylabel('Valor do Portfólio (R$)', fontsize=12)

    formatter = mticker.FormatStrFormatter('R$ %.2f')
    ax.yaxis.set_major_formatter(formatter)

    plt.xticks(rotation=45)
    ax.legend(fontsize=12)
    plt.tight_layout()

    plt.savefig('performance_agente_vs_benchmark.png', dpi=300)
    print("\nGráfico 'performance_agente_vs_benchmark.png' salvo com sucesso!")

    plt.show()

Carregando bibliotecas...
Bibliotecas carregadas com sucesso.

===      INICIANDO FASE DE TREINO     ===
Baixando dados históricos de 2021-01-01 até 2024-12-31...
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  3 of 3 completed
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Dados baixados com sucesso!

--- Treino: Episódio 1/10 ---
