In [1]:
# ü§ñ Importa√ß√µes Iron Man (Tecnologia Avan√ßada)
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque, namedtuple
import random
import warnings
warnings.filterwarnings('ignore')

# Seed every RNG the notebook draws from (epsilon-greedy exploration and
# replay sampling use `random`; weight initialisation and dropout use torch)
# so that Restart Kernel -> Run All reproduces the same training run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"üöÄ Iron Man Tech Stack carregado!")
print(f"üîß Device: {device}")
print(f"üß† PyTorch version: {torch.__version__}")
print(f"üí° Sistema pronto para Deep Q-Learning!")

ModuleNotFoundError: No module named 'torch'

## ⚙️ CONFIGURAÇÃO IRON MAN - Estado da Arte

Sistema avançado com redes neurais para qualquer ativo. Simplesmente altere `TICKER_SYMBOL`!

In [None]:
# üöÄ CONFIGURA√á√ÉO IRON MAN - Tecnologia de Ponta
TICKER_SYMBOL = "PETR3.SA"    # Ticker passed to yfinance; change it to trade another asset
PERIOD = "2y"                 # History length requested from yfinance (more data for deep learning)
INITIAL_CAPITAL = 10000.0     # Starting cash of the simulated portfolio

# DQN parameters
STATE_SIZE = 20               # Length of the state vector (prices + indicators + position info)
HIDDEN_SIZE = 128             # Neurons in each hidden layer
LEARNING_RATE = 0.001         # Learning rate for Adam
BATCH_SIZE = 64               # Batch size for training
MEMORY_SIZE = 10000           # Capacity of the replay buffer
TARGET_UPDATE = 100           # Target network sync frequency (the training loop counts episodes)
EPSILON_START = 1.0           # Initial exploration rate
EPSILON_MIN = 0.01            # Floor for the exploration rate
EPSILON_DECAY = 0.995         # Multiplicative epsilon decay factor
GAMMA = 0.99                  # Discount factor

# Training parameters
NUM_EPISODES = 2000           # Number of training episodes
WARMUP_EPISODES = 100         # Warm-up episodes (the training loop reports each of these individually)

# Technical indicators / state window
WINDOW_SIZE = 10              # Look-back window (rows) used to build the environment state
RSI_PERIOD = 14               # RSI period

print(f"ü§ñ Iron Man DQN configurado para: {TICKER_SYMBOL}")
print(f"üß† Arquitetura: {STATE_SIZE} ‚Üí {HIDDEN_SIZE} ‚Üí 3 a√ß√µes")
print(f"üíæ Memory buffer: {MEMORY_SIZE:,} experi√™ncias")
print(f"üéØ Episodes: {NUM_EPISODES}")
print(f"‚ö° Device: {device}")

## 📊 SISTEMA AVANÇADO DE DADOS IRON MAN

Carregamento inteligente com features avançadas para Deep Learning.

In [None]:
# üî¨ Sistema Avan√ßado de Features Iron Man
def load_advanced_data(ticker_symbol, period="2y"):
    """
    Download price history for a ticker and derive technical-indicator columns.

    Added columns: Returns, Log_Returns, SMA_5/10/20, EMA_12/26, MACD,
    MACD_Signal, BB_Middle/Upper/Lower, BB_Position, RSI, Volatility, ATR,
    Volume_SMA, Volume_Ratio, Momentum_5/10 and Price_Position. Rows that
    contain NaN or +/-inf in any column are dropped.

    Args:
        ticker_symbol: Symbol handed to ``yf.Ticker`` (e.g. "PETR3.SA").
        period: History length handed to ``Ticker.history``.

    Returns:
        ``(df, info)`` where ``df`` is the enriched DataFrame and ``info`` is
        the ticker metadata dict (empty when the metadata request fails).
        On any other failure the error is printed and ``(None, None)`` is
        returned, so callers must check for ``None``.
    """
    try:
        print(f"üöÄ Iron Man carregando dados de {ticker_symbol}...")
        ticker = yf.Ticker(ticker_symbol)
        df = ticker.history(period=period)

        if df.empty:
            raise ValueError(f"Dados n√£o encontrados para {ticker_symbol}")

        close = df['Close']

        # Basic return features
        df['Returns'] = close.pct_change()
        df['Log_Returns'] = np.log(close / close.shift(1))

        # Simple moving averages
        df['SMA_5'] = close.rolling(5).mean()
        df['SMA_10'] = close.rolling(10).mean()
        df['SMA_20'] = close.rolling(20).mean()

        # Exponential moving averages
        df['EMA_12'] = close.ewm(span=12).mean()
        df['EMA_26'] = close.ewm(span=26).mean()

        # MACD and its signal line
        df['MACD'] = df['EMA_12'] - df['EMA_26']
        df['MACD_Signal'] = df['MACD'].ewm(span=9).mean()

        # Bollinger Bands (20-row window, 2 standard deviations)
        df['BB_Middle'] = close.rolling(20).mean()
        bb_std = close.rolling(20).std()
        df['BB_Upper'] = df['BB_Middle'] + (bb_std * 2)
        df['BB_Lower'] = df['BB_Middle'] - (bb_std * 2)
        df['BB_Position'] = (close - df['BB_Lower']) / (df['BB_Upper'] - df['BB_Lower'])

        # RSI from rolling means of gains and losses
        delta = close.diff()
        gain = (delta.where(delta > 0, 0)).rolling(RSI_PERIOD).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(RSI_PERIOD).mean()
        rs = gain / loss
        df['RSI'] = 100 - (100 / (1 + rs))

        # Volatility; "ATR" here is the rolling mean of the High-Low range.
        # Vectorised subtraction gives the same values as the former row-wise
        # apply without a Python-level loop over rows.
        df['Volatility'] = df['Returns'].rolling(10).std()
        df['ATR'] = (df['High'] - df['Low']).rolling(14).mean()

        # Volume features
        df['Volume_SMA'] = df['Volume'].rolling(10).mean()
        df['Volume_Ratio'] = df['Volume'] / df['Volume_SMA']

        # Momentum (price ratio against 5 and 10 rows back)
        df['Momentum_5'] = close / close.shift(5)
        df['Momentum_10'] = close / close.shift(10)

        # Position of the close inside its 20-row min/max range
        rolling_low = close.rolling(20).min()
        rolling_high = close.rolling(20).max()
        df['Price_Position'] = (close - rolling_low) / (rolling_high - rolling_low)

        # Ratios can yield +/-inf on a zero denominator; dropna() ignores inf,
        # so convert them to NaN first to keep them out of the feature matrix.
        df = df.replace([np.inf, -np.inf], np.nan).dropna()

        if df.empty:
            # Happens when the history is shorter than the longest rolling window
            raise ValueError(f"Dados insuficientes para calcular os indicadores de {ticker_symbol}")

        # Metadata is a separate request and only cosmetic here: a failure
        # must not throw away price data that was loaded successfully.
        try:
            info = ticker.info or {}
        except Exception as info_error:
            print(f"Aviso: falha ao obter metadados de {ticker_symbol}: {info_error}")
            info = {}
        company_name = info.get('longName', ticker_symbol)

        print(f"‚úÖ Dados Iron Man carregados: {company_name}")
        print(f"üìä Features: {len(df.columns)} colunas")
        print(f"üìÖ Per√≠odo: {df.index[0].date()} at√© {df.index[-1].date()}")
        print(f"üìà Observa√ß√µes: {len(df)}")
        print(f"üí∞ Pre√ßo atual: R$ {df['Close'].iloc[-1]:.2f}")

        return df, info

    except Exception as e:
        # Deliberate best-effort: report and signal failure with (None, None)
        print(f"‚ùå Erro Iron Man: {e}")
        return None, None

def create_feature_matrix(df, window_size=WINDOW_SIZE):
    """
    Build the numeric feature matrix fed to the DQN environment.

    Every selected column except 'Close' is z-scored over the whole frame
    (columns whose standard deviation is not positive are left untouched).
    'Close' stays in raw price units and is always column 0.

    Args:
        df: DataFrame containing the feature columns listed below.
        window_size: Accepted for API compatibility; not used here.

    Returns:
        ``(matrix, feature_columns)``: a 2-D array with one row per
        observation and the list of column names in matrix order.
    """
    feature_columns = [
        'Close', 'Volume', 'Returns',
        'SMA_5', 'SMA_10', 'SMA_20',
        'EMA_12', 'EMA_26',
        'MACD', 'MACD_Signal',
        'BB_Position', 'RSI', 'Volatility', 'ATR',
        'Volume_Ratio', 'Momentum_5', 'Momentum_10',
        'Price_Position',
    ]

    normalized = df.copy()

    # 'Close' (index 0) is skipped: the environment rescales it per window
    scalable = [name for name in feature_columns[1:] if name in normalized.columns]
    for name in scalable:
        series = normalized[name]
        center = series.mean()
        spread = series.std()
        if not spread > 0:
            continue
        normalized[name] = (series - center) / spread

    matrix = normalized[feature_columns].values
    return matrix, feature_columns

# Load the dataset for the configured ticker and period.
# Both values are None when loading fails, which later cells check for.
df_ironman, ironman_info = load_advanced_data(TICKER_SYMBOL, PERIOD)

## 🧠 DEEP Q-NETWORK IRON MAN

Rede neural state-of-the-art com PyTorch para aproximação da função Q.

In [None]:
# üß† Deep Q-Network Iron Man (Estado da Arte)
class IronManDQN(nn.Module):
    """
    Fully connected Q-network: maps a state vector to one Q-value per action.

    Layout: two (Linear -> BatchNorm1d -> ReLU -> Dropout(0.2)) stages of
    width ``hidden_size``, one Linear -> ReLU stage of half that width, and a
    final Linear producing ``output_size`` values.
    """
    def __init__(self, state_size, hidden_size=HIDDEN_SIZE, output_size=3):
        super().__init__()

        half_hidden = hidden_size // 2

        # Layers are listed in execution order; their position fixes the
        # `network.<index>` names used by the state dict.
        layers = [
            # stage 1
            nn.Linear(state_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.2),
            # stage 2
            nn.Linear(hidden_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.2),
            # stage 3 (narrowing)
            nn.Linear(hidden_size, half_hidden),
            nn.ReLU(),
            # output head
            nn.Linear(half_hidden, output_size),
        ]
        self.network = nn.Sequential(*layers)

        # Xavier initialisation for every Linear layer
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-uniform weights and zero bias for Linear layers; others untouched."""
        if not isinstance(module, nn.Linear):
            return
        nn.init.xavier_uniform_(module.weight)
        nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return the Q-values for the batch of states ``x``."""
        q_values = self.network(x)
        return q_values

# Replay memory: one transition record plus a fixed-capacity buffer
Experience = namedtuple('Experience', 'state action reward next_state done')

class IronManReplayBuffer:
    """
    Fixed-capacity FIFO store of transitions for DQN training.

    Once ``capacity`` is reached the oldest transition is discarded.
    Sampling is uniform without replacement (no prioritisation).
    """
    def __init__(self, capacity=MEMORY_SIZE):
        self.capacity = capacity
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        """Append one transition, evicting the oldest when the buffer is full."""
        self.buffer.append(Experience(state, action, reward, next_state, done))

    def sample(self, batch_size=BATCH_SIZE):
        """Return ``batch_size`` distinct transitions; raises ValueError if fewer are stored."""
        return random.sample(self.buffer, k=batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.buffer)

# A√ß√µes Iron Man
class IronManActions:
    """Integer codes for the three trading actions, plus small lookup helpers."""
    HOLD, BUY, SELL = range(3)

    @classmethod
    def get_actions(cls):
        """Return the action codes in the order HOLD, BUY, SELL."""
        codes = [cls.HOLD, cls.BUY, cls.SELL]
        return codes

    @classmethod
    def action_name(cls, action):
        """Return the label for an action code, or "UNKNOWN" for any other value."""
        labels = {
            code: label
            for label, code in (("HOLD", cls.HOLD), ("BUY", cls.BUY), ("SELL", cls.SELL))
        }
        return labels.get(action, "UNKNOWN")

# Check that the data loaded, then initialise the module-level components
if df_ironman is not None:
    # Build the feature matrix and keep the raw close prices alongside it
    feature_matrix, feature_names = create_feature_matrix(df_ironman, WINDOW_SIZE)
    prices_ironman = df_ironman['Close'].values
    
    print(f"üß† Rede Neural Iron Man:")
    print(f"   üî¢ Input size: {STATE_SIZE}")
    print(f"   üèóÔ∏è Hidden size: {HIDDEN_SIZE}")
    print(f"   üéØ Output size: 3 a√ß√µes")
    print(f"   üìä Features dispon√≠veis: {len(feature_names)}")
    print(f"   üñ•Ô∏è Device: {device}")
    
    # Initialise the networks; the target starts as an exact copy of the main one.
    # NOTE(review): IronManDQNAgent builds its own networks, optimizer and
    # replay buffer, so these module-level instances look unused by the
    # visible code -- confirm before relying on them.
    main_net = IronManDQN(STATE_SIZE, HIDDEN_SIZE).to(device)
    target_net = IronManDQN(STATE_SIZE, HIDDEN_SIZE).to(device)
    target_net.load_state_dict(main_net.state_dict())
    
    # Optimizer
    optimizer = optim.Adam(main_net.parameters(), lr=LEARNING_RATE)
    
    # Replay buffer
    replay_buffer = IronManReplayBuffer(MEMORY_SIZE)
    
    print("‚úÖ Componentes Iron Man inicializados!")
    print(f"   üß† Redes: Main + Target")
    print(f"   üíæ Buffer: {MEMORY_SIZE:,} experi√™ncias")
    print(f"   ‚öôÔ∏è Otimizador: Adam (lr={LEARNING_RATE})")
    
else:
    # load_advanced_data returned None: nothing can be initialised
    print("‚ùå Erro: Dados Iron Man n√£o carregados!")

## 🏗️ AMBIENTE AVANÇADO IRON MAN

Ambiente de trading com estados contínuos e features avançadas.

In [None]:
# üèóÔ∏è Ambiente de Trading Avan√ßado Iron Man
class IronManTradingEnvironment:
    """
    Single-asset trading environment with a continuous state vector.

    The portfolio holds cash and an integer number of shares. Each step the
    agent may HOLD, BUY (invest all affordable cash) or SELL (liquidate the
    whole position). A proportional fee of ``TRANSACTION_FEE`` is charged on
    every executed trade. Orders execute at the current close and the
    portfolio is then marked to the next close to compute the reward.
    """
    TRANSACTION_FEE = 0.001  # 0.1% of the traded amount

    def __init__(self, feature_matrix, prices, initial_capital=INITIAL_CAPITAL, window_size=WINDOW_SIZE):
        """
        Args:
            feature_matrix: 2-D array with one row per time step; column 0 is
                treated as the raw close price, the rest as indicators.
            prices: 1-D sequence of close prices aligned with the rows of
                ``feature_matrix``.
            initial_capital: Cash available at the start of every episode.
            window_size: Number of rows in the look-back window; also the
                index of the first step of an episode.
        """
        self.feature_matrix = feature_matrix
        self.prices = prices
        self.initial_capital = initial_capital
        self.window_size = window_size

        # Price statistics (kept for external use; not read inside this class)
        self.price_mean = np.mean(prices)
        self.price_std = np.std(prices)

        self.reset()

    def reset(self):
        """Start a new episode with the full capital in cash and return the first state."""
        self.current_step = self.window_size
        self.cash = self.initial_capital
        self.shares = 0
        self.trade_count = 0  # BUY/SELL orders that were actually executed
        self.portfolio_history = []
        self.action_history = []

        return self._get_state()

    def _get_state(self):
        """
        Build the state vector for ``self.current_step``.

        Layout (then truncated or zero-padded to ``STATE_SIZE``, float32):
          1. the window's close prices divided by the current close,
          2. the first ``STATE_SIZE - window_size - 3`` indicator values of
             the current (most recent) row,
          3. cash, position and total value as fractions of initial capital.
        """
        if self.current_step < self.window_size:
            # Not enough history yet: left-pad the window with zero rows
            padding = np.zeros((self.window_size - self.current_step - 1, self.feature_matrix.shape[1]))
            window_data = np.vstack([padding, self.feature_matrix[:self.current_step + 1]])
        else:
            window_data = self.feature_matrix[self.current_step - self.window_size + 1:self.current_step + 1]

        # Closes expressed relative to the current price (column 0 is Close)
        current_price = self.prices[self.current_step]
        price_normalized = window_data[:, 0] / current_price

        # Indicators must describe the current row. Slicing the flattened
        # window would return the OLDEST row's values (row-major order),
        # i.e. indicators that are window_size - 1 steps stale.
        n_indicators = max(0, STATE_SIZE - self.window_size - 3)
        latest_indicators = window_data[-1, 1:][:n_indicators]

        # Portfolio composition relative to the initial capital
        portfolio_value = self.cash + self.shares * current_price
        position_info = np.array([
            self.cash / self.initial_capital,                    # cash ratio
            self.shares * current_price / self.initial_capital,  # position ratio
            portfolio_value / self.initial_capital,              # total value ratio
        ])

        state = np.concatenate([price_normalized, latest_indicators, position_info])

        # Enforce the fixed input size expected by the network
        if len(state) > STATE_SIZE:
            state = state[:STATE_SIZE]
        elif len(state) < STATE_SIZE:
            state = np.concatenate([state, np.zeros(STATE_SIZE - len(state))])

        return state.astype(np.float32)

    def step(self, action):
        """
        Execute ``action`` at the current close and advance one time step.

        Returns:
            ``(next_state, reward, done, info)``. ``next_state`` is None when
            ``done`` is True. ``info['portfolio_value']`` is the portfolio
            marked to the next close; ``info['current_price']`` is the price
            at which the order was executed.
        """
        fee_rate = self.TRANSACTION_FEE
        current_price = self.prices[self.current_step]
        portfolio_before = self.cash + self.shares * current_price

        action_executed = False
        transaction_cost = 0

        if action == IronManActions.BUY and current_price > 0:
            # Size the order so that cost PLUS fee fits in the available cash;
            # ignoring the fee here would drive the cash balance negative.
            shares_to_buy = int(self.cash // (current_price * (1 + fee_rate)))
            if shares_to_buy > 0:
                cost = shares_to_buy * current_price
                transaction_cost = cost * fee_rate
                self.shares += shares_to_buy
                self.cash -= (cost + transaction_cost)
                action_executed = True

        elif action == IronManActions.SELL and self.shares > 0:
            revenue = self.shares * current_price
            transaction_cost = revenue * fee_rate
            self.cash += (revenue - transaction_cost)
            self.shares = 0
            action_executed = True

        elif action == IronManActions.HOLD:
            # HOLD is always valid
            action_executed = True

        if action_executed and action != IronManActions.HOLD:
            self.trade_count += 1

        # Advance time BEFORE valuing the portfolio. Valuing it at the
        # execution price would make the return equal to minus the fee,
        # whatever position is held, so the agent could never be rewarded
        # for a good position.
        self.current_step += 1
        last_index = len(self.prices) - 1
        done = self.current_step >= last_index
        next_price = self.prices[min(self.current_step, last_index)]

        portfolio_after = self.cash + self.shares * next_price
        portfolio_return = (portfolio_after - portfolio_before) / portfolio_before if portfolio_before > 0 else 0

        # Excess return over simply holding the asset during the same step
        if current_price > 0:
            market_return = (next_price - current_price) / current_price
            alpha_reward = (portfolio_return - market_return) * 100
        else:
            alpha_reward = 0

        # NOTE(review): the penalty is in currency units while the other
        # terms are scaled returns, and the fee is already part of
        # portfolio_return -- confirm the intended weighting.
        transaction_penalty = transaction_cost

        reward = portfolio_return * 1000 + alpha_reward - transaction_penalty

        self.portfolio_history.append(portfolio_after)
        self.action_history.append(action)

        next_state = self._get_state() if not done else None

        info = {
            'portfolio_value': portfolio_after,
            'cash': self.cash,
            'shares': self.shares,
            'current_price': current_price,
            'action_executed': action_executed,
            'transaction_cost': transaction_cost,
            'alpha_reward': alpha_reward
        }

        return next_state, reward, done, info

    def get_performance_metrics(self):
        """
        Summarise the episode recorded so far.

        Returns:
            None when no step has been taken; otherwise a dict with
            ``total_return``, ``sharpe_ratio`` (scaled by sqrt(252)),
            ``max_drawdown`` (<= 0), ``final_value`` and ``num_trades``
            (executed BUY/SELL orders only).
        """
        if not self.portfolio_history:
            return None

        values = np.asarray(self.portfolio_history, dtype=float)
        returns = np.diff(values) / values[:-1]

        total_return = (values[-1] - self.initial_capital) / self.initial_capital

        # Sharpe ratio (no risk-free rate), annualised
        if len(returns) > 1 and np.std(returns) > 0:
            sharpe = np.mean(returns) / np.std(returns) * np.sqrt(252)
        else:
            sharpe = 0

        # Maximum drawdown relative to the running peak
        peak = np.maximum.accumulate(values)
        max_drawdown = np.min((values - peak) / peak)

        return {
            'total_return': total_return,
            'sharpe_ratio': sharpe,
            'max_drawdown': max_drawdown,
            'final_value': self.portfolio_history[-1],
            # Rejected orders (BUY without cash, SELL without shares) are not trades
            'num_trades': self.trade_count
        }

# Initialise the trading environment (only when the data loaded)
if df_ironman is not None:
    ironman_env = IronManTradingEnvironment(
        feature_matrix, 
        prices_ironman, 
        INITIAL_CAPITAL, 
        WINDOW_SIZE
    )
    
    print("üèóÔ∏è Ambiente Iron Man inicializado!")
    print(f"   üéØ Estado size: {STATE_SIZE}")
    print(f"   üìä Features: {feature_matrix.shape[1]}")
    print(f"   üí∞ Capital: R$ {INITIAL_CAPITAL:,.2f}")
    print(f"   üìà Dados: {len(prices_ironman)} dias")
    
    # Smoke test: build the initial state and show its shape and value range
    test_state = ironman_env.reset()
    print(f"   üß™ Estado teste: shape {test_state.shape}, range [{test_state.min():.3f}, {test_state.max():.3f}]")
    
else:
    # No data: the environment cannot be built
    print("‚ùå Ambiente Iron Man n√£o pode ser inicializado!")

## 🤖 AGENTE DQN IRON MAN

Agente avançado com Deep Q-Learning, Experience Replay e Target Network.

In [None]:
# ü§ñ Agente DQN Iron Man (Estado da Arte)
class IronManDQNAgent:
    """
    Deep Q-Learning agent: epsilon-greedy policy, uniform experience replay
    and a target network that is synchronised on demand.
    """
    def __init__(self, state_size=STATE_SIZE, action_size=3, lr=LEARNING_RATE):
        """
        Args:
            state_size: Length of the state vector fed to the networks.
            action_size: Number of discrete actions (network outputs).
            lr: Learning rate for the Adam optimizer.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.lr = lr

        # Exploration schedule
        self.epsilon = EPSILON_START
        self.epsilon_min = EPSILON_MIN
        self.epsilon_decay = EPSILON_DECAY

        # Online network and its frozen copy used for bootstrap targets
        self.main_net = IronManDQN(state_size, HIDDEN_SIZE, action_size).to(device)
        self.target_net = IronManDQN(state_size, HIDDEN_SIZE, action_size).to(device)
        self.target_net.load_state_dict(self.main_net.state_dict())
        # The target network is inference-only: eval mode disables dropout
        # and makes BatchNorm use its running statistics instead of updating
        # them from the sampled batch.
        self.target_net.eval()

        self.optimizer = optim.Adam(self.main_net.parameters(), lr=lr)

        # Experience replay
        self.memory = IronManReplayBuffer(MEMORY_SIZE)

        # Training statistics.
        # NOTE(review): `losses` and `q_values_history` grow by one entry per
        # training step / greedy action and are never trimmed.
        self.losses = []
        self.episode_rewards = []
        self.episode_returns = []
        self.q_values_history = []

        print(f"ü§ñ Agente Iron Man DQN inicializado!")
        print(f"   üß† Arquitetura: {state_size} ‚Üí {HIDDEN_SIZE} ‚Üí {action_size}")
        print(f"   ‚ö° Device: {device}")
        print(f"   üéØ Epsilon: {self.epsilon}")

    def get_action(self, state, training=True):
        """
        Choose an action with an epsilon-greedy policy.

        Args:
            state: 1-D state vector.
            training: When True, explore with probability ``epsilon`` and
                record the Q-values of greedy decisions.

        Returns:
            ``(action, explored)``: the action index and whether it was drawn
            at random.
        """
        if training and random.random() < self.epsilon:
            # Exploration: uniform random action
            return random.randrange(self.action_size), True

        state_tensor = torch.as_tensor(state, dtype=torch.float32, device=device).unsqueeze(0)

        # Exploitation. The forward pass must run in eval mode: BatchNorm1d
        # raises on a batch of one in training mode, and active dropout
        # would randomly perturb the greedy choice. The previous mode is
        # restored afterwards so training is unaffected.
        was_training = self.main_net.training
        self.main_net.eval()
        try:
            with torch.no_grad():
                q_values = self.main_net(state_tensor)
        finally:
            self.main_net.train(was_training)
        action = q_values.argmax().item()

        # Keep the Q-values for later analysis
        if training:
            self.q_values_history.append(q_values.cpu().numpy().flatten())

        return action, False

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay buffer."""
        self.memory.push(state, action, reward, next_state, done)

    def replay(self):
        """
        Run one gradient step on a batch sampled from the replay buffer.

        Returns:
            The batch loss as a float, or None when the buffer holds fewer
            than ``BATCH_SIZE`` transitions.
        """
        if len(self.memory) < BATCH_SIZE:
            return None

        experiences = self.memory.sample(BATCH_SIZE)

        # Terminal transitions carry next_state=None; substitute zeros (their
        # bootstrap term is masked out below, so the values are irrelevant).
        terminal_state = np.zeros(self.state_size, dtype=np.float32)

        # Stack into contiguous arrays first: building a tensor from a list
        # of separate ndarrays is slow.
        states = torch.as_tensor(
            np.asarray([e.state for e in experiences], dtype=np.float32), device=device)
        next_states = torch.as_tensor(
            np.asarray([terminal_state if e.done else e.next_state for e in experiences],
                       dtype=np.float32),
            device=device)
        actions = torch.as_tensor([e.action for e in experiences], dtype=torch.long, device=device)
        rewards = torch.as_tensor([e.reward for e in experiences], dtype=torch.float32, device=device)
        dones = torch.as_tensor([e.done for e in experiences], dtype=torch.bool, device=device)

        # Q(s, a) of the actions actually taken; squeeze only the action
        # axis so the result stays 1-D for any batch size.
        self.main_net.train()
        current_q_values = self.main_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        # Bootstrap target from the target network (zero beyond terminal states)
        with torch.no_grad():
            next_q_values = self.target_net(next_states).max(1)[0]
            target_q_values = rewards + GAMMA * next_q_values * (~dones).float()

        # Huber loss for stability
        loss = F.smooth_l1_loss(current_q_values, target_q_values)

        self.optimizer.zero_grad()
        loss.backward()
        # Gradient clipping for stability
        torch.nn.utils.clip_grad_norm_(self.main_net.parameters(), 1.0)
        self.optimizer.step()

        # NOTE(review): epsilon decays once per gradient step, not per
        # episode; with EPSILON_DECAY = 0.995 it reaches EPSILON_MIN after
        # roughly 920 updates -- confirm this schedule is intended.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        loss_value = loss.item()
        self.losses.append(loss_value)
        return loss_value

    def update_target_network(self):
        """Copy the online network's weights into the target network."""
        self.target_net.load_state_dict(self.main_net.state_dict())
        self.target_net.eval()

    def train_episode(self, env):
        """
        Play one full episode on ``env``, learning after every step.

        Returns:
            Dict with ``episode_reward``, ``episode_loss`` (mean loss over
            the episode's updates, 0 if none), ``epsilon``, ``performance``
            (the env's metrics dict or None) and ``memory_size``.
        """
        state = env.reset()
        episode_reward = 0
        episode_loss = 0
        loss_count = 0

        while True:
            action, _ = self.get_action(state, training=True)
            next_state, reward, done, info = env.step(action)
            self.remember(state, action, reward, next_state, done)

            # replay() returns None until the buffer holds a full batch
            loss = self.replay()
            if loss is not None:
                episode_loss += loss
                loss_count += 1

            episode_reward += reward

            if done:
                break

            state = next_state

        performance = env.get_performance_metrics()

        self.episode_rewards.append(episode_reward)
        if performance:
            self.episode_returns.append(performance['total_return'])

        return {
            'episode_reward': episode_reward,
            'episode_loss': episode_loss / max(1, loss_count),
            'epsilon': self.epsilon,
            'performance': performance,
            'memory_size': len(self.memory)
        }

    def get_training_stats(self, window=100):
        """
        Average the most recent training statistics.

        Args:
            window: Number of trailing entries to average. Rewards and
                returns are per episode; losses are per gradient step.

        Returns:
            None before the first episode, otherwise a dict with
            ``episodes``, ``avg_reward``, ``avg_return``, ``avg_loss``,
            ``epsilon`` and ``memory_size``.
        """
        if not self.episode_rewards:
            return None

        recent_rewards = self.episode_rewards[-window:]
        recent_returns = self.episode_returns[-window:] if self.episode_returns else [0]
        recent_losses = self.losses[-window:] if self.losses else [0]

        return {
            'episodes': len(self.episode_rewards),
            'avg_reward': np.mean(recent_rewards),
            'avg_return': np.mean(recent_returns),
            'avg_loss': np.mean(recent_losses),
            'epsilon': self.epsilon,
            'memory_size': len(self.memory)
        }

# Initialise the DQN agent (only when the data loaded)
if df_ironman is not None:
    # The action count comes from IronManActions so both stay in sync
    ironman_agent = IronManDQNAgent(STATE_SIZE, len(IronManActions.get_actions()), LEARNING_RATE)
    
    print("‚úÖ Agente Iron Man DQN pronto!")
    # Total number of parameters in the main network
    print(f"   üß† Par√¢metros: {sum(p.numel() for p in ironman_agent.main_net.parameters()):,}")
    print(f"   üíæ Memory capacity: {MEMORY_SIZE:,}")
    print(f"   üéØ Target update: cada {TARGET_UPDATE} epis√≥dios")
    
else:
    # No data: the agent is not created
    print("‚ùå Agente Iron Man n√£o pode ser inicializado!")

## 🚀 TREINAMENTO IRON MAN

Treinamento avançado com monitoramento em tempo real e otimizações modernas.

In [None]:
# üöÄ Treinamento Avan√ßado Iron Man
def train_ironman_agent(agent, env, num_episodes=NUM_EPISODES, print_every=200):
    """
    Train ``agent`` on ``env`` and collect periodic statistics.

    The target network is synchronised every ``TARGET_UPDATE`` episodes. A
    report is printed (and a history row stored) for each of the first
    ``WARMUP_EPISODES`` episodes and then every ``print_every`` episodes.

    Args:
        agent: Object exposing ``train_episode``, ``update_target_network``,
            ``get_training_stats``, ``state_size``, ``action_size`` and
            ``main_net``.
        env: Environment passed through to ``agent.train_episode``.
        num_episodes: Number of episodes to run.
        print_every: Reporting interval after the warm-up phase.

    Returns:
        Dict of equally long lists keyed by ``episodes``, ``avg_reward``,
        ``avg_return``, ``avg_loss``, ``epsilon``, ``memory_size``,
        ``sharpe_ratio`` and ``max_drawdown`` (one entry per report).
    """
    print(f"üöÄ Iniciando treinamento Iron Man DQN - {num_episodes} epis√≥dios")
    print(f"üß† Arquitetura: {agent.state_size} inputs ‚Üí {HIDDEN_SIZE} hidden ‚Üí {agent.action_size} outputs")
    print(f"üíæ Experience replay: {MEMORY_SIZE:,} experi√™ncias")
    print(f"üéØ Target network update: cada {TARGET_UPDATE} epis√≥dios")
    print("=" * 70)

    training_history = {
        'episodes': [],
        'avg_reward': [],
        'avg_return': [],
        'avg_loss': [],
        'epsilon': [],
        'memory_size': [],
        'sharpe_ratio': [],
        'max_drawdown': []
    }

    best_performance = -float('inf')

    for episode in range(1, num_episodes + 1):
        episode_info = agent.train_episode(env)

        # Sync the target network
        if episode % TARGET_UPDATE == 0:
            agent.update_target_network()

        # 'performance' may be present but None; normalise to an empty dict
        performance = episode_info.get('performance') or {}

        # Track the best return on EVERY episode. Checking only on reported
        # episodes would miss records set in between reports.
        is_new_best = bool(performance) and performance['total_return'] > best_performance
        if is_new_best:
            best_performance = performance['total_return']

        # Periodic report
        if episode % print_every == 0 or episode <= WARMUP_EPISODES:
            stats = agent.get_training_stats()

            print(f"üöÄ Epis√≥dio {episode}/{num_episodes}")
            print(f"   üí∞ Reward m√©dio: {stats['avg_reward']:+.2f}")
            print(f"   üìä Retorno m√©dio: {stats['avg_return']:+.2%}")
            print(f"   üß† Loss m√©dio: {stats['avg_loss']:.4f}")
            print(f"   üîç Epsilon: {stats['epsilon']:.3f}")
            print(f"   üíæ Memory: {stats['memory_size']:,}/{MEMORY_SIZE:,}")

            if performance:
                print(f"   üìà √öltimo retorno: {performance['total_return']:+.2%}")
                print(f"   ‚ö° Sharpe ratio: {performance['sharpe_ratio']:.3f}")
                print(f"   üìâ Max drawdown: {performance['max_drawdown']:+.2%}")
                print(f"   üîÑ Trades: {performance['num_trades']}")

                if is_new_best:
                    print(f"   üèÜ NOVO RECORD: {best_performance:+.2%}!")

            print("-" * 50)

            # Store one history row per report
            training_history['episodes'].append(episode)
            training_history['avg_reward'].append(stats['avg_reward'])
            training_history['avg_return'].append(stats['avg_return'])
            training_history['avg_loss'].append(stats['avg_loss'])
            training_history['epsilon'].append(stats['epsilon'])
            training_history['memory_size'].append(stats['memory_size'])
            training_history['sharpe_ratio'].append(performance.get('sharpe_ratio', 0))
            training_history['max_drawdown'].append(performance.get('max_drawdown', 0))

    print("‚úÖ Treinamento Iron Man conclu√≠do!")
    final_stats = agent.get_training_stats()
    if final_stats is None:
        # No episode was recorded (e.g. num_episodes == 0): nothing to summarise
        return training_history

    print(f"üìä Estat√≠sticas finais:")
    print(f"   üß† Par√¢metros da rede: {sum(p.numel() for p in agent.main_net.parameters()):,}")
    print(f"   üíæ Experi√™ncias coletadas: {final_stats['memory_size']:,}")
    print(f"   üéØ Epsilon final: {final_stats['epsilon']:.4f}")
    print(f"   üí∞ Reward m√©dio final: {final_stats['avg_reward']:+.2f}")
    print(f"   üìà Retorno m√©dio final: {final_stats['avg_return']:+.2%}")
    print(f"   üèÜ Melhor performance: {best_performance:+.2%}")

    return training_history

# Run the training (requires the data and the agent created in earlier cells)
if df_ironman is not None and 'ironman_agent' in locals():
    print(f"üöÄ Iniciando treinamento Iron Man para {TICKER_SYMBOL}")
    print(f"üßÆ Usando {device}")
    
    # Main training run; the returned history feeds the plots further down
    ironman_training_history = train_ironman_agent(
        ironman_agent, 
        ironman_env, 
        NUM_EPISODES, 
        print_every=250
    )
    
else:
    # Either the data failed to load or the agent cell was not executed
    print("‚ùå Componentes Iron Man n√£o dispon√≠veis para treinamento!")
    print("üí° Verifique se PyTorch est√° instalado: pip install torch")

## 📊 AVALIAÇÃO E ANÁLISE IRON MAN

Avaliação completa com métricas avançadas e visualizações modernas.

In [None]:
# üìä Avalia√ß√£o Completa Iron Man
def evaluate_ironman_agent(agent, env, num_episodes=50):
    """
    Run the greedy policy on ``env`` and compare it with Buy & Hold.

    Exploration is switched off and, when the agent exposes ``main_net``,
    the network is put in eval mode for the duration of the evaluation.
    Both settings are restored afterwards, even if an episode raises.

    Args:
        agent: Object with ``epsilon`` and ``get_action(state, training)``.
        env: Environment with ``reset``, ``step``, ``get_performance_metrics``,
            ``initial_capital``, ``prices`` and ``window_size``.
        num_episodes: Number of evaluation episodes.

    Returns:
        None when no episode produced metrics; otherwise a dict with
        ``avg_return``, ``avg_sharpe``, ``avg_drawdown``, ``win_rate``,
        ``volatility``, ``buy_hold_return``, ``alpha``, ``test_results`` and
        ``portfolio_curves``.

    NOTE(review): if the environment is deterministic, a greedy policy in
    eval mode repeats the same trajectory every episode, so ``volatility``
    (std of returns across episodes) collapses to ~0.
    """
    print("üß™ Avaliando agente Iron Man DQN...")

    test_results = []
    portfolio_curves = []

    # Remember the state we are about to change so it can be restored
    original_epsilon = agent.epsilon
    policy_net = getattr(agent, 'main_net', None)
    was_training = policy_net.training if policy_net is not None else False

    agent.epsilon = 0.0
    if policy_net is not None:
        # Inference mode: no dropout noise, BatchNorm uses running statistics
        policy_net.eval()

    try:
        for _ in range(num_episodes):
            state = env.reset()
            episode_portfolio = [env.initial_capital]

            while True:
                action, _ = agent.get_action(state, training=False)
                next_state, reward, done, info = env.step(action)

                episode_portfolio.append(info['portfolio_value'])

                if done:
                    break
                state = next_state

            performance = env.get_performance_metrics()
            if performance:
                test_results.append(performance)
                portfolio_curves.append(episode_portfolio)
    finally:
        agent.epsilon = original_epsilon
        if policy_net is not None:
            policy_net.train(was_training)

    if not test_results:
        return None

    returns = [r['total_return'] for r in test_results]
    sharpes = [r['sharpe_ratio'] for r in test_results]
    drawdowns = [r['max_drawdown'] for r in test_results]

    avg_return = np.mean(returns)
    avg_sharpe = np.mean(sharpes)
    avg_drawdown = np.mean(drawdowns)
    win_rate = len([r for r in returns if r > 0]) / len(returns)
    volatility = np.std(returns)

    # Buy & Hold baseline over the span the episodes cover, taken from the
    # environment being evaluated rather than from a notebook-level global.
    start_price = env.prices[env.window_size]
    buy_hold_return = (env.prices[-1] - start_price) / start_price

    print(f"üìà Resultados Iron Man DQN ({num_episodes} epis√≥dios):")
    print(f"   üí∞ Retorno m√©dio: {avg_return:+.2%}")
    print(f"   ‚ö° Sharpe ratio: {avg_sharpe:.3f}")
    print(f"   üìâ Max drawdown: {avg_drawdown:+.2%}")
    print(f"   üéØ Taxa de sucesso: {win_rate:.1%}")
    print(f"   üìä Volatilidade: {volatility:.2%}")
    print(f"   üìà Buy & Hold: {buy_hold_return:+.2%}")
    print(f"   üèÜ Alpha vs B&H: {avg_return - buy_hold_return:+.2%}")

    # Information ratio: excess return per unit of return dispersion
    if volatility > 0:
        info_ratio = (avg_return - buy_hold_return) / volatility
        print(f"   üìä Information Ratio: {info_ratio:.3f}")

    return {
        'avg_return': avg_return,
        'avg_sharpe': avg_sharpe,
        'avg_drawdown': avg_drawdown,
        'win_rate': win_rate,
        'volatility': volatility,
        'buy_hold_return': buy_hold_return,
        'alpha': avg_return - buy_hold_return,
        'test_results': test_results,
        'portfolio_curves': portfolio_curves
    }

def plot_ironman_results(training_history, evaluation_results):
    """
    Plot the Iron Man DQN training diagnostics and evaluation results.

    Draws a 3x2 grid (training return, loss, epsilon, Sharpe ratio, replay
    buffer size, DQN vs Buy & Hold bar chart) and, when portfolio curves are
    available, a second figure with the individual portfolio curves, their
    mean and a Buy & Hold reference line.

    Args:
        training_history: Mapping holding the series 'episodes', 'avg_return',
            'avg_loss', 'epsilon', 'sharpe_ratio' and 'memory_size'; every
            series is plotted against 'episodes'.
        evaluation_results: Mapping holding 'avg_return', 'buy_hold_return'
            and 'portfolio_curves', or a falsy value (e.g. None) to skip every
            evaluation-dependent element.

    Returns:
        None. Figures are displayed through plt.show().

    Reads the module-level TICKER_SYMBOL, MEMORY_SIZE and INITIAL_CAPITAL.
    """
    plt.style.use('default')
    fig, axes = plt.subplots(3, 2, figsize=(16, 12))
    fig.suptitle(f'ü§ñ Iron Man DQN Results - {TICKER_SYMBOL}', fontsize=16, fontweight='bold')

    episodes = training_history['episodes']

    # 1. Learning curve (return), with zero and Buy & Hold reference lines
    ax_return = axes[0, 0]
    ax_return.plot(episodes, training_history['avg_return'], 'b-', linewidth=2, label='DQN Return')
    ax_return.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    if evaluation_results:
        ax_return.axhline(y=evaluation_results['buy_hold_return'], color='orange',
                          linestyle='--', label='Buy & Hold')
    ax_return.set_title('Evolu√ß√£o do Retorno (Treinamento)')
    ax_return.set_xlabel('Epis√≥dio')
    ax_return.set_ylabel('Retorno M√©dio')
    ax_return.legend()
    ax_return.grid(True, alpha=0.3)

    # 2. Loss function
    ax_loss = axes[0, 1]
    ax_loss.plot(episodes, training_history['avg_loss'], 'r-', linewidth=2)
    ax_loss.set_title('Loss Function (DQN)')
    ax_loss.set_xlabel('Epis√≥dio')
    ax_loss.set_ylabel('Loss M√©dio')
    ax_loss.grid(True, alpha=0.3)

    # 3. Epsilon decay
    ax_eps = axes[1, 0]
    ax_eps.plot(episodes, training_history['epsilon'], 'g-', linewidth=2)
    ax_eps.set_title('Exploration Decay (Epsilon)')
    ax_eps.set_xlabel('Epis√≥dio')
    ax_eps.set_ylabel('Epsilon')
    ax_eps.grid(True, alpha=0.3)

    # 4. Sharpe ratio evolution
    ax_sharpe = axes[1, 1]
    ax_sharpe.plot(episodes, training_history['sharpe_ratio'], 'purple', linewidth=2)
    ax_sharpe.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    ax_sharpe.set_title('Sharpe Ratio Evolution')
    ax_sharpe.set_xlabel('Epis√≥dio')
    ax_sharpe.set_ylabel('Sharpe Ratio')
    ax_sharpe.grid(True, alpha=0.3)

    # 5. Replay buffer usage against its configured capacity
    ax_memory = axes[2, 0]
    ax_memory.plot(episodes, training_history['memory_size'], 'brown', linewidth=2)
    ax_memory.axhline(y=MEMORY_SIZE, color='r', linestyle='--', alpha=0.5, label='Max Capacity')
    ax_memory.set_title('Experience Replay Buffer')
    ax_memory.set_xlabel('Epis√≥dio')
    ax_memory.set_ylabel('Experi√™ncias Armazenadas')
    ax_memory.legend()
    ax_memory.grid(True, alpha=0.3)

    # 6. Performance comparison (only when an evaluation was run)
    if evaluation_results:
        ax_compare = axes[2, 1]
        methods = ['Iron Man DQN', 'Buy & Hold']
        returns = [evaluation_results['avg_return'], evaluation_results['buy_hold_return']]
        colors = ['red', 'orange']

        bars = ax_compare.bar(methods, returns, color=colors, alpha=0.7)
        ax_compare.set_title('Performance Comparison')
        ax_compare.set_ylabel('Retorno')
        ax_compare.axhline(y=0, color='black', linestyle='--', alpha=0.5)

        # Label each bar; the label sits above positive bars and below the rest
        for bar, return_val in zip(bars, returns):
            height = bar.get_height()
            ax_compare.text(bar.get_x() + bar.get_width() / 2., height,
                            f'{return_val:+.2%}',
                            ha='center', va='bottom' if height > 0 else 'top')

    plt.tight_layout()
    plt.show()

    # Portfolio curves (if available)
    if evaluation_results and evaluation_results['portfolio_curves']:
        portfolio_curves = evaluation_results['portfolio_curves']
        _, ax_portfolio = plt.subplots(figsize=(12, 6))

        # Plot at most the first 10 individual curves to keep the figure readable
        for curve in portfolio_curves[:10]:
            ax_portfolio.plot(curve, alpha=0.3, color='blue')

        # Mean curve. Curves are truncated to their common length first so that
        # episodes of different lengths cannot make np.mean fail on a ragged input.
        common_len = min(len(curve) for curve in portfolio_curves)
        avg_curve = np.mean([curve[:common_len] for curve in portfolio_curves], axis=0)
        ax_portfolio.plot(avg_curve, color='red', linewidth=3, label='M√©dia Iron Man')

        # Buy & Hold reference: a straight line from the initial capital to the
        # final Buy & Hold value (the price path itself is not available here).
        # linspace includes the endpoint, so the line ends exactly at the full
        # Buy & Hold return instead of stopping one step short of it.
        buy_hold_curve = np.linspace(
            INITIAL_CAPITAL,
            INITIAL_CAPITAL * (1 + evaluation_results['buy_hold_return']),
            num=len(avg_curve),
        )
        ax_portfolio.plot(buy_hold_curve, color='orange', linewidth=2, linestyle='--', label='Buy & Hold')

        ax_portfolio.axhline(y=INITIAL_CAPITAL, color='black', linestyle=':', alpha=0.5, label='Capital Inicial')
        ax_portfolio.set_title('Evolu√ß√£o do Portf√≥lio - Iron Man DQN')
        ax_portfolio.set_xlabel('Dias de Trading')
        ax_portfolio.set_ylabel('Valor do Portf√≥lio (R$)')
        ax_portfolio.legend()
        ax_portfolio.grid(True, alpha=0.3)
        plt.show()

# Run the Iron Man evaluation; it needs the loaded data and the training history
if df_ironman is None or 'ironman_training_history' not in locals():
    print("‚ö†Ô∏è Execute primeiro o treinamento para avaliar o agente Iron Man!")
else:
    print("üìä Executando avalia√ß√£o completa Iron Man...")
    ironman_evaluation = evaluate_ironman_agent(ironman_agent, ironman_env, num_episodes=30)
    plot_ironman_results(ironman_training_history, ironman_evaluation)

## 🔧 FLEXIBILIDADE IRON MAN

Sistema completamente flexível - use com qualquer ativo alterando apenas `TICKER_SYMBOL`!

In [None]:
# 🔧 Sistema Iron Man - Flexibilidade Total
"""
🤖 IRON MAN DQN - GUIA DE USO AVANÇADO

Para testar com outros ativos:

1️⃣ Altere TICKER_SYMBOL na segunda célula:
   TICKER_SYMBOL = "VALE3.SA"  # ou BRFS3.SA, ITUB4.SA, etc.

2️⃣ Re-execute todas as células do notebook

3️⃣ O sistema Iron Man automaticamente:
   ✅ Baixa dados históricos do novo ativo
   ✅ Calcula features avançadas (RSI, MACD, Bollinger, etc.)
   ✅ Reconstrói a rede neural DQN
   ✅ Treina o agente com Deep Q-Learning
   ✅ Avalia performance com métricas profissionais
   ✅ Gera visualizações avançadas

🎯 ATIVOS RECOMENDADOS PARA TESTE:
- PETR3.SA, PETR4.SA (Petrobras) - Alta liquidez
- VALE3.SA (Vale) - Commodities
- BRFS3.SA (BRF) - Consumo
- ITUB4.SA (Itaú) - Financeiro
- ABEV3.SA (Ambev) - Bebidas
- WEGE3.SA (WEG) - Industrial

⚡ DIFERENÇAS vs BATMAN:
✅ Estados contínuos (vs discretos)
✅ Redes neurais (vs tabela Q)
✅ Experience replay (vs aprendizado direto)
✅ Target networks (vs rede única)
✅ Features avançadas (vs preços simples)
✅ Métricas profissionais (vs básicas)

💡 REQUISITOS:
- PyTorch instalado: pip install torch
- Mais RAM para redes neurais
- GPU opcional (acelera treinamento)

🔧 OTIMIZAÇÕES DISPONÍVEIS:
- Ajustar HIDDEN_SIZE para complexidade
- Modificar BATCH_SIZE para velocidade
- Alterar LEARNING_RATE para convergência
- Aumentar MEMORY_SIZE para mais experiência
"""

def save_ironman_model(agent, filepath="ironman_model.pth"):
    """Write the agent's networks, optimizer state, epsilon and training stats to ``filepath``."""
    # Training history kept by the agent, stored under its own key
    stats_snapshot = {
        'episode_rewards': agent.episode_rewards,
        'episode_returns': agent.episode_returns,
        'losses': agent.losses
    }

    # Everything that goes into the checkpoint file
    checkpoint = {
        'main_net_state_dict': agent.main_net.state_dict(),
        'target_net_state_dict': agent.target_net.state_dict(),
        'optimizer_state_dict': agent.optimizer.state_dict(),
        'epsilon': agent.epsilon,
        'training_stats': stats_snapshot
    }

    torch.save(checkpoint, filepath)
    print(f"üíæ Modelo Iron Man salvo em: {filepath}")

def load_ironman_model(agent, filepath="ironman_model.pth"):
    """
    Restore a checkpoint written by save_ironman_model into ``agent``.

    Loads the main/target network weights, the optimizer state and epsilon.
    When the checkpoint carries a 'training_stats' entry, the agent's
    episode_rewards, episode_returns and losses are restored from it as well,
    so a save/load round trip no longer drops the training history.
    Checkpoints without that entry leave those attributes untouched.

    Args:
        agent: Object exposing main_net, target_net and optimizer (each with
            load_state_dict) plus an epsilon attribute.
        filepath: Path of the checkpoint file.

    Returns:
        True when the checkpoint was loaded, False when the file does not exist.
    """
    try:
        # NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
        checkpoint = torch.load(filepath, map_location=device)
    except FileNotFoundError:
        print(f"‚ö†Ô∏è Arquivo n√£o encontrado: {filepath}")
        return False

    agent.main_net.load_state_dict(checkpoint['main_net_state_dict'])
    agent.target_net.load_state_dict(checkpoint['target_net_state_dict'])
    agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    agent.epsilon = checkpoint['epsilon']

    # Restore whichever training statistics the checkpoint provides
    saved_stats = checkpoint.get('training_stats') or {}
    for stat_name in ('episode_rewards', 'episode_returns', 'losses'):
        if stat_name in saved_stats:
            setattr(agent, stat_name, saved_stats[stat_name])

    print(f"üì• Modelo Iron Man carregado de: {filepath}")
    return True

# Final Iron Man summary: static feature banner, one line per entry
print(
    "ü§ñ SISTEMA IRON MAN DQN COMPLETO!",
    "=" * 60,
    "‚úÖ Deep Q-Network com PyTorch",
    "‚úÖ Estados cont√≠nuos avan√ßados",
    "‚úÖ Experience Replay Buffer",
    "‚úÖ Target Network para estabilidade",
    "‚úÖ Features t√©cnicas profissionais",
    "‚úÖ M√©tricas de trading avan√ßadas",
    "‚úÖ Visualiza√ß√µes state-of-the-art",
    "‚úÖ Sistema flex√≠vel para qualquer ativo",
    "‚úÖ Otimiza√ß√µes modernas (batch norm, dropout)",
    "‚úÖ Saving/Loading de modelos",
    "=" * 60,
    sep="\n",
)

# Active configuration, read from the notebook-level settings
print(f"üéØ Configurado para: {TICKER_SYMBOL}")
print(f"üß† Arquitetura: {STATE_SIZE} ‚Üí {HIDDEN_SIZE} ‚Üí 3")
print(f"‚ö° Device: {device}")

# Status depends on whether an `ironman_agent` name exists in this namespace
if 'ironman_agent' not in locals():
    print("‚ö†Ô∏è Status: Execute as c√©lulas para treinar")
else:
    print(f"ü§ñ Status: Agente treinado e pronto!")
    print("üí° Para salvar modelo: save_ironman_model(ironman_agent)")

print("\nüöÄ Iron Man tech at your service!")

# 🤖 IRON MAN APPROACH - Reinforcement Learning Trading

## Estratégia: Inovadora e Tecnológica

### Filosofia Iron Man
- **Tecnologia de ponta**: Deep Q-Networks (DQN) com redes neurais
- **Abordagem moderna**: Estados contínuos e aproximação de função
- **Inovação constante**: Experience replay, target networks, double DQN
- **Performance focada**: Otimizado para resultados reais de trading
- **Escalabilidade**: Arquitetura preparada para múltiplos ativos

### Objetivo
Desenvolver um agente de Reinforcement Learning avançado usando **Deep Q-Learning**.
O sistema deve ser **state-of-the-art** e funcionar com qualquer ativo (PETR3, VALE3, BRFS3, etc.).

### Características da Implementação
- ✅ Deep Q-Network (DQN) com PyTorch/TensorFlow
- ✅ Estados contínuos (preços normalizados)
- ✅ Experience Replay Buffer
- ✅ Target Network para estabilidade
- ✅ Indicadores técnicos como features
- ✅ Arquitetura moderna e escalável
- ✅ Performance otimizada