In [1]:
# 🤖 Importações Iron Man (Tecnologia Avançada)
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque, namedtuple
import random
import warnings
warnings.filterwarnings('ignore')

# Seed every random number generator the notebook relies on (Python's `random`
# for epsilon-greedy and replay sampling, NumPy, and PyTorch for weight
# initialisation and dropout) so a Restart & Run All gives comparable results.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("🚀 Iron Man Tech Stack carregado!")
print(f"🔧 Device: {device}")
print(f"🧠 PyTorch version: {torch.__version__}")
print("💡 Sistema pronto para Deep Q-Learning!")

ModuleNotFoundError: No module named 'torch'

## ⚙️ CONFIGURAÇÃO IRON MAN - Estado da Arte

Sistema avançado com redes neurais para qualquer ativo. Simplesmente altere `TICKER_SYMBOL`!

In [None]:
# --- Asset and data window -------------------------------------------------
TICKER_SYMBOL = "PETR3.SA"      # any yfinance ticker can be used here
PERIOD = "2y"                   # history length requested from yfinance
INITIAL_CAPITAL = 10_000.0      # starting cash of the simulated account

# --- DQN hyper-parameters --------------------------------------------------
STATE_SIZE = 20                 # length of the state vector fed to the network
HIDDEN_SIZE = 128               # units in each hidden layer
LEARNING_RATE = 1e-3            # Adam step size
BATCH_SIZE = 64                 # transitions per replay batch
MEMORY_SIZE = 10_000            # replay buffer capacity
TARGET_UPDATE = 100             # target-network sync period
EPSILON_START = 1.0             # initial exploration rate
EPSILON_MIN = 0.01              # exploration floor
EPSILON_DECAY = 0.995           # multiplicative epsilon decay
GAMMA = 0.99                    # discount factor

# --- Training schedule -----------------------------------------------------
NUM_EPISODES = 2_000            # passes over the price history
WARMUP_EPISODES = 100           # early episodes that are always reported

# --- Technical-indicator windows -------------------------------------------
WINDOW_SIZE = 10                # look-back window used for the state
RSI_PERIOD = 14                 # RSI averaging period

# Echo the active configuration so the run is self-describing.
print(f"🤖 Iron Man DQN configurado para: {TICKER_SYMBOL}")
print(f"🧠 Arquitetura: {STATE_SIZE} → {HIDDEN_SIZE} → 3 ações")
print(f"💾 Memory buffer: {MEMORY_SIZE:,} experiências")
print(f"🎯 Episodes: {NUM_EPISODES}")
print(f"⚡ Device: {device}")

## 📊 SISTEMA AVANÇADO DE DADOS IRON MAN

Carregamento inteligente com features avançadas para Deep Learning.

In [None]:
# 🔬 Sistema Avançado de Features Iron Man
def load_advanced_data(ticker_symbol, period="2y"):
    """
    Download price history for a ticker and append technical-indicator columns.

    Args:
        ticker_symbol: Symbol passed to ``yf.Ticker``.
        period: History length passed to ``Ticker.history``.

    Returns:
        ``(df, info)`` where ``df`` is the history with the indicator columns
        added and incomplete rows removed, and ``info`` is the ticker metadata
        dict (empty when the metadata lookup fails). Returns ``(None, None)``
        when the data cannot be loaded; the error is printed, not raised,
        because the following cells branch on ``df is not None``.
    """
    try:
        print(f"🚀 Iron Man carregando dados de {ticker_symbol}...")
        ticker = yf.Ticker(ticker_symbol)
        df = ticker.history(period=period)

        if df.empty:
            raise ValueError(f"Dados não encontrados para {ticker_symbol}")

        close = df['Close']
        roll20 = close.rolling(20)  # shared by SMA_20, Bollinger and price position

        # Simple and log returns
        df['Returns'] = close.pct_change()
        df['Log_Returns'] = np.log(close / close.shift(1))

        # Simple moving averages
        df['SMA_5'] = close.rolling(5).mean()
        df['SMA_10'] = close.rolling(10).mean()
        df['SMA_20'] = roll20.mean()

        # Exponential moving averages
        df['EMA_12'] = close.ewm(span=12).mean()
        df['EMA_26'] = close.ewm(span=26).mean()

        # MACD line and its signal line
        df['MACD'] = df['EMA_12'] - df['EMA_26']
        df['MACD_Signal'] = df['MACD'].ewm(span=9).mean()

        # Bollinger Bands (20-period mean +/- 2 standard deviations)
        df['BB_Middle'] = roll20.mean()
        bb_std = roll20.std()
        df['BB_Upper'] = df['BB_Middle'] + (bb_std * 2)
        df['BB_Lower'] = df['BB_Middle'] - (bb_std * 2)
        df['BB_Position'] = (close - df['BB_Lower']) / (df['BB_Upper'] - df['BB_Lower'])

        # RSI from rolling mean gains and losses
        delta = close.diff()
        gain = delta.where(delta > 0, 0).rolling(RSI_PERIOD).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(RSI_PERIOD).mean()
        rs = gain / loss
        df['RSI'] = 100 - (100 / (1 + rs))

        # Volatility of returns
        df['Volatility'] = df['Returns'].rolling(10).std()

        # ATR: rolling mean of the true range. The true range also accounts for
        # gaps against the previous close, which plain High - Low misses.
        # Computed column-wise instead of with a row-by-row apply.
        prev_close = close.shift(1)
        true_range = pd.concat(
            [
                df['High'] - df['Low'],
                (df['High'] - prev_close).abs(),
                (df['Low'] - prev_close).abs(),
            ],
            axis=1,
        ).max(axis=1)
        df['ATR'] = true_range.rolling(14).mean()

        # Volume relative to its 10-period average
        df['Volume_SMA'] = df['Volume'].rolling(10).mean()
        df['Volume_Ratio'] = df['Volume'] / df['Volume_SMA']

        # Momentum as a price ratio over 5 and 10 periods
        df['Momentum_5'] = close / close.shift(5)
        df['Momentum_10'] = close / close.shift(10)

        # Position of the close inside its 20-period range
        low20 = roll20.min()
        high20 = roll20.max()
        df['Price_Position'] = (close - low20) / (high20 - low20)

        # Divisions by zero above can yield +/-inf, which dropna() would keep
        # and which would then poison normalisation; treat them as missing.
        df = df.replace([np.inf, -np.inf], np.nan).dropna()

        # The metadata lookup is a separate request; its failure must not
        # discard price data that loaded successfully.
        try:
            info = ticker.info or {}
        except Exception as info_error:
            print(f"⚠️ Metadados indisponíveis para {ticker_symbol}: {info_error}")
            info = {}
        company_name = info.get('longName', ticker_symbol)

        print(f"✅ Dados Iron Man carregados: {company_name}")
        print(f"📊 Features: {len(df.columns)} colunas")
        print(f"📅 Período: {df.index[0].date()} até {df.index[-1].date()}")
        print(f"📈 Observações: {len(df)}")
        print(f"💰 Preço atual: R$ {df['Close'].iloc[-1]:.2f}")

        return df, info

    except Exception as e:
        print(f"❌ Erro Iron Man: {e}")
        return None, None

def create_feature_matrix(df, window_size=WINDOW_SIZE):
    """
    Build the numeric feature matrix consumed by the trading environment.

    Every selected column except 'Close' is z-scored with its own mean and
    standard deviation taken over the whole frame; 'Close' is left raw.
    Columns whose standard deviation is not positive are left unscaled.

    Args:
        df: Frame containing all of the selected indicator columns.
        window_size: Accepted for interface compatibility; not used here.

    Returns:
        ``(matrix, feature_columns)`` - the values of the selected columns in
        order, and the list of their names.
    """
    feature_columns = [
        'Close',
        'Volume',
        'Returns',
        'SMA_5',
        'SMA_10',
        'SMA_20',
        'EMA_12',
        'EMA_26',
        'MACD',
        'MACD_Signal',
        'BB_Position',
        'RSI',
        'Volatility',
        'ATR',
        'Volume_Ratio',
        'Momentum_5',
        'Momentum_10',
        'Price_Position',
    ]

    # Work on a copy so the caller's frame keeps its original values.
    scaled = df.copy()
    for name in feature_columns[1:]:  # index 0 is 'Close', which stays raw
        if name not in scaled.columns:
            continue
        centre = scaled[name].mean()
        spread = scaled[name].std()
        if not spread > 0:
            continue
        scaled[name] = (scaled[name] - centre) / spread

    matrix = scaled[feature_columns].values
    return matrix, feature_columns

# Fetch the configured ticker; both names are None when loading fails.
df_ironman, ironman_info = load_advanced_data(
    ticker_symbol=TICKER_SYMBOL,
    period=PERIOD,
)

## 🧠 DEEP Q-NETWORK IRON MAN

Rede neural state-of-the-art com PyTorch para aproximação da função Q.

In [None]:
# 🧠 Deep Q-Network Iron Man (Estado da Arte)
class IronManDQN(nn.Module):
    """
    Fully connected Q-network mapping a state vector to one Q-value per action.

    Layout: two hidden blocks of Linear -> BatchNorm1d -> ReLU -> Dropout(0.2),
    one Linear -> ReLU block at half the hidden width, and a Linear output
    layer. Because the model contains BatchNorm1d and Dropout, its output
    depends on whether the module is in training or evaluation mode.
    """
    def __init__(self, state_size, hidden_size=HIDDEN_SIZE, output_size=3):
        super().__init__()

        half_hidden = hidden_size // 2
        layers = [
            # Hidden block 1
            nn.Linear(state_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.2),
            # Hidden block 2
            nn.Linear(hidden_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.2),
            # Bottleneck block
            nn.Linear(hidden_size, half_hidden),
            nn.ReLU(),
            # One output per action
            nn.Linear(half_hidden, output_size),
        ]
        self.network = nn.Sequential(*layers)

        # Re-initialise every Linear layer (Xavier weights, zero bias).
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-uniform weights and zero bias for Linear layers; others untouched."""
        if not isinstance(module, nn.Linear):
            return
        nn.init.xavier_uniform_(module.weight)
        nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return the Q-values produced by the layer stack for input ``x``."""
        q_values = self.network(x)
        return q_values

# One stored transition: (state, action, reward, next_state, done).
Experience = namedtuple('Experience', 'state action reward next_state done')

class IronManReplayBuffer:
    """
    Bounded store of Experience tuples with uniform random sampling.

    Backed by a ``deque`` with ``maxlen=capacity``. Sampling is uniform;
    no prioritisation is implemented.
    """
    def __init__(self, capacity=MEMORY_SIZE):
        self.capacity = capacity
        self.buffer = deque(maxlen=self.capacity)

    def push(self, state, action, reward, next_state, done):
        """Wrap one transition in an Experience and append it to the buffer."""
        self.buffer.append(
            Experience(
                state=state,
                action=action,
                reward=reward,
                next_state=next_state,
                done=done,
            )
        )

    def sample(self, batch_size=BATCH_SIZE):
        """Return ``batch_size`` stored transitions chosen with ``random.sample``."""
        drawn = random.sample(self.buffer, batch_size)
        return drawn

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.buffer)

# Ações Iron Man
class IronManActions:
    """Integer codes for the three trading actions, plus lookup helpers."""
    HOLD = 0
    BUY = 1
    SELL = 2

    @classmethod
    def get_actions(cls):
        """Return the action codes as a list, in the order HOLD, BUY, SELL."""
        ordered = [cls.HOLD, cls.BUY, cls.SELL]
        return ordered

    @classmethod
    def action_name(cls, action):
        """Return the label for an action code, or "UNKNOWN" if it is not one."""
        labels = dict(zip((cls.HOLD, cls.BUY, cls.SELL), ("HOLD", "BUY", "SELL")))
        if action in labels:
            return labels[action]
        return "UNKNOWN"

# Build the feature matrix and a standalone set of DQN components,
# but only when the data load succeeded.
if df_ironman is None:
    print("❌ Erro: Dados Iron Man não carregados!")
else:
    # Normalised feature matrix plus the raw closing prices
    feature_matrix, feature_names = create_feature_matrix(df_ironman, window_size=WINDOW_SIZE)
    prices_ironman = df_ironman['Close'].values

    print("🧠 Rede Neural Iron Man:")
    print(f"   🔢 Input size: {STATE_SIZE}")
    print(f"   🏗️ Hidden size: {HIDDEN_SIZE}")
    print("   🎯 Output size: 3 ações")
    print(f"   📊 Features disponíveis: {len(feature_names)}")
    print(f"   🖥️ Device: {device}")

    # Online network and a target network starting from the same weights
    main_net = IronManDQN(state_size=STATE_SIZE, hidden_size=HIDDEN_SIZE).to(device)
    target_net = IronManDQN(state_size=STATE_SIZE, hidden_size=HIDDEN_SIZE).to(device)
    target_net.load_state_dict(main_net.state_dict())

    # Adam optimiser over the online network's parameters
    optimizer = optim.Adam(main_net.parameters(), lr=LEARNING_RATE)

    # Experience replay storage
    replay_buffer = IronManReplayBuffer(capacity=MEMORY_SIZE)

    print("✅ Componentes Iron Man inicializados!")
    print("   🧠 Redes: Main + Target")
    print(f"   💾 Buffer: {MEMORY_SIZE:,} experiências")
    print(f"   ⚙️ Otimizador: Adam (lr={LEARNING_RATE})")

## 🏗️ AMBIENTE AVANÇADO IRON MAN

Ambiente de trading com estados contínuos e features avançadas.

In [None]:
# 🏗️ Ambiente de Trading Avançado Iron Man
class IronManTradingEnvironment:
    """
    Single-asset, long-only trading simulator.

    The agent starts each episode with ``initial_capital`` in cash and, at
    every step, chooses HOLD, BUY (all available cash) or SELL (all shares).
    Trades execute at the current close and pay a proportional fee. The
    resulting position is then valued at the *next* close, so the reward
    reflects the price move the action was exposed to.
    """
    # Proportional fee charged on the traded amount of every buy or sell.
    TRANSACTION_FEE = 0.001

    def __init__(self, feature_matrix, prices, initial_capital=INITIAL_CAPITAL, window_size=WINDOW_SIZE):
        """
        Args:
            feature_matrix: 2-D array, one row per time step; column 0 is the
                raw close price, the remaining columns are indicators.
            prices: 1-D sequence of close prices aligned with ``feature_matrix``.
            initial_capital: Cash available at the start of each episode.
            window_size: Number of past steps included in the state.
        """
        self.feature_matrix = feature_matrix
        self.prices = prices
        self.initial_capital = initial_capital
        self.window_size = window_size

        # Price statistics kept for callers that want to normalise prices.
        self.price_mean = np.mean(prices)
        self.price_std = np.std(prices)

        self.reset()

    def reset(self):
        """Restart the episode at step ``window_size`` and return the first state."""
        self.current_step = self.window_size
        self.cash = self.initial_capital
        self.shares = 0
        self.portfolio_history = []
        self.action_history = []
        self.trade_count = 0  # buys/sells that were actually executed

        return self._get_state()

    def _get_state(self):
        """
        Build the fixed-length state vector (``STATE_SIZE`` float32 values).

        Layout: the window's close prices divided by the current price, then
        as many indicators of the *current* step as fit, then three position
        ratios (cash, invested value, total value over initial capital).
        """
        if self.current_step < self.window_size:
            # Not enough history yet: left-pad the window with zero rows.
            padding = np.zeros((self.window_size - self.current_step - 1, self.feature_matrix.shape[1]))
            window_data = np.vstack([padding, self.feature_matrix[:self.current_step + 1]])
        else:
            window_data = self.feature_matrix[self.current_step - self.window_size + 1:self.current_step + 1]

        # Close prices (column 0) relative to the current price.
        current_price = self.prices[self.current_step]
        price_normalized = window_data[:, 0] / current_price

        # Indicators of the most recent row. Flattening the whole window and
        # truncating would instead pick the oldest row's indicators.
        n_indicators = max(0, STATE_SIZE - self.window_size - 3)
        latest_features = window_data[-1, 1:][:n_indicators]

        portfolio_value = self.cash + self.shares * current_price
        position_info = np.array([
            self.cash / self.initial_capital,                    # cash ratio
            self.shares * current_price / self.initial_capital,  # invested ratio
            portfolio_value / self.initial_capital,              # total value ratio
        ])

        state = np.concatenate([price_normalized, latest_features, position_info])

        # Enforce the fixed length expected by the network.
        if len(state) > STATE_SIZE:
            state = state[:STATE_SIZE]
        elif len(state) < STATE_SIZE:
            state = np.concatenate([state, np.zeros(STATE_SIZE - len(state))])

        return state.astype(np.float32)

    def step(self, action):
        """
        Execute ``action`` at the current close and advance one step.

        Returns:
            ``(next_state, reward, done, info)``. ``next_state`` is ``None``
            when ``done`` is True. ``info['portfolio_value']`` is the account
            value at the next close; ``info['current_price']`` is the price
            the action executed at.
        """
        current_price = self.prices[self.current_step]
        portfolio_before = self.cash + self.shares * current_price

        action_executed = False
        transaction_cost = 0
        fee = self.TRANSACTION_FEE

        if action == IronManActions.BUY:
            # Size the order so that cost plus fee fits in the available cash;
            # sizing on price alone would leave the cash balance negative.
            shares_to_buy = int(self.cash // (current_price * (1 + fee)))
            if shares_to_buy > 0:
                cost = shares_to_buy * current_price
                transaction_cost = cost * fee
                self.cash -= (cost + transaction_cost)
                self.shares += shares_to_buy
                action_executed = True

        elif action == IronManActions.SELL and self.shares > 0:
            revenue = self.shares * current_price
            transaction_cost = revenue * fee
            self.cash += (revenue - transaction_cost)
            self.shares = 0
            action_executed = True

        elif action == IronManActions.HOLD:
            # HOLD is always valid.
            action_executed = True

        if action_executed and action != IronManActions.HOLD:
            self.trade_count += 1

        # Advance time first, then value the position at the new price.
        self.action_history.append(action)
        self.current_step += 1
        done = self.current_step >= len(self.prices) - 1
        next_index = min(self.current_step, len(self.prices) - 1)
        next_price = self.prices[next_index]

        portfolio_after = self.cash + self.shares * next_price
        portfolio_return = (portfolio_after - portfolio_before) / portfolio_before if portfolio_before > 0 else 0

        # Market move over the same interval, used as the benchmark for alpha.
        market_return = (next_price - current_price) / current_price
        alpha_reward = (portfolio_return - market_return) * 100

        # Extra per-trade penalty on top of the fee already in the return.
        # NOTE(review): this term is in currency units, so its weight scales
        # with the account size.
        transaction_penalty = transaction_cost

        reward = portfolio_return * 1000 + alpha_reward - transaction_penalty

        self.portfolio_history.append(portfolio_after)

        next_state = self._get_state() if not done else None

        info = {
            'portfolio_value': portfolio_after,
            'cash': self.cash,
            'shares': self.shares,
            'current_price': current_price,
            'action_executed': action_executed,
            'transaction_cost': transaction_cost,
            'alpha_reward': alpha_reward
        }

        return next_state, reward, done, info

    def get_performance_metrics(self):
        """
        Summarise the current episode.

        Returns:
            ``None`` before the first step, otherwise a dict with
            ``total_return``, ``sharpe_ratio`` (annualised with 252 periods),
            ``max_drawdown``, ``final_value`` and ``num_trades`` (executed
            buys and sells only).
        """
        if not self.portfolio_history:
            return None

        values = np.asarray(self.portfolio_history, dtype=float)
        returns = np.diff(values) / values[:-1]

        total_return = (values[-1] - self.initial_capital) / self.initial_capital

        # Sharpe ratio, zero when it is undefined (too few or constant returns).
        if len(returns) > 1 and np.std(returns) > 0:
            sharpe = np.mean(returns) / np.std(returns) * np.sqrt(252)
        else:
            sharpe = 0

        # Largest relative drop from a running peak (a non-positive number).
        peak = np.maximum.accumulate(values)
        drawdown = (values - peak) / peak
        max_drawdown = np.min(drawdown)

        return {
            'total_return': total_return,
            'sharpe_ratio': sharpe,
            'max_drawdown': max_drawdown,
            'final_value': self.portfolio_history[-1],
            'num_trades': self.trade_count
        }

# Create the trading environment and probe one state, if data is available.
if df_ironman is None:
    print("❌ Ambiente Iron Man não pode ser inicializado!")
else:
    ironman_env = IronManTradingEnvironment(
        feature_matrix=feature_matrix,
        prices=prices_ironman,
        initial_capital=INITIAL_CAPITAL,
        window_size=WINDOW_SIZE,
    )

    print("🏗️ Ambiente Iron Man inicializado!")
    print(f"   🎯 Estado size: {STATE_SIZE}")
    print(f"   📊 Features: {feature_matrix.shape[1]}")
    print(f"   💰 Capital: R$ {INITIAL_CAPITAL:,.2f}")
    print(f"   📈 Dados: {len(prices_ironman)} dias")

    # Sanity check: shape and value range of the initial state vector
    test_state = ironman_env.reset()
    print(f"   🧪 Estado teste: shape {test_state.shape}, range [{test_state.min():.3f}, {test_state.max():.3f}]")

## 🤖 AGENTE DQN IRON MAN

Agente avançado com Deep Q-Learning, Experience Replay e Target Network.

In [None]:
# 🤖 Agente DQN Iron Man (Estado da Arte)
class IronManDQNAgent:
    """
    DQN agent with an epsilon-greedy policy, experience replay and a target
    network.

    The online network (``main_net``) is trained on replayed batches; the
    target network (``target_net``) supplies bootstrap targets and is only
    refreshed through ``update_target_network``.
    """
    def __init__(self, state_size=STATE_SIZE, action_size=3, lr=LEARNING_RATE):
        """
        Args:
            state_size: Length of the state vector.
            action_size: Number of discrete actions.
            lr: Learning rate for the Adam optimiser.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.lr = lr

        # Exploration schedule
        self.epsilon = EPSILON_START
        self.epsilon_min = EPSILON_MIN
        self.epsilon_decay = EPSILON_DECAY

        # Online and target networks, starting from identical weights
        self.main_net = IronManDQN(state_size, HIDDEN_SIZE, action_size).to(device)
        self.target_net = IronManDQN(state_size, HIDDEN_SIZE, action_size).to(device)
        self.target_net.load_state_dict(self.main_net.state_dict())
        # The target network is never trained: keep BatchNorm on its running
        # statistics and Dropout disabled so targets are deterministic.
        self.target_net.eval()

        self.optimizer = optim.Adam(self.main_net.parameters(), lr=lr)

        self.memory = IronManReplayBuffer(MEMORY_SIZE)

        # Training statistics
        self.losses = []
        self.episode_rewards = []
        self.episode_returns = []
        self.q_values_history = []

        print(f"🤖 Agente Iron Man DQN inicializado!")
        print(f"   🧠 Arquitetura: {state_size} → {HIDDEN_SIZE} → {action_size}")
        print(f"   ⚡ Device: {device}")
        print(f"   🎯 Epsilon: {self.epsilon}")

    def get_action(self, state, training=True):
        """
        Choose an action with an epsilon-greedy policy.

        Args:
            state: 1-D state vector.
            training: When True, explore with probability ``epsilon`` and
                record the Q-values of greedy decisions.

        Returns:
            ``(action, explored)`` where ``explored`` is True for a random action.
        """
        if training and random.random() < self.epsilon:
            return random.choice(range(self.action_size)), True

        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)

        # A single state is a batch of one. BatchNorm1d refuses that in
        # training mode and Dropout would randomise the choice, so evaluate
        # in eval mode and restore the previous mode afterwards.
        was_training = self.main_net.training
        self.main_net.eval()
        try:
            with torch.no_grad():
                q_values = self.main_net(state_tensor)
        finally:
            self.main_net.train(was_training)

        action = q_values.argmax().item()

        # Keep the Q-values of greedy training decisions for later analysis.
        if training:
            self.q_values_history.append(q_values.cpu().numpy().flatten())

        return action, False

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay buffer."""
        self.memory.push(state, action, reward, next_state, done)

    def replay(self):
        """
        Run one gradient step on a batch sampled from the replay buffer.

        Returns:
            The batch loss as a float, or ``None`` when fewer than
            ``BATCH_SIZE`` transitions are stored.
        """
        if len(self.memory) < BATCH_SIZE:
            return None

        experiences = self.memory.sample(BATCH_SIZE)

        # Stack with NumPy first: building a tensor from a list of arrays is
        # slow. Terminal transitions carry no next state, so use zeros there;
        # their bootstrap term is masked out below anyway.
        blank_state = np.zeros(self.state_size, dtype=np.float32)
        state_batch = np.array([e.state for e in experiences], dtype=np.float32)
        next_state_batch = np.array(
            [blank_state if (e.done or e.next_state is None) else e.next_state for e in experiences],
            dtype=np.float32,
        )

        states = torch.FloatTensor(state_batch).to(device)
        actions = torch.LongTensor([e.action for e in experiences]).to(device)
        rewards = torch.FloatTensor([e.reward for e in experiences]).to(device)
        next_states = torch.FloatTensor(next_state_batch).to(device)
        dones = torch.BoolTensor([e.done for e in experiences]).to(device)

        # Q(s, a) of the actions actually taken
        self.main_net.train()
        current_q_values = self.main_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        # Bootstrap target from the target network; zero beyond terminal states
        with torch.no_grad():
            next_q_values = self.target_net(next_states).max(1)[0]
            target_q_values = rewards + GAMMA * next_q_values * (~dones).float()

        # Huber loss for stability
        loss = F.smooth_l1_loss(current_q_values, target_q_values)

        self.optimizer.zero_grad()
        loss.backward()

        # Gradient clipping for stability
        torch.nn.utils.clip_grad_norm_(self.main_net.parameters(), 1.0)

        self.optimizer.step()

        self.losses.append(loss.item())
        return loss.item()

    def decay_epsilon(self):
        """Apply one multiplicative decay step to epsilon, floored at ``epsilon_min``."""
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def update_target_network(self):
        """Copy the online network's weights into the target network."""
        self.target_net.load_state_dict(self.main_net.state_dict())

    def train_episode(self, env):
        """
        Run one full episode on ``env``, learning after every step.

        Returns:
            Dict with ``episode_reward``, mean ``episode_loss``, ``epsilon``,
            the environment's ``performance`` metrics and ``memory_size``.
        """
        state = env.reset()
        episode_reward = 0
        episode_loss = 0
        loss_count = 0

        while True:
            action, is_exploration = self.get_action(state, training=True)

            next_state, reward, done, info = env.step(action)

            self.remember(state, action, reward, next_state, done)

            # replay() returns None until the buffer holds a full batch.
            loss = self.replay()
            if loss is not None:
                episode_loss += loss
                loss_count += 1

            episode_reward += reward

            if done:
                break

            state = next_state

        # Decay exploration once per episode. Decaying on every gradient step
        # would reach epsilon_min within the first couple of episodes.
        self.decay_epsilon()

        performance = env.get_performance_metrics()

        self.episode_rewards.append(episode_reward)
        if performance:
            self.episode_returns.append(performance['total_return'])

        return {
            'episode_reward': episode_reward,
            'episode_loss': episode_loss / max(1, loss_count),
            'epsilon': self.epsilon,
            'performance': performance,
            'memory_size': len(self.memory)
        }

    def get_training_stats(self, window=100):
        """
        Average the most recent ``window`` entries of each training statistic.

        Returns:
            ``None`` before the first episode, otherwise a dict with
            ``episodes``, ``avg_reward``, ``avg_return``, ``avg_loss``,
            ``epsilon`` and ``memory_size``.
        """
        if not self.episode_rewards:
            return None

        recent_rewards = self.episode_rewards[-window:]
        recent_returns = self.episode_returns[-window:] if self.episode_returns else [0]
        recent_losses = self.losses[-window:] if self.losses else [0]

        return {
            'episodes': len(self.episode_rewards),
            'avg_reward': np.mean(recent_rewards),
            'avg_return': np.mean(recent_returns),
            'avg_loss': np.mean(recent_losses),
            'epsilon': self.epsilon,
            'memory_size': len(self.memory)
        }

# Instantiate the DQN agent once data is available.
if df_ironman is None:
    print("❌ Agente Iron Man não pode ser inicializado!")
else:
    ironman_agent = IronManDQNAgent(
        state_size=STATE_SIZE,
        action_size=len(IronManActions.get_actions()),
        lr=LEARNING_RATE,
    )

    print("✅ Agente Iron Man DQN pronto!")
    print(f"   🧠 Parâmetros: {sum(p.numel() for p in ironman_agent.main_net.parameters()):,}")
    print(f"   💾 Memory capacity: {MEMORY_SIZE:,}")
    print(f"   🎯 Target update: cada {TARGET_UPDATE} episódios")

## 🚀 TREINAMENTO IRON MAN

Treinamento avançado com monitoramento em tempo real e otimizações modernas.

In [None]:
# 🚀 Treinamento Avançado Iron Man
def train_ironman_agent(agent, env, num_episodes=NUM_EPISODES, print_every=200):
    """
    Train ``agent`` on ``env`` for ``num_episodes`` episodes with periodic reports.

    The target network is synchronised every ``TARGET_UPDATE`` episodes. A
    report is printed, and a history row recorded, for each of the first
    ``WARMUP_EPISODES`` episodes and then every ``print_every`` episodes.

    Args:
        agent: Object exposing ``train_episode``, ``update_target_network``,
            ``get_training_stats``, ``main_net``, ``state_size``, ``action_size``.
        env: Environment passed through to ``agent.train_episode``.
        num_episodes: Number of episodes to run.
        print_every: Reporting period in episodes; values <= 0 disable the
            periodic reports after the warm-up.

    Returns:
        Dict of equally long lists, one entry per reported episode.
    """
    print(f"🚀 Iniciando treinamento Iron Man DQN - {num_episodes} episódios")
    print(f"🧠 Arquitetura: {agent.state_size} inputs → {HIDDEN_SIZE} hidden → {agent.action_size} outputs")
    print(f"💾 Experience replay: {MEMORY_SIZE:,} experiências")
    print(f"🎯 Target network update: cada {TARGET_UPDATE} episódios")
    print("=" * 70)

    training_history = {
        'episodes': [],
        'avg_reward': [],
        'avg_return': [],
        'avg_loss': [],
        'epsilon': [],
        'memory_size': [],
        'sharpe_ratio': [],
        'max_drawdown': []
    }

    best_performance = -float('inf')

    for episode in range(1, num_episodes + 1):
        episode_info = agent.train_episode(env)

        if episode % TARGET_UPDATE == 0:
            agent.update_target_network()

        # Track the best return on every episode, not only on reported ones,
        # so the final "best performance" covers the whole run.
        performance = episode_info.get('performance') or {}
        is_new_record = bool(performance) and performance['total_return'] > best_performance
        if is_new_record:
            best_performance = performance['total_return']

        periodic_report = print_every > 0 and episode % print_every == 0
        if periodic_report or episode <= WARMUP_EPISODES:
            stats = agent.get_training_stats()

            print(f"🚀 Episódio {episode}/{num_episodes}")
            print(f"   💰 Reward médio: {stats['avg_reward']:+.2f}")
            print(f"   📊 Retorno médio: {stats['avg_return']:+.2%}")
            print(f"   🧠 Loss médio: {stats['avg_loss']:.4f}")
            print(f"   🔍 Epsilon: {stats['epsilon']:.3f}")
            print(f"   💾 Memory: {stats['memory_size']:,}/{MEMORY_SIZE:,}")

            if performance:
                print(f"   📈 Último retorno: {performance['total_return']:+.2%}")
                print(f"   ⚡ Sharpe ratio: {performance['sharpe_ratio']:.3f}")
                print(f"   📉 Max drawdown: {performance['max_drawdown']:+.2%}")
                print(f"   🔄 Trades: {performance['num_trades']}")

                if is_new_record:
                    print(f"   🏆 NOVO RECORD: {best_performance:+.2%}!")

            print("-" * 50)

            # Record one history row per report so all lists stay aligned.
            training_history['episodes'].append(episode)
            training_history['avg_reward'].append(stats['avg_reward'])
            training_history['avg_return'].append(stats['avg_return'])
            training_history['avg_loss'].append(stats['avg_loss'])
            training_history['epsilon'].append(stats['epsilon'])
            training_history['memory_size'].append(stats['memory_size'])
            training_history['sharpe_ratio'].append(performance['sharpe_ratio'] if performance else 0)
            training_history['max_drawdown'].append(performance['max_drawdown'] if performance else 0)

    print("✅ Treinamento Iron Man concluído!")
    final_stats = agent.get_training_stats()
    if final_stats is None:
        # No episode was run, so there is nothing to summarise.
        return training_history

    print(f"📊 Estatísticas finais:")
    print(f"   🧠 Parâmetros da rede: {sum(p.numel() for p in agent.main_net.parameters()):,}")
    print(f"   💾 Experiências coletadas: {final_stats['memory_size']:,}")
    print(f"   🎯 Epsilon final: {final_stats['epsilon']:.4f}")
    print(f"   💰 Reward médio final: {final_stats['avg_reward']:+.2f}")
    print(f"   📈 Retorno médio final: {final_stats['avg_return']:+.2%}")
    print(f"   🏆 Melhor performance: {best_performance:+.2%}")

    return training_history

# Launch the main training run when both the data and the agent exist.
if df_ironman is None or 'ironman_agent' not in locals():
    print("❌ Componentes Iron Man não disponíveis para treinamento!")
    print("💡 Verifique se PyTorch está instalado: pip install torch")
else:
    print(f"🚀 Iniciando treinamento Iron Man para {TICKER_SYMBOL}")
    print(f"🧮 Usando {device}")

    # Main training run
    ironman_training_history = train_ironman_agent(
        agent=ironman_agent,
        env=ironman_env,
        num_episodes=NUM_EPISODES,
        print_every=250,
    )

## 📊 AVALIAÇÃO E ANÁLISE IRON MAN

Avaliação completa com métricas avançadas e visualizações modernas.

In [None]:
# 📊 Avaliação Completa Iron Man
def evaluate_ironman_agent(agent, env, num_episodes=50):
    """
    Run the greedy policy on ``env`` and summarise the results.

    Exploration is disabled and the policy network is switched to evaluation
    mode for the duration of the call; both are restored afterwards, even if
    an episode raises.

    Args:
        agent: Object exposing ``epsilon``, ``get_action(state, training=...)``
            and, optionally, a ``main_net`` torch module.
        env: Environment exposing ``reset``, ``step``,
            ``get_performance_metrics``, ``initial_capital``, ``window_size``
            and ``prices``.
        num_episodes: Number of evaluation episodes.

    Returns:
        Dict with the averaged metrics, the Buy & Hold benchmark, the
        per-episode metrics and the portfolio curves; ``None`` when no episode
        produced metrics.
    """
    print("🧪 Avaliando agente Iron Man DQN...")

    test_results = []
    portfolio_curves = []

    # Disable exploration, and put the network in eval mode so BatchNorm and
    # Dropout behave deterministically on single-state inputs.
    original_epsilon = agent.epsilon
    policy_net = getattr(agent, 'main_net', None)
    was_training = policy_net.training if policy_net is not None else None
    agent.epsilon = 0.0
    if policy_net is not None:
        policy_net.eval()

    try:
        for _ in range(num_episodes):
            state = env.reset()
            episode_portfolio = [env.initial_capital]

            while True:
                action, _explored = agent.get_action(state, training=False)
                next_state, reward, done, info = env.step(action)

                episode_portfolio.append(info['portfolio_value'])

                if done:
                    break
                state = next_state

            performance = env.get_performance_metrics()
            if performance:
                test_results.append(performance)
                portfolio_curves.append(episode_portfolio)
    finally:
        # Always hand the agent back in the state it was received in.
        agent.epsilon = original_epsilon
        if policy_net is not None:
            policy_net.train(was_training)

    if not test_results:
        return None

    returns = [r['total_return'] for r in test_results]
    sharpes = [r['sharpe_ratio'] for r in test_results]
    drawdowns = [r['max_drawdown'] for r in test_results]

    avg_return = np.mean(returns)
    avg_sharpe = np.mean(sharpes)
    avg_drawdown = np.mean(drawdowns)
    win_rate = len([r for r in returns if r > 0]) / len(returns)
    volatility = np.std(returns)

    # Buy & Hold over the span the episode covers, taken from the evaluated
    # environment's own prices rather than a notebook-level global.
    start_price = env.prices[env.window_size]
    buy_hold_return = (env.prices[-1] - start_price) / start_price

    print(f"📈 Resultados Iron Man DQN ({num_episodes} episódios):")
    print(f"   💰 Retorno médio: {avg_return:+.2%}")
    print(f"   ⚡ Sharpe ratio: {avg_sharpe:.3f}")
    print(f"   📉 Max drawdown: {avg_drawdown:+.2%}")
    print(f"   🎯 Taxa de sucesso: {win_rate:.1%}")
    print(f"   📊 Volatilidade: {volatility:.2%}")
    print(f"   📈 Buy & Hold: {buy_hold_return:+.2%}")
    print(f"   🏆 Alpha vs B&H: {avg_return - buy_hold_return:+.2%}")

    # Information ratio is undefined when all episode returns are identical.
    if volatility > 0:
        info_ratio = (avg_return - buy_hold_return) / volatility
        print(f"   📊 Information Ratio: {info_ratio:.3f}")

    return {
        'avg_return': avg_return,
        'avg_sharpe': avg_sharpe,
        'avg_drawdown': avg_drawdown,
        'win_rate': win_rate,
        'volatility': volatility,
        'buy_hold_return': buy_hold_return,
        'alpha': avg_return - buy_hold_return,
        'test_results': test_results,
        'portfolio_curves': portfolio_curves
    }

def plot_ironman_results(training_history, evaluation_results):
    """
    Plot the Iron Man DQN training diagnostics and evaluation results.

    The first figure is a 3x2 grid: training return, loss, epsilon, Sharpe
    ratio, replay-buffer size and a DQN vs. Buy & Hold bar chart. When
    evaluation portfolio curves are available, a second figure shows up to
    ten individual curves, their mean and a Buy & Hold reference line.

    Args:
        training_history: Mapping with the keys 'episodes', 'avg_return',
            'avg_loss', 'epsilon', 'sharpe_ratio' and 'memory_size'. Every
            series is plotted against 'episodes'.
        evaluation_results: Mapping with the keys 'avg_return',
            'buy_hold_return' and 'portfolio_curves', or a falsy value
            (e.g. None) to skip every evaluation-dependent element.

    Returns:
        None. Both figures are rendered with plt.show().
    """
    plt.style.use('default')
    fig, axes = plt.subplots(3, 2, figsize=(16, 12))
    fig.suptitle(f'🤖 Iron Man DQN Results - {TICKER_SYMBOL}', fontsize=16, fontweight='bold')

    # 1. Learning curve (return)
    episodes = training_history['episodes']
    axes[0,0].plot(episodes, training_history['avg_return'], 'b-', linewidth=2, label='DQN Return')
    axes[0,0].axhline(y=0, color='r', linestyle='--', alpha=0.5)
    if evaluation_results:
        axes[0,0].axhline(y=evaluation_results['buy_hold_return'], color='orange', linestyle='--', label='Buy & Hold')
    axes[0,0].set_title('Evolução do Retorno (Treinamento)')
    axes[0,0].set_xlabel('Episódio')
    axes[0,0].set_ylabel('Retorno Médio')
    axes[0,0].legend()
    axes[0,0].grid(True, alpha=0.3)

    # 2. Loss function
    axes[0,1].plot(episodes, training_history['avg_loss'], 'r-', linewidth=2)
    axes[0,1].set_title('Loss Function (DQN)')
    axes[0,1].set_xlabel('Episódio')
    axes[0,1].set_ylabel('Loss Médio')
    axes[0,1].grid(True, alpha=0.3)

    # 3. Epsilon decay
    axes[1,0].plot(episodes, training_history['epsilon'], 'g-', linewidth=2)
    axes[1,0].set_title('Exploration Decay (Epsilon)')
    axes[1,0].set_xlabel('Episódio')
    axes[1,0].set_ylabel('Epsilon')
    axes[1,0].grid(True, alpha=0.3)

    # 4. Sharpe ratio evolution
    axes[1,1].plot(episodes, training_history['sharpe_ratio'], 'purple', linewidth=2)
    axes[1,1].axhline(y=0, color='r', linestyle='--', alpha=0.5)
    axes[1,1].set_title('Sharpe Ratio Evolution')
    axes[1,1].set_xlabel('Episódio')
    axes[1,1].set_ylabel('Sharpe Ratio')
    axes[1,1].grid(True, alpha=0.3)

    # 5. Replay-buffer usage, with the configured capacity as a reference line
    axes[2,0].plot(episodes, training_history['memory_size'], 'brown', linewidth=2)
    axes[2,0].axhline(y=MEMORY_SIZE, color='r', linestyle='--', alpha=0.5, label='Max Capacity')
    axes[2,0].set_title('Experience Replay Buffer')
    axes[2,0].set_xlabel('Episódio')
    axes[2,0].set_ylabel('Experiências Armazenadas')
    axes[2,0].legend()
    axes[2,0].grid(True, alpha=0.3)

    # 6. Performance comparison (left blank when there is no evaluation)
    if evaluation_results:
        methods = ['Iron Man DQN', 'Buy & Hold']
        returns = [evaluation_results['avg_return'], evaluation_results['buy_hold_return']]
        colors = ['red', 'orange']

        bars = axes[2,1].bar(methods, returns, color=colors, alpha=0.7)
        axes[2,1].set_title('Performance Comparison')
        axes[2,1].set_ylabel('Retorno')
        axes[2,1].axhline(y=0, color='black', linestyle='--', alpha=0.5)

        # Label each bar with its value: above positive bars, below the others
        for bar, return_val in zip(bars, returns):
            height = bar.get_height()
            axes[2,1].text(bar.get_x() + bar.get_width()/2., height,
                          f'{return_val:+.2%}',
                          ha='center', va='bottom' if height > 0 else 'top')

    plt.tight_layout()
    plt.show()

    # Portfolio curves (only when the evaluation produced any)
    if evaluation_results and evaluation_results['portfolio_curves']:
        portfolio_curves = evaluation_results['portfolio_curves']
        plt.figure(figsize=(12, 6))

        # Draw at most ten individual curves to keep the figure readable
        for curve in portfolio_curves[:10]:
            plt.plot(curve, alpha=0.3, color='blue')

        # np.mean(axis=0) needs a rectangular array. Truncating every curve to
        # the shortest one keeps the average well-defined if episodes ever
        # differ in length; with equal-length curves this is a no-op.
        common_len = min(len(curve) for curve in portfolio_curves)
        avg_curve = np.mean([curve[:common_len] for curve in portfolio_curves], axis=0)
        plt.plot(avg_curve, color='red', linewidth=3, label='Média Iron Man')

        # Buy & Hold reference: a straight line from the initial capital to the
        # final Buy & Hold value (a linear approximation, not the real price
        # path). linspace ends exactly at 1.0 so the last point reaches the
        # full buy_hold_return; dividing the index by len() stopped one step short.
        growth_fraction = np.linspace(0.0, 1.0, len(avg_curve))
        buy_hold_curve = INITIAL_CAPITAL * (1 + evaluation_results['buy_hold_return'] * growth_fraction)
        plt.plot(buy_hold_curve, color='orange', linewidth=2, linestyle='--', label='Buy & Hold')

        plt.axhline(y=INITIAL_CAPITAL, color='black', linestyle=':', alpha=0.5, label='Capital Inicial')
        plt.title('Evolução do Portfólio - Iron Man DQN')
        plt.xlabel('Dias de Trading')
        plt.ylabel('Valor do Portfólio (R$)')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.show()

# Run the Iron Man evaluation; it needs the market data and the training
# history produced by the earlier cells.
if df_ironman is None or 'ironman_training_history' not in locals():
    print("⚠️ Execute primeiro o treinamento para avaliar o agente Iron Man!")
else:
    print("📊 Executando avaliação completa Iron Man...")
    ironman_evaluation = evaluate_ironman_agent(
        ironman_agent,
        ironman_env,
        num_episodes=30,
    )
    plot_ironman_results(ironman_training_history, ironman_evaluation)

## 🔧 FLEXIBILIDADE IRON MAN

Sistema completamente flexível - use com qualquer ativo alterando apenas `TICKER_SYMBOL`!

In [None]:
# 🔧 Iron Man system - full flexibility
# The triple-quoted string below is a bare expression statement used as an
# in-notebook usage guide: it is neither assigned nor printed, so it has no
# runtime effect.
"""
🤖 IRON MAN DQN - GUIA DE USO AVANÇADO

Para testar com outros ativos:

1️⃣ Altere TICKER_SYMBOL na segunda célula:
   TICKER_SYMBOL = "VALE3.SA"  # ou BRFS3.SA, ITUB4.SA, etc.

2️⃣ Re-execute todas as células do notebook

3️⃣ O sistema Iron Man automaticamente:
   ✅ Baixa dados históricos do novo ativo
   ✅ Calcula features avançadas (RSI, MACD, Bollinger, etc.)
   ✅ Reconstrói a rede neural DQN
   ✅ Treina o agente com Deep Q-Learning
   ✅ Avalia performance com métricas profissionais
   ✅ Gera visualizações avançadas

🎯 ATIVOS RECOMENDADOS PARA TESTE:
- PETR3.SA, PETR4.SA (Petrobras) - Alta liquidez
- VALE3.SA (Vale) - Commodities
- BRFS3.SA (BRF) - Consumo
- ITUB4.SA (Itaú) - Financeiro
- ABEV3.SA (Ambev) - Bebidas
- WEGE3.SA (WEG) - Industrial

⚡ DIFERENÇAS vs BATMAN:
✅ Estados contínuos (vs discretos)
✅ Redes neurais (vs tabela Q)
✅ Experience replay (vs aprendizado direto)
✅ Target networks (vs rede única)
✅ Features avançadas (vs preços simples)
✅ Métricas profissionais (vs básicas)

💡 REQUISITOS:
- PyTorch instalado: pip install torch
- Mais RAM para redes neurais
- GPU opcional (acelera treinamento)

🔧 OTIMIZAÇÕES DISPONÍVEIS:
- Ajustar HIDDEN_SIZE para complexidade
- Modificar BATCH_SIZE para velocidade
- Alterar LEARNING_RATE para convergência
- Aumentar MEMORY_SIZE para mais experiência
"""

def save_ironman_model(agent, filepath="ironman_model.pth"):
    """
    Write the agent's networks, optimizer, epsilon and training stats to disk.

    Args:
        agent: Object exposing main_net, target_net, optimizer, epsilon,
            episode_rewards, episode_returns and losses.
        filepath: Destination passed to torch.save.
    """
    # Assemble the checkpoint entry by entry, keeping the on-disk key order.
    checkpoint = {}
    checkpoint['main_net_state_dict'] = agent.main_net.state_dict()
    checkpoint['target_net_state_dict'] = agent.target_net.state_dict()
    checkpoint['optimizer_state_dict'] = agent.optimizer.state_dict()
    checkpoint['epsilon'] = agent.epsilon

    stats = {}
    stats['episode_rewards'] = agent.episode_rewards
    stats['episode_returns'] = agent.episode_returns
    stats['losses'] = agent.losses
    checkpoint['training_stats'] = stats

    torch.save(checkpoint, filepath)
    print(f"💾 Modelo Iron Man salvo em: {filepath}")

def load_ironman_model(agent, filepath="ironman_model.pth"):
    """
    Restore an agent from a checkpoint written by save_ironman_model.

    Loads both network state dicts, the optimizer state and epsilon into
    `agent`. When the checkpoint carries a 'training_stats' entry, the stored
    episode rewards, episode returns and losses are put back on the agent as
    well, so a save/load round trip no longer drops them.

    Args:
        agent: Object exposing main_net, target_net, optimizer and epsilon.
        filepath: Path of the checkpoint file.

    Returns:
        True when the checkpoint was loaded, False when `filepath` does not
        exist. Any other error (e.g. a checkpoint missing a required key)
        propagates to the caller.
    """
    # Only the file read can report a missing file, so keep the try narrow.
    try:
        checkpoint = torch.load(filepath, map_location=device)
    except FileNotFoundError:
        print(f"⚠️ Arquivo não encontrado: {filepath}")
        return False

    agent.main_net.load_state_dict(checkpoint['main_net_state_dict'])
    agent.target_net.load_state_dict(checkpoint['target_net_state_dict'])
    agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    agent.epsilon = checkpoint['epsilon']

    # Restore the statistics that save_ironman_model stores. Checkpoints
    # without them (or with only some of them) leave the agent's current
    # values untouched.
    training_stats = checkpoint.get('training_stats') or {}
    for stat_name in ('episode_rewards', 'episode_returns', 'losses'):
        if stat_name in training_stats:
            setattr(agent, stat_name, training_stats[stat_name])

    print(f"📥 Modelo Iron Man carregado de: {filepath}")
    return True

# Final Iron Man summary: static feature banner first, then the live
# configuration values, then the training status.
print(
    "🤖 SISTEMA IRON MAN DQN COMPLETO!",
    "=" * 60,
    "✅ Deep Q-Network com PyTorch",
    "✅ Estados contínuos avançados",
    "✅ Experience Replay Buffer",
    "✅ Target Network para estabilidade",
    "✅ Features técnicas profissionais",
    "✅ Métricas de trading avançadas",
    "✅ Visualizações state-of-the-art",
    "✅ Sistema flexível para qualquer ativo",
    "✅ Otimizações modernas (batch norm, dropout)",
    "✅ Saving/Loading de modelos",
    "=" * 60,
    sep="\n",
)
# Kept as separate prints so each line is emitted before the next name is read.
print(f"🎯 Configurado para: {TICKER_SYMBOL}")
print(f"🧠 Arquitetura: {STATE_SIZE} → {HIDDEN_SIZE} → 3")
print(f"⚡ Device: {device}")

if 'ironman_agent' not in locals():
    print("⚠️ Status: Execute as células para treinar")
else:
    print("🤖 Status: Agente treinado e pronto!")
    print("💡 Para salvar modelo: save_ironman_model(ironman_agent)")

print("\n🚀 Iron Man tech at your service!")

# 🤖 IRON MAN APPROACH - Reinforcement Learning Trading

## Estratégia: Inovadora e Tecnológica

### Filosofia Iron Man
- **Tecnologia de ponta**: Deep Q-Networks (DQN) com redes neurais
- **Abordagem moderna**: Estados contínuos e aproximação de função
- **Inovação constante**: Experience replay, target networks, double DQN
- **Performance focada**: Otimizado para resultados reais de trading
- **Escalabilidade**: Arquitetura preparada para múltiplos ativos

### Objetivo
Desenvolver um agente de Reinforcement Learning avançado usando **Deep Q-Learning**.
O sistema deve ser **state-of-the-art** e funcionar com qualquer ativo (PETR3, VALE3, BRFS3, etc.).

### Características da Implementação
- ✅ Deep Q-Network (DQN) com PyTorch
- ✅ Estados contínuos (preços normalizados)
- ✅ Experience Replay Buffer
- ✅ Target Network para estabilidade
- ✅ Indicadores técnicos como features
- ✅ Arquitetura moderna e escalável
- ✅ Performance otimizada