In [None]:
# CELL 1: Setup
!pip install -q yfinance lightgbm torch numpy pandas
import numpy as np, pandas as pd, yfinance as yf, torch, torch.nn as nn
from datetime import datetime, timedelta
from collections import deque
import random, json, warnings
warnings.filterwarnings('ignore')
# Seed every RNG this notebook draws from (epsilon-greedy exploration and
# memory sampling use `random`; weight init and dropout use torch) so that
# repeated runs start from the same random state.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'üéÆ Device: {DEVICE}')

In [None]:
# CELL 2: Your 50 Tickers
# Symbol universe for the download cell. Kept as whitespace-separated text
# (ten symbols per row) so adding or removing a ticker is a one-word edit.
TICKERS = """
    APLD SERV MRVL HOOD LUNR BAC  QCOM UUUU  TSLA AMD
    NOW  NVDA MU   PG   DLB  XME  KRYS LEU   QTUM SPY
    UNH  WMT  OKLO RXRX MTZ  SNOW GRRR BSX   LLY  VOO
    GEO  CXW  LYFT MNDY BA   LAC  INTC ALK   LMT  CRDO
    ANET META RIVN GOOGL HL  TEM  TDOC KMTS  SCHA B
""".split()
print(f'üìä {len(TICKERS)} tickers loaded')

In [None]:
# CELL 3: Download ALL historical data (2015-now)
# Fills ALL_DATA with one price frame per ticker that returned more than 500
# rows. Tickers that fail or come back too short are recorded in
# FAILED_TICKERS instead of being dropped silently.
print('üì• Downloading 10 years of data...')
ALL_DATA = {}
FAILED_TICKERS = {}  # ticker -> short reason it was left out of ALL_DATA
for t in TICKERS:
    try:
        df = yf.download(t, start='2015-01-01', progress=False)
        if len(df) <= 500:
            FAILED_TICKERS[t] = f'only {len(df)} rows (need > 500)'
            continue
        # When the columns arrive as a MultiIndex, keep only the first level so
        # later cells can address them as df['Close'], df['Volume'], ...
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        ALL_DATA[t] = df
    except Exception as exc:
        # Best effort: one bad symbol must not abort the batch. Catching
        # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        FAILED_TICKERS[t] = f'{type(exc).__name__}: {exc}'
print(f'‚úÖ {len(ALL_DATA)} tickers ready')
if FAILED_TICKERS:
    print(f'Skipped {len(FAILED_TICKERS)} tickers: {FAILED_TICKERS}')

In [None]:
# CELL 4: Trading Environment V4 - ALPHAGO MINDSET
# Never get stuck. Losses = Learning. Save cash for dips. Lock in what works.

class StrategyDNA:
    """Rule thresholds shared by the trading environment.

    Plain class-level dicts used as a constants namespace; the class is never
    instantiated in this notebook (TradingEnv stores the class itself).
    Fractions are expressed as decimals (0.05 == 5%).
    """

    # Dip-buy signal: drawdown below min_drop, RSI below max_rsi and volume
    # ratio above min_vol must all hold.
    DIP_BUY = dict(min_drop=-0.08, max_rsi=35, min_vol=1.3)

    # Profit-take signal. Only 'max_rsi' is read by the code in this file, and
    # it is used as a floor (rsi > max_rsi); 'min_gain' and 'target' are not
    # referenced in the visible cells.
    PROFIT_TAKE = dict(min_gain=0.05, target=0.08, max_rsi=70)

    # Holding penalties: P&L below max_loss, or any red position held for at
    # least max_days.
    CUT_LOSS = dict(max_loss=-0.05, max_days=3)

    # Cash rules applied on every buy: fraction of cash kept back and fraction
    # of cash allowed per purchase. 'dip_deploy' is not referenced in the
    # visible cells.
    CASH = dict(min_reserve=0.20, max_position=0.15, dip_deploy=0.25)


class TradingEnv:
    """
    Long-only, daily-bar trading simulator with a hand-shaped reward.

    ``data`` maps ticker -> price frame. The 'Close', 'High', 'Low' and
    'Volume' columns are read by integer row position, so every frame is
    assumed to be ordered oldest-to-newest (TODO confirm for the data source).

    Principles the reward shaping is built around:
    1. Losses are LEARNING opportunities, not failures
    2. Cash preservation = Future opportunity
    3. Never get stuck in losing positions
    4. Adapt strategy based on what's working
    """

    def __init__(self, data, cash=10000):
        """Store the price data and starting cash, then reset to day 60."""
        self.data = data
        self.start_cash = cash
        self.dna = StrategyDNA
        self.reset()

    def reset(self):
        """Clear all portfolio and bookkeeping state and return the initial state list."""
        self.cash = self.start_cash
        self.positions = {}               # ticker -> {'shares', 'entry', 'day'}
        self.day_trades = deque(maxlen=5)  # days on which a quick round-trip was closed
        self.day = 60                      # row index of "today" in every frame
        self.history = []

        # Performance tracking for adaptation
        self.wins = 0
        self.losses = 0
        self.total_trades = 0
        self.losing_streak = 0
        self.recent_actions = deque(maxlen=20)

        # Learning state
        self.pattern_hits = {'dip_buy': 0, 'profit_take': 0, 'cut_loss': 0}
        self.pattern_success = {'dip_buy': 0, 'profit_take': 0, 'cut_loss': 0}

        return self._get_state()

    def _get_state(self):
        """
        Build one feature dict per tradable ticker for the current day.

        A ticker is skipped when it has no row at ``self.day`` or fewer than 10
        closes in the (up to) 21-row lookback window. Each dict holds the
        ticker ('t'), last close ('p'), price/volume features, the two rule
        signals, the position context for that ticker, and portfolio-wide
        'cash_ratio' / 'losing_streak' (the latter clipped at 5 and scaled to
        the range 0..1).
        """
        # Portfolio-level context is the same for every ticker: compute it once
        # instead of once per ticker.
        total_value = self.cash + sum(
            pos['shares'] * self.data[tick]['Close'].iloc[min(self.day, len(self.data[tick]) - 1)]
            for tick, pos in self.positions.items() if tick in self.data
        )
        cash_ratio = self.cash / total_value if total_value > 0 else 1
        streak_norm = min(self.losing_streak, 5) / 5  # Normalized

        window_start = max(0, self.day - 20)
        window_end = self.day + 1

        states = []
        for t, df in self.data.items():
            if self.day >= len(df):
                continue

            # Price data for the lookback window (today included)
            c = df['Close'].iloc[window_start:window_end].values
            v = df['Volume'].iloc[window_start:window_end].values
            h = df['High'].iloc[window_start:window_end].values
            l = df['Low'].iloc[window_start:window_end].values
            if len(c) < 10:
                continue

            # Basic features. len(c) >= 10 from here on, so c[-6] always
            # exists; c[-11] needs the explicit length check.
            ret5 = c[-1] / c[-6] - 1
            ret10 = (c[-1] / c[-11] - 1) if len(c) > 10 else 0
            vol_r = v[-1] / (np.mean(v) + 1)  # +1 keeps the ratio finite on zero volume
            rsi = self._rsi(c)

            # DIP DETECTION - position of today's close inside the window range
            high_20 = max(c)
            low_20 = min(c)
            drawdown = (c[-1] - high_20) / high_20
            recovery = (c[-1] - low_20) / (high_20 - low_20 + 0.001)

            # VOLATILITY - mean high-low range over the last 14 rows
            atr = np.mean(h[-14:] - l[-14:]) if len(h) >= 14 else 0
            atr_pct = atr / c[-1] if c[-1] > 0 else 0
            vol_squeeze = 1 if atr_pct < 0.02 else 0
            vol_spike = 1 if vol_r > 2.0 else 0

            # MOMENTUM QUALITY - is the latest 5-row move larger than the one before it?
            mom_5 = c[-1] - c[-6]
            mom_10 = c[-6] - c[-11] if len(c) > 10 else 0
            momentum_accel = 1 if mom_5 > mom_10 else -1

            # Rule signals derived from StrategyDNA thresholds
            dip_buy_signal = 1 if (drawdown < self.dna.DIP_BUY['min_drop'] and
                                   rsi < self.dna.DIP_BUY['max_rsi'] and
                                   vol_r > self.dna.DIP_BUY['min_vol']) else 0

            profit_take_signal = 1 if (recovery > 0.85 and
                                       rsi > self.dna.PROFIT_TAKE['max_rsi']) else 0

            # POSITION CONTEXT - are we in this trade?
            in_position = 1 if t in self.positions else 0
            position_pnl = 0
            days_held = 0
            if in_position:
                pos = self.positions[t]
                position_pnl = (c[-1] / pos['entry'] - 1)
                days_held = self.day - pos['day']

            states.append({
                't': t, 'p': c[-1],
                'r5': ret5, 'r10': ret10,
                'vol': vol_r, 'rsi': rsi,
                'drawdown': drawdown,
                'recovery': recovery,
                'vol_squeeze': vol_squeeze,
                'vol_spike': vol_spike,
                'mom_accel': momentum_accel,
                'dip_buy': dip_buy_signal,
                'profit_take': profit_take_signal,
                'in_position': in_position,
                'position_pnl': position_pnl,
                'days_held': days_held,
                'cash_ratio': cash_ratio,
                'losing_streak': streak_norm
            })
        return states

    def _rsi(self, c):
        """RSI-style oscillator (0..100) from the mean up-move and mean down-move in ``c``."""
        d = np.diff(c)
        g = np.mean(d[d>0]) if len(d[d>0])>0 else 0
        # 1e-8 stands in for "no down moves" so the division below stays defined
        l = abs(np.mean(d[d<0])) if len(d[d<0])>0 else 1e-8
        return 100 - 100/(1+g/l)

    def _can_day_trade(self):
        """True while fewer than 3 quick round-trips were recorded in the last 5 days."""
        recent = [d for d in self.day_trades if d >= self.day - 5]
        return len(recent) < 3

    def step(self, actions):
        """
        Apply one day's actions, advance the clock by one day, and score the result.

        ``actions`` is an iterable of ``{'t': ticker, 'a': 'buy'|'hold'|'sell'}``.
        Actions for unknown tickers, tickers without a row today, or tickers
        that produced no state today are ignored.

        Returns ``(next_state, reward, done, info)`` where ``info`` carries
        'value' (portfolio value at the new day), 'trades' (log strings),
        'win_rate', 'total_trades', 'losing_streak' and 'pattern_hits'.
        """
        reward = 0
        trades_log = []

        # Today's market snapshot, built once for the whole step. Only
        # price-derived fields (dip_buy, drawdown, vol_squeeze, profit_take)
        # are read from it below, and those do not change while actions are
        # applied, so one snapshot is equivalent to rebuilding it per action.
        state_by_ticker = {s['t']: s for s in self._get_state()}

        for act in actions:
            t, a = act['t'], act['a']
            if t not in self.data:
                continue
            df = self.data[t]
            if self.day >= len(df):
                continue
            price = df['Close'].iloc[self.day]

            state = state_by_ticker.get(t)
            if not state:
                continue

            if a == 'buy':
                # Cash management: cap the purchase at max_position of current
                # cash and never dip below min_reserve of current cash.
                max_position_value = self.cash * self.dna.CASH['max_position']
                min_cash_after = self.cash * self.dna.CASH['min_reserve']

                if self.cash > price and self.cash - price > min_cash_after:
                    shares = min(int(max_position_value / price),
                                 int((self.cash - min_cash_after) / price))

                    if shares > 0:
                        cost = shares * price
                        self.cash -= cost

                        if t in self.positions:
                            # Adding to a position: average the entry price,
                            # keep the original entry day.
                            old = self.positions[t]
                            total_shares = old['shares'] + shares
                            avg_price = (old['shares']*old['entry'] + cost) / total_shares
                            self.positions[t] = {'shares': total_shares, 'entry': avg_price, 'day': old['day']}
                        else:
                            self.positions[t] = {'shares': shares, 'entry': price, 'day': self.day}

                        trades_log.append(f'BUY {t}')
                        self.recent_actions.append('buy')

                        # Flat bonus, largest when the buy matches the dip rule
                        if state['dip_buy']:
                            reward += 75  # BIG reward for dip buying
                            self.pattern_hits['dip_buy'] += 1
                        elif state['drawdown'] < -0.05:
                            reward += 30  # Smaller dip
                        elif state['vol_squeeze']:
                            reward += 20  # Squeeze breakout potential
                        else:
                            reward += 5  # Any buy

            elif a == 'sell' and t in self.positions:
                pos = self.positions[t]
                # No same-day exit; a next-day exit is subject to the
                # quick-trade limit in _can_day_trade().
                if pos['day'] == self.day:
                    continue
                if pos['day'] == self.day - 1 and not self._can_day_trade():
                    continue

                proceeds = pos['shares'] * price
                profit = proceeds - (pos['shares'] * pos['entry'])
                pct = profit / (pos['shares'] * pos['entry']) * 100
                days_held = self.day - pos['day']

                self.cash += proceeds
                self.total_trades += 1

                # Reward is the dollar profit scaled by a tier multiplier
                if pct >= 8:  # BIG WIN
                    reward += profit * 5
                    self.wins += 1
                    self.losing_streak = 0
                    if state['profit_take']:
                        self.pattern_success['profit_take'] += 1
                elif pct >= 5:  # Good win
                    reward += profit * 3
                    self.wins += 1
                    self.losing_streak = 0
                elif pct >= 2:  # Small win
                    reward += profit * 2
                    self.wins += 1
                    self.losing_streak = 0
                elif pct >= 0:  # Breakeven-ish (counts as a win, streak untouched)
                    reward += profit
                    self.wins += 1
                elif pct > -3:  # Small loss - GOOD for cutting early
                    reward += profit * 0.3  # Less penalty
                    self.losses += 1
                    self.losing_streak += 1
                    self.pattern_hits['cut_loss'] += 1
                    self.pattern_success['cut_loss'] += 1
                else:  # Big loss - should have cut earlier
                    # NOTE(review): the 0.1 multiplier charges LESS per dollar
                    # lost than the 0.3 used for small losses above, which
                    # contradicts the "big penalty" intent. Left unchanged
                    # here; confirm the intended multiplier.
                    reward += profit * 0.1
                    self.losses += 1
                    self.losing_streak += 1

                # BONUS for smart profit taking
                if state['profit_take'] and pct > 5:
                    reward += 40
                    self.pattern_hits['profit_take'] += 1

                # BONUS for quick loss cutting
                if pct < -2 and days_held <= 3:
                    reward += 20  # Good discipline!

                # Record quick round-trips for the _can_day_trade() limit
                if pos['day'] >= self.day - 1:
                    self.day_trades.append(self.day)

                del self.positions[t]
                trades_log.append(f'SELL {t} {pct:+.1f}%')
                self.recent_actions.append('sell')

            elif a == 'hold':
                self.recent_actions.append('hold')

                # Penalize holding losers too long
                if t in self.positions:
                    pos = self.positions[t]
                    pnl_pct = (price / pos['entry'] - 1)
                    days_held = self.day - pos['day']

                    if pnl_pct < self.dna.CUT_LOSS['max_loss']:
                        reward -= 30  # Should have cut!
                    elif pnl_pct < 0 and days_held >= self.dna.CUT_LOSS['max_days']:
                        reward -= 20  # 3 days red = cut
                    elif days_held > 15:
                        reward -= 10  # Don't marry positions

                # BONUS for holding cash during losing streak
                if self.losing_streak >= 2:
                    cash_ratio = self.cash / self.start_cash
                    if cash_ratio > 0.3:
                        reward += 5  # Good cash preservation

        self.day += 1

        # Portfolio value at the new day. A position whose ticker has no row at
        # the new day contributes nothing to this figure.
        port_val = self.cash
        for t, pos in self.positions.items():
            if t in self.data and self.day < len(self.data[t]):
                port_val += pos['shares'] * self.data[t]['Close'].iloc[self.day]

        # The episode ends one row before the shortest frame runs out. With no
        # data at all the episode is over immediately instead of raising.
        shortest = min((len(frame) for frame in self.data.values()), default=0)
        done = self.day >= shortest - 1
        self.history.append({'day': self.day, 'value': port_val})

        win_rate = self.wins / max(1, self.total_trades)
        return self._get_state(), reward, done, {
            'value': port_val,
            'trades': trades_log,
            'win_rate': win_rate,
            'total_trades': self.total_trades,
            'losing_streak': self.losing_streak,
            'pattern_hits': self.pattern_hits
        }

# Console banner summarising the environment defined in this cell.
print(
    '‚úÖ Environment V4 - ALPHAGO MINDSET',
    '   üß¨ Strategy DNA: Encoded winning patterns',
    '   üìâ Dip buying: +75 reward on confirmed dips',
    '   üí∞ Profit taking: +40 bonus at right time',
    '   ‚úÇÔ∏è  Loss cutting: Rewarded for quick exits',
    '   üíµ Cash preservation: Bonuses during streaks',
    '   üîÑ Adaptation: Penalizes stuck behavior',
    sep='\n',
)

In [None]:
# CELL 5: Neural Network Brain V4 - ALPHAGO ARCHITECTURE
# Multiple pathways for different market conditions
# Learns WHEN to use WHICH strategy

class TradingBrain(nn.Module):
    """
    Gated multi-pathway policy/value network.

    - A shared encoder maps the feature vector to a hidden representation.
    - Four identically shaped pathways (dip, momentum, risk, cash) each emit a
      32-dim vector from that representation.
    - A softmax gate produces one weight per pathway; every pathway output is
      scaled by its weight and the four are concatenated (4 * 32 = 128).
    - A policy head turns the concatenation into buy/hold/sell probabilities
      and a value head turns it into a single scalar.
    """

    N_PATHWAYS = 4
    PATHWAY_DIM = 32

    def __init__(self, n_features=17, hidden=256):
        """
        Args:
            n_features: size of the last dimension of the input tensor.
            hidden: width of the shared encoder.
        """
        super().__init__()

        # Shared feature extraction
        self.encoder = nn.Sequential(
            nn.Linear(n_features, hidden),
            nn.LayerNorm(hidden),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.15),
            nn.Linear(hidden, hidden),
            nn.LayerNorm(hidden),
            nn.LeakyReLU(0.1),
        )

        # STRATEGY PATHWAYS - same architecture, separate weights. Attribute
        # names and creation order are kept stable so existing checkpoints
        # (state_dict keys) still load.
        self.dip_pathway = self._make_pathway(hidden)       # 1. oversold bounces
        self.momentum_pathway = self._make_pathway(hidden)  # 2. trend riding
        self.risk_pathway = self._make_pathway(hidden)      # 3. exit timing
        self.cash_pathway = self._make_pathway(hidden)      # 4. capital preservation

        # Strategy combiner - one softmax weight per pathway
        self.strategy_gate = nn.Sequential(
            nn.Linear(hidden, self.N_PATHWAYS),
            nn.Softmax(dim=-1)
        )

        combined_dim = self.N_PATHWAYS * self.PATHWAY_DIM  # 128

        # Final decision layers
        self.policy_head = nn.Sequential(
            nn.Linear(combined_dim, 64),
            nn.LeakyReLU(0.1),
            nn.Linear(64, 3)  # buy/hold/sell
        )

        self.value_head = nn.Sequential(
            nn.Linear(combined_dim, 64),
            nn.LeakyReLU(0.1),
            nn.Linear(64, 1)
        )

        # Start the policy logits biased away from "hold" so an untrained
        # network does not default to doing nothing.
        with torch.no_grad():
            self.policy_head[-1].bias[0] = 0.3   # Favor buy
            self.policy_head[-1].bias[1] = -0.5  # Discourage hold
            self.policy_head[-1].bias[2] = 0.2   # Allow sell

    @classmethod
    def _make_pathway(cls, hidden):
        """Build one strategy pathway: hidden -> 64 -> PATHWAY_DIM."""
        return nn.Sequential(
            nn.Linear(hidden, 64),
            nn.LeakyReLU(0.1),
            nn.Linear(64, cls.PATHWAY_DIM),
        )

    def forward(self, x):
        """
        Args:
            x: float tensor whose last dimension is ``n_features``. Any number
               of leading dimensions is accepted (a single unbatched vector,
               the usual [batch, n_features], or more).

        Returns:
            (policy, value, weights): action probabilities [..., 3], scalar
            value estimate [..., 1], and pathway gate weights [..., 4].
        """
        # Encode features
        features = self.encoder(x)

        # Run through all strategy pathways
        dip_out = self.dip_pathway(features)
        mom_out = self.momentum_pathway(features)
        risk_out = self.risk_pathway(features)
        cash_out = self.cash_pathway(features)

        # Gate weights (which pathway to trust)
        weights = self.strategy_gate(features)  # [..., 4]

        # Scale each pathway by its gate weight. Indexing with `...` (instead
        # of a hard-coded batch axis) keeps this valid for any leading shape.
        combined = torch.cat([
            dip_out * weights[..., 0:1],
            mom_out * weights[..., 1:2],
            risk_out * weights[..., 2:3],
            cash_out * weights[..., 3:4],
        ], dim=-1)

        # Logits are multiplied by 2.5 before the softmax, which sharpens the
        # resulting distribution.
        logits = self.policy_head(combined) * 2.5
        policy = torch.softmax(logits, dim=-1)

        # Value estimate
        value = self.value_head(combined)

        return policy, value, weights


class ActionSelector:
    """
    Epsilon-greedy action selection whose random draws are biased by the
    rule signals carried in the state dict.

    Actions are encoded as 0 = buy, 1 = hold, 2 = sell.
    """

    # The environment reports 'losing_streak' as min(streak, 5) / 5, i.e. on a
    # 0..1 scale (2 losses -> 0.4, 3 losses -> 0.6). 0.5 therefore means
    # "more than two losses in a row"; comparing the normalised value against
    # a raw count of 2 could never be true.
    LOSING_STREAK_THRESHOLD = 0.5

    @staticmethod
    def select(probs, state, epsilon=0.1):
        """
        Pick an action index for one ticker.

        Args:
            probs: 1-D tensor of action probabilities (buy, hold, sell).
            state: feature dict for the ticker; missing keys default to 0.
            epsilon: probability of drawing a random (signal-biased) action
                instead of taking the arg-max of ``probs``.

        Returns:
            int in {0, 1, 2}.
        """
        dip_signal = state.get('dip_buy', 0)
        profit_signal = state.get('profit_take', 0)
        in_position = state.get('in_position', 0)
        position_pnl = state.get('position_pnl', 0)
        days_held = state.get('days_held', 0)
        losing_streak = state.get('losing_streak', 0)

        # Random exploration with signal bias; first matching rule wins
        if random.random() < epsilon:
            if dip_signal and not in_position:
                # Dip signal + not in position = heavy buy bias
                weights = [0.80, 0.10, 0.10]
            elif profit_signal and in_position and position_pnl > 0.05:
                # Profit signal + winning position = heavy sell bias
                weights = [0.10, 0.10, 0.80]
            elif in_position and position_pnl < -0.03 and days_held >= 2:
                # Losing position held too long = sell bias
                weights = [0.10, 0.20, 0.70]
            elif losing_streak > ActionSelector.LOSING_STREAK_THRESHOLD:
                # On losing streak = conservative (mostly hold)
                weights = [0.20, 0.60, 0.20]
            else:
                # Normal exploration
                weights = [0.35, 0.25, 0.40]

            return random.choices([0, 1, 2], weights=weights)[0]

        # Otherwise use network prediction
        return torch.argmax(probs).item()


# Build the network on the selected device, its optimizer, and the action
# selector used by the training functions in the following cells.
brain = TradingBrain()
brain = brain.to(DEVICE)
optimizer = torch.optim.AdamW(brain.parameters(), lr=3e-4, weight_decay=1e-2)
selector = ActionSelector()

# Total number of scalar weights in the model, for the banner below.
n_params = sum(weight.numel() for weight in brain.parameters())
print(
    'üß† Brain V4 - ALPHAGO ARCHITECTURE',
    f'   Parameters: {n_params:,}',
    '   üìâ Dip pathway - Oversold bounces',
    '   üìà Momentum pathway - Trend riding',
    '   üõ°Ô∏è  Risk pathway - Exit timing',
    '   üíµ Cash pathway - Capital preservation',
    '   üéØ Strategy gate - Learns WHEN to use WHICH',
    sep='\n',
)

In [None]:
# CELL 6: State Processing & Training Functions V4
# AlphaGo-style learning: Never get stuck, always adapting

def state_to_tensor(s):
    """Flatten one state dict into the 17-value float32 feature vector (on DEVICE).

    Feature order is fixed and must match what the network was trained on:
    7 market features, 3 technical flags, 2 rule signals, 3 position fields,
    2 portfolio fields. RSI, price and days-held are rescaled by constant
    divisors; everything else is passed through unchanged.
    """
    market = [
        s['r5'],            # 5-day return
        s['r10'],           # 10-day return
        s['vol'],           # volume ratio
        s['rsi'] / 100,     # RSI scaled to 0..1
        s['p'] / 1000,      # last close, scaled down
        s['drawdown'],      # distance from window high
        s['recovery'],      # position inside window range
    ]
    technical = [s['vol_squeeze'], s['vol_spike'], s['mom_accel']]
    rule_signals = [s['dip_buy'], s['profit_take']]
    position = [
        s['in_position'],   # currently holding
        s['position_pnl'],  # open P&L as a fraction
        s['days_held'] / 20,
    ]
    portfolio = [s['cash_ratio'], s['losing_streak']]

    features = market + technical + rule_signals + position + portfolio
    vector = torch.tensor(features, dtype=torch.float32)
    return vector.to(DEVICE)


def train_episode(env, brain, optimizer, epsilon=0.3):
    """
    Play one full episode and collect the per-decision records used for learning.

    The rollout runs with the network in eval mode so that dropout does not
    perturb the action choice or the probabilities stored in the memories;
    the module's previous train/eval mode is restored before returning. All
    tickers for a given day go through the network in one batched forward pass.

    Args:
        env: environment exposing ``reset()`` and ``step(actions)``.
        brain: network returning ``(policy, value, gate_weights)``.
        optimizer: unused here; accepted for call-site symmetry with update_brain.
        epsilon: exploration rate passed to the action selector.

    Returns:
        (total_reward, final_portfolio_value, memories, last_info, action_counts)
    """
    action_names = ('buy', 'hold', 'sell')  # index == action id

    was_training = brain.training
    brain.eval()
    try:
        state = env.reset()
        total_reward = 0
        memories = []
        action_counts = {'buy': 0, 'hold': 0, 'sell': 0}

        while True:
            actions = []

            if state:
                # One forward pass for every ticker that has a state today
                batch = torch.stack([state_to_tensor(s) for s in state])
                with torch.no_grad():
                    probs_all, values_all, gates_all = brain(batch)

                for i, s in enumerate(state):
                    # Use intelligent action selection
                    a = selector.select(probs_all[i], s, epsilon)

                    action_name = action_names[a]
                    actions.append({'t': s['t'], 'a': action_name})
                    action_counts[action_name] += 1

                    # Store for learning
                    memories.append({
                        'state': batch[i],
                        'action': a,
                        'old_prob': probs_all[i][a].item(),
                        'value': values_all[i].item(),
                        'strategy_weights': gates_all[i].detach().cpu().numpy(),
                        'signals': {
                            'dip_buy': s['dip_buy'],
                            'profit_take': s['profit_take'],
                            'in_position': s['in_position'],
                        }
                    })

            next_state, reward, done, info = env.step(actions)
            total_reward += reward
            state = next_state

            if done:
                break
    finally:
        brain.train(was_training)

    return total_reward, info['value'], memories, info, action_counts


def update_brain(brain, optimizer, memories, episode_reward, episode_value):
    """
    One clipped-ratio (PPO-style) gradient step over a random sample of the episode.

    Every sampled decision shares the same learning target, ``episode_reward / 1000``.
    The per-sample loss is: clipped policy loss + 0.5 * squared value error
    - 0.02 * entropy + 0.02 * P(hold). A flat +0.1 is added to the advantage
    when the action agreed with a rule signal (bought on a dip signal, or sold
    an open position on a profit-take signal).

    Args:
        brain: network returning ``(policy, value, gate_weights)``.
        optimizer: optimizer over ``brain``'s parameters.
        memories: per-decision records produced during the rollout.
        episode_reward: total shaped reward of the episode.
        episode_value: accepted but not used by this function.

    Returns:
        Mean sampled loss as a float, or 0 when fewer than 10 memories exist
        (no update is performed in that case).
    """
    if len(memories) < 10:
        return 0

    brain.train()

    # Draw up to 300 decisions without replacement
    batch_size = min(300, len(memories))
    chosen = random.sample(range(len(memories)), batch_size)

    target = episode_reward / 1000
    loss_sum = 0

    for i in chosen:
        record = memories[i]
        action = record['action']
        flags = record['signals']

        probs, value_now, _gate = brain(record['state'].unsqueeze(0))

        # Advantage against the value recorded at rollout time, plus the
        # signal-alignment bonus
        bought_dip = flags['dip_buy'] and action == 0
        sold_on_signal = flags['profit_take'] and flags['in_position'] and action == 2
        bonus = 0.1 if (bought_dip or sold_on_signal) else 0
        advantage = (target - record['value']) + bonus

        # Clipped surrogate objective
        ratio = probs[0][action] / (record['old_prob'] + 1e-8)
        surrogate = ratio * advantage
        surrogate_clipped = torch.clamp(ratio, 0.8, 1.2) * advantage
        policy_loss = -torch.min(surrogate, surrogate_clipped)

        value_loss = (value_now - target) ** 2

        # Entropy term rewards keeping the distribution spread out
        entropy = -(probs * torch.log(probs + 1e-8)).sum()

        # Small cost on the probability assigned to "hold" (index 1)
        hold_penalty = 0.02 * probs[0][1]

        loss_sum += policy_loss + 0.5 * value_loss - 0.02 * entropy + hold_penalty

    # Single optimizer step on the mean loss, with gradient-norm clipping at 1.0
    optimizer.zero_grad()
    (loss_sum / batch_size).backward()
    torch.nn.utils.clip_grad_norm_(brain.parameters(), 1.0)
    optimizer.step()

    return loss_sum.item() / batch_size


# Console banner summarising the training helpers defined in this cell.
print(
    '‚úÖ Training Functions V4 - ALPHAGO LEARNING',
    '   üìä 17 features including position & portfolio context',
    '   üéØ Signal-aware exploration',
    '   üîÑ PPO-style updates with signal alignment',
    '   üö´ Hold penalty to prevent paralysis',
    sep='\n',
)

In [None]:
# CELL 7: 🎮 ALPHAGO TRAINING - WALK-FORWARD VALIDATION
# No cheating. Train on past, validate on future. Lock in what works.

# Environment over every downloaded ticker, plus the run-level bookkeeping
# that the training loop below fills in.
# NOTE(review): the environment is built over all of ALL_DATA; no train /
# validation split is made in this cell despite the "walk-forward" banner.
N_EPISODES = 200
env = TradingEnv(ALL_DATA)
results, training_log = [], []   # per-episode metric dicts / (currently unused) log

print(
    'üéÆ ALPHAGO V4 - WALK-FORWARD TRAINING',
    '='*70,
    'PRINCIPLES:',
    '  1. Every loss is data - learn and adapt',
    '  2. Never get stuck - cut losers fast',
    '  3. Cash is opportunity - preserve for dips',
    '  4. Lock in wins - encode what works',
    '='*70,
    sep='\n',
)

# Best-so-far trackers used to decide when to checkpoint the model.
# best_sharpe is initialised here but not updated by the loop in this cell.
best_value, best_win_rate, best_sharpe = 0, 0, -999

for ep in range(N_EPISODES):
    # Adaptive epsilon - more exploration early, exploit later
    if ep < 50:
        epsilon = max(0.3, 0.6 - ep * 0.006)  # High exploration first
    elif ep < 150:
        epsilon = max(0.1, 0.3 - (ep - 50) * 0.002)  # Gradual decay
    else:
        epsilon = 0.05  # Low exploration, trust learned patterns

    # Run episode
    reward, final_val, memories, info, action_counts = train_episode(
        env, brain, optimizer, epsilon
    )

    win_rate = info['win_rate']
    n_trades = info['total_trades']
    pattern_hits = info.get('pattern_hits', {})

    # Save best models BEFORE the gradient step, so each checkpoint holds the
    # weights that actually played the episode being scored. (The episode
    # still includes epsilon-random actions, so the score is only an
    # approximate measure of those weights.)
    if final_val > best_value:
        best_value = final_val
        torch.save(brain.state_dict(), 'best_brain_value.pt')

    if win_rate > best_win_rate and n_trades >= 15:
        best_win_rate = win_rate
        torch.save(brain.state_dict(), 'best_brain_winrate.pt')

    # Update brain
    loss = update_brain(brain, optimizer, memories, reward, final_val)

    # Return relative to the environment's configured starting cash
    pct_return = (final_val / env.start_cash - 1) * 100

    # Track action distribution
    total_actions = sum(action_counts.values())
    buy_pct = action_counts['buy'] / max(1, total_actions) * 100
    sell_pct = action_counts['sell'] / max(1, total_actions) * 100
    hold_pct = action_counts['hold'] / max(1, total_actions) * 100

    results.append({
        'ep': ep, 'value': final_val, 'reward': reward,
        'win_rate': win_rate, 'trades': n_trades, 'loss': loss,
        'buy_pct': buy_pct, 'sell_pct': sell_pct, 'hold_pct': hold_pct,
        'dip_buys': pattern_hits.get('dip_buy', 0),
        'profit_takes': pattern_hits.get('profit_take', 0)
    })

    # Every 20 episodes, show progress
    if ep % 20 == 0:
        print(f'\nEp {ep:3d} | ${final_val:,.0f} ({pct_return:+.1f}%) | '
              f'WR: {win_rate:.0%} | Trades: {n_trades}')
        print(f'        Actions: Buy {buy_pct:.0f}% | Sell {sell_pct:.0f}% | Hold {hold_pct:.0f}% | Œµ={epsilon:.2f}')
        print(f'        Patterns: {pattern_hits.get("dip_buy", 0)} dip buys, {pattern_hits.get("profit_take", 0)} profit takes')

# Final summary of the training run. Returns are measured against the
# environment's configured starting cash rather than a hard-coded figure.
print('\n' + '='*70)
print('üèÜ TRAINING COMPLETE - BEST RESULTS')
print('='*70)
print(f'Best Portfolio Value:  ${best_value:,.0f} ({(best_value/env.start_cash-1)*100:+.1f}%)')
print(f'Best Win Rate:         {best_win_rate:.0%}')

# Action distribution evolution: first 20 episodes vs last 20 episodes
early_results = results[:20]
late_results = results[-20:]

early_hold = np.mean([r['hold_pct'] for r in early_results])
late_hold = np.mean([r['hold_pct'] for r in late_results])

early_trades = np.mean([r['trades'] for r in early_results])
late_trades = np.mean([r['trades'] for r in late_results])

print(f'\nüìä LEARNING EVOLUTION:')
print(f'   Early Hold %:  {early_hold:.1f}% ‚Üí Late Hold %: {late_hold:.1f}%')
print(f'   Early Trades:  {early_trades:.0f} ‚Üí Late Trades: {late_trades:.0f}')
print(f'   (Lower hold + more trades = less paralysis!)')

# Pattern effectiveness: totals across all episodes
total_dip_buys = sum(r.get('dip_buys', 0) for r in results)
total_profit_takes = sum(r.get('profit_takes', 0) for r in results)
print(f'\nüéØ PATTERN HITS:')
print(f'   Dip Buys:      {total_dip_buys}')
print(f'   Profit Takes:  {total_profit_takes}')

In [None]:
# CELL 8: 📈 WALK-FORWARD VALIDATION - NO CHEATING!
# Train on past, test on future. This is the REAL test.

import matplotlib.pyplot as plt

print('üìà WALK-FORWARD VALIDATION')
print('='*70)
print('This validates that our strategy works on UNSEEN data')
print('Train on past ‚Üí Test on future ‚Üí No overfitting!')
print('='*70)

# Plot training curves
fig, axes = plt.subplots(2, 3, figsize=(15, 8))

# 1. Portfolio Value
vals = [r['value'] for r in results]
axes[0, 0].plot(vals, alpha=0.6)
axes[0, 0].plot(pd.Series(vals).rolling(10).mean(), 'b-', linewidth=2, label='10-ep MA')
axes[0, 0].axhline(10000, color='r', linestyle='--', label='Start $10k')
axes[0, 0].set_title('Portfolio Value Over Episodes')
axes[0, 0].set_xlabel('Episode')
axes[0, 0].set_ylabel('$')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# 2. Win Rate
win_rates = [r['win_rate'] for r in results]
axes[0, 1].plot(win_rates, alpha=0.6)
axes[0, 1].plot(pd.Series(win_rates).rolling(10).mean(), 'g-', linewidth=2, label='10-ep MA')
axes[0, 1].axhline(0.5, color='r', linestyle='--', label='50% baseline')
axes[0, 1].set_title('Win Rate Over Episodes')
axes[0, 1].set_xlabel('Episode')
axes[0, 1].set_ylabel('Win Rate')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# 3. Action Distribution Evolution
buy_pcts = [r['buy_pct'] for r in results]
sell_pcts = [r['sell_pct'] for r in results]
hold_pcts = [r['hold_pct'] for r in results]

axes[0, 2].plot(pd.Series(buy_pcts).rolling(10).mean(), 'g-', label='Buy %', linewidth=2)
axes[0, 2].plot(pd.Series(sell_pcts).rolling(10).mean(), 'r-', label='Sell %', linewidth=2)
axes[0, 2].plot(pd.Series(hold_pcts).rolling(10).mean(), 'gray', label='Hold %', linewidth=2)
axes[0, 2].set_title('Action Distribution (10-ep MA)')
axes[0, 2].set_xlabel('Episode')
axes[0, 2].set_ylabel('%')
axes[0, 2].legend()
axes[0, 2].grid(True, alpha=0.3)

# 4. Trade Count
trades = [r['trades'] for r in results]
axes[1, 0].plot(trades, alpha=0.6)
axes[1, 0].plot(pd.Series(trades).rolling(10).mean(), 'purple', linewidth=2, label='10-ep MA')
axes[1, 0].set_title('Trades Per Episode')
axes[1, 0].set_xlabel('Episode')
axes[1, 0].set_ylabel('# Trades')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# 5. Reward
rewards = [r['reward'] for r in results]
axes[1, 1].plot(rewards, alpha=0.6)
axes[1, 1].plot(pd.Series(rewards).rolling(10).mean(), 'orange', linewidth=2, label='10-ep MA')
axes[1, 1].axhline(0, color='r', linestyle='--')
axes[1, 1].set_title('Episode Reward')
axes[1, 1].set_xlabel('Episode')
axes[1, 1].set_ylabel('Reward')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

# 6. Pattern Hits
dip_buys = [r.get('dip_buys', 0) for r in results]
profit_takes = [r.get('profit_takes', 0) for r in results]
axes[1, 2].bar(range(len(results)), dip_buys, alpha=0.6, label='Dip Buys', color='green')
axes[1, 2].bar(range(len(results)), profit_takes, alpha=0.6, label='Profit Takes', color='gold', bottom=dip_buys)
axes[1, 2].set_title('Pattern Hits (Secret Sauce)')
axes[1, 2].set_xlabel('Episode')
axes[1, 2].set_ylabel('Count')
axes[1, 2].legend()
axes[1, 2].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_curves.png', dpi=150)
plt.show()

# ============ WALK-FORWARD TEST ============
# Scores the current `brain` on consecutive positional slices of the downloaded
# history and records one result dict per fold in `fold_results`.
# NOTE(review): nothing in this cell re-trains `brain` on `train_data`; the weights
# being scored are whatever is in memory. Confirm they have not already seen the
# test slices before treating these numbers as out-of-sample.
print('\n' + '='*70)
print('üî¨ WALK-FORWARD TEST - TRUE OUT-OF-SAMPLE VALIDATION')
print('='*70)

# Split data into 4 periods, sized from the shortest ticker history so that every
# positional slice below exists for every ticker.
all_lens = [len(df) for df in ALL_DATA.values()]
min_len = min(all_lens, default=0)  # default avoids ValueError when ALL_DATA is empty
period_size = min_len // 4

print(f'Data split into 4 periods of ~{period_size} days each')

fold_results = []

# Each fold takes 2 periods as the "train" window and the next FULL period as the
# test window. With 4 periods that gives 2 complete folds; the guard below stops
# the loop once a full test period no longer fits.
for fold in range(3):
    # Training period
    train_start = fold * period_size
    train_end = (fold + 2) * period_size

    # Test period: the period immediately after the training window
    test_start = train_end
    test_end = (fold + 3) * period_size

    # Previously test_end was clamped to min_len, which made this guard unreachable
    # and produced a last fold whose test slice held only min_len % 4 rows.
    if period_size == 0 or test_end > min_len:
        break

    print(f'\nFold {fold + 1}: Train days {train_start}-{train_end}, Test days {test_start}-{test_end}')

    # Create train/test data (row positions are counted from each ticker's own
    # first row, so the same positions can map to different calendar dates).
    train_data = {}
    test_data = {}

    for ticker, df in ALL_DATA.items():
        if len(df) >= test_end:
            # NOTE(review): train_data is built but not consumed anywhere in this cell.
            train_data[ticker] = df.iloc[train_start:train_end].copy()
            test_data[ticker] = df.iloc[test_start:test_end].copy()

    # Quick test on out-of-sample data
    test_env = TradingEnv(test_data)

    # Run test episode with no exploration (pure exploitation).
    # NOTE(review): train_episode is handed the optimizer; if it performs gradient
    # steps, this evaluation also updates `brain` - confirm against its definition.
    test_reward, test_value, _, test_info, _ = train_episode(test_env, brain, optimizer, epsilon=0.0)

    # Percentage return relative to the $10,000 starting balance
    fold_return = (test_value / 10000 - 1) * 100
    fold_results.append({
        'fold': fold + 1,
        'return': fold_return,
        'value': test_value,
        'win_rate': test_info['win_rate'],
        'trades': test_info['total_trades']
    })

    print(f'   Test Return: {fold_return:+.2f}%')
    print(f'   Win Rate: {test_info["win_rate"]:.0%}')
    print(f'   Trades: {test_info["total_trades"]}')

# Summary: aggregate the per-fold results (skipped entirely when no fold ran)
if fold_results:
    fold_returns = [f['return'] for f in fold_results]
    fold_win_rates = [f['win_rate'] for f in fold_results]

    avg_return = np.mean(fold_returns)
    avg_winrate = np.mean(fold_win_rates)
    # Spread of fold returns; a smaller value means more consistent folds
    consistency = np.std(fold_returns)

    print('\n' + '='*70)
    print('üèÜ WALK-FORWARD RESULTS')
    print('='*70)
    print(f'Average OOS Return:  {avg_return:+.2f}%')
    print(f'Average Win Rate:    {avg_winrate:.0%}')
    print(f'Consistency (std):   {consistency:.2f}%')
    print()

    # Verdict: a non-positive average fails outright; a positive average is
    # "validated" only when the spread across folds stays under 15 points.
    if not avg_return > 0:
        print('‚ùå NEEDS MORE WORK - Negative OOS returns')
    elif consistency < 15:
        print('‚úÖ STRATEGY VALIDATED - Works on unseen data!')
        print('   The SECRET SAUCE is real.')
    else:
        print('‚ö†Ô∏è  POSITIVE but INCONSISTENT - Needs refinement')

print('\nüíæ Training curves saved to training_curves.png')

In [None]:
# CELL 9: 📊 TODAY'S PREDICTIONS - ALPHAGO SECRET SAUCE
import os

# Load best model: prefer the best-win-rate checkpoint, then the best-value one.
# map_location=DEVICE lets a checkpoint written on a GPU runtime load on a
# CPU-only runtime (and vice versa) instead of failing during deserialisation.
for ckpt_path, loaded_msg in (
    ('best_brain_winrate.pt', 'üìÇ Loaded BEST WIN RATE model'),
    ('best_brain_value.pt', 'üìÇ Loaded BEST VALUE model'),
):
    if os.path.exists(ckpt_path):
        brain.load_state_dict(torch.load(ckpt_path, map_location=DEVICE))
        print(loaded_msg)
        break
else:
    # Neither checkpoint exists: say so rather than silently predicting with
    # whatever weights happen to be in memory.
    print('No saved checkpoint found - using the in-memory brain weights')

# Switch the network to evaluation mode before running inference
brain.eval()

# Banner: echo the fixed StrategyDNA thresholds used by the signal rules below
print('\n' + '='*70)
print('üß¨ ALPHAGO PREDICTIONS - SECRET SAUCE ENCODED')
print('='*70)
print('Strategy DNA Active:')
print(f'  ‚Ä¢ Dip Buy: {StrategyDNA.DIP_BUY["min_drop"]*100:.0f}% drop + RSI<{StrategyDNA.DIP_BUY["max_rsi"]}')
print(f'  ‚Ä¢ Profit Take: {StrategyDNA.PROFIT_TAKE["target"]*100:.0f}% gain target')
print(f'  ‚Ä¢ Cut Loss: {StrategyDNA.CUT_LOSS["max_loss"]*100:.0f}% max loss')
print('='*70)

predictions = []

# Names for the entries of the strategy-weight output, in index order
strategy_labels = ('DIP', 'MOM', 'RISK', 'CASH')

for ticker in TICKERS:
    # Skip tickers that were never downloaded or have fewer than 60 rows
    df = ALL_DATA.get(ticker)
    if df is None or len(df) < 60:
        continue

    # Trailing window: the latest bar plus up to 20 bars before it
    idx = len(df) - 1
    window = slice(max(0, idx - 20), idx + 1)
    closes = df['Close'].iloc[window].values
    volumes = df['Volume'].iloc[window].values
    highs = df['High'].iloc[window].values
    lows = df['Low'].iloc[window].values
    n_bars = len(closes)
    last_close = closes[-1]

    # --- Returns and relative volume ---
    ret5 = (last_close/closes[-6]-1) if n_bars > 5 else 0
    ret10 = (last_close/closes[-11]-1) if n_bars > 10 else 0
    vol_r = volumes[-1]/(np.mean(volumes)+1) if len(volumes) > 1 else 1

    # --- RSI-style oscillator: mean up-move vs. mean down-move in the window ---
    deltas = np.diff(closes)
    up_moves = deltas[deltas > 0]
    down_moves = deltas[deltas < 0]
    avg_gain = np.mean(up_moves) if len(up_moves) > 0 else 0
    avg_loss = abs(np.mean(down_moves)) if len(down_moves) > 0 else 1e-8
    rsi = 100 - 100/(1+avg_gain/avg_loss)

    # --- Position of the last close inside the window's close range ---
    high_20 = max(closes)
    low_20 = min(closes)
    drawdown = (last_close - high_20) / high_20
    recovery = (last_close - low_20) / (high_20 - low_20 + 0.001)

    # --- Range / volume regime flags ---
    atr = np.mean(highs[-14:] - lows[-14:]) if len(highs) >= 14 else 0
    atr_pct = atr / last_close if last_close > 0 else 0
    vol_squeeze = int(atr_pct < 0.02)
    vol_spike = int(vol_r > 2.0)

    # --- Momentum acceleration: latest 5-bar move vs. the 5-bar move before it ---
    momentum_accel = 0
    if n_bars >= 10:
        recent_move = last_close - closes[-6]
        prior_move = closes[-6] - closes[-11] if n_bars > 10 else 0
        momentum_accel = 1 if recent_move > prior_move else -1

    # SECRET SAUCE SIGNALS (thresholds come from StrategyDNA)
    dip_rule = StrategyDNA.DIP_BUY
    dip_buy = int(drawdown < dip_rule['min_drop'] and
                  rsi < dip_rule['max_rsi'] and
                  vol_r > dip_rule['min_vol'])
    profit_take = int(recovery > 0.85 and
                      rsi > StrategyDNA.PROFIT_TAKE['max_rsi'])

    # Build 17-feature tensor: 12 market features + 5 account-state placeholders
    market_state = [
        ret5, ret10, vol_r, rsi/100, last_close/1000,
        drawdown, recovery, vol_squeeze, vol_spike, momentum_accel,
        dip_buy, profit_take,
    ]
    account_state = [
        0,    # not in position
        0,    # no current pnl
        0,    # no days held
        0.5,  # assume 50% cash
        0,    # no losing streak
    ]
    x = torch.tensor(market_state + account_state, dtype=torch.float32).to(DEVICE)

    with torch.no_grad():
        probs, val, strategy_weights = brain(x.unsqueeze(0))

    # Probabilities are read as index 0 = buy, 1 = hold, 2 = sell
    buy_prob, hold_prob, sell_prob = (probs[0][k].item() for k in range(3))

    # Determine action from network: strict winner takes it, anything else is HOLD
    buy_wins = buy_prob > hold_prob and buy_prob > sell_prob
    sell_wins = sell_prob > hold_prob and sell_prob > buy_prob
    action = 'BUY' if buy_wins else ('SELL' if sell_wins else 'HOLD')

    # OVERRIDE with SECRET SAUCE signals (first matching rule wins)
    signal, signal_type = '', ''

    if dip_buy:
        signal, signal_type = 'üéØ DIP BUY!', 'dip'
        action = 'BUY'  # Force buy on dip signal
    elif profit_take:
        # Flag only - the network's action is left untouched
        signal, signal_type = 'üí∞ TAKE PROFIT', 'profit'
    elif vol_squeeze and momentum_accel > 0:
        signal, signal_type = '‚ö° SQUEEZE BREAKOUT', 'squeeze'
    elif drawdown < -0.05:
        signal, signal_type = f'üìâ Down {drawdown*100:.1f}%', 'down'
    elif vol_spike:
        signal, signal_type = 'üìä VOL SPIKE', 'volume'

    # Get strategy weights for insight: report the label with the largest weight
    weights = strategy_weights[0].cpu().numpy()
    dominant_strategy = strategy_labels[int(np.argmax(weights))]

    predictions.append(dict(
        ticker=ticker,
        action=action,
        buy_prob=buy_prob,
        sell_prob=sell_prob,
        hold_prob=hold_prob,
        price=float(last_close),
        rsi=rsi,
        drawdown=drawdown * 100,
        recovery=recovery * 100,
        ret5=ret5 * 100,
        signal=signal,
        signal_type=signal_type,
        dip_buy=dip_buy,
        profit_take=profit_take,
        vol_squeeze=vol_squeeze,
        dominant_strategy=dominant_strategy,
        strategy_weights=weights.tolist(),
    ))

def _prediction_rank(p):
    """Sort key for one prediction dict.

    Order: dip-buy rows first, then profit-take rows, then BUY rows ahead of
    everything else, and inside each of those groups the most confident row
    first (buy probability for BUY rows, sell probability otherwise).
    """
    is_buy = p['action'] == 'BUY'
    confidence = p['buy_prob'] if is_buy else p['sell_prob']
    return (-p['dip_buy'], -p['profit_take'], 0 if is_buy else 1, -confidence)


# Sort: DIP BUYS first, then by confidence (highest first).
# Non-BUY rows used to be keyed on +sell_prob, i.e. ascending, so the
# "SELL/AVOID" list built from sells[:5] showed the weakest sell signals.
predictions.sort(key=_prediction_rank)

# ============ DISPLAY RESULTS ============

def _action_dot(action):
    """Return the marker printed in front of a BUY, SELL or any other action."""
    if action == 'BUY':
        return 'üü¢'
    return 'üî¥' if action == 'SELL' else '‚ö™'


divider = '-'*70

# Rows that carry any special signal text
print('\nüéØ SPECIAL SIGNALS (ACT ON THESE):')
print(divider)
special = [p for p in predictions if p['signal']]
if not special:
    print("   No special signals today - market in consolidation")
else:
    for p in special:
        sym = _action_dot(p['action'])
        print(f"{sym} {p['ticker']:5s} ${p['price']:>8.2f} | {p['action']:4s} | "
              f"RSI:{p['rsi']:3.0f} | Drop:{p['drawdown']:+5.1f}% | {p['signal']} [{p['dominant_strategy']}]")

# DIP OPPORTUNITIES (section is omitted when there are none)
dips = [p for p in predictions if p['dip_buy']]
if dips:
    print(f'\nüéØ DIP BUY OPPORTUNITIES ({len(dips)}) - THIS IS WHERE BIG WINS COME FROM:')
    print(divider)
    for p in dips:
        print(f"   {p['ticker']:5s} ${p['price']:>8.2f} | Down {p['drawdown']:+.1f}% from high | RSI: {p['rsi']:.0f}")
        print(f"          Strategy: {p['dominant_strategy']} | Confidence: {p['buy_prob']:.0%}")

# PROFIT TAKE (section is omitted when there are none)
profit_signals = [p for p in predictions if p['profit_take']]
if profit_signals:
    print(f'\nüí∞ PROFIT TAKE SIGNALS ({len(profit_signals)}) - LIKE YOUR HOOD TRADE:')
    print(divider)
    for p in profit_signals:
        print(f"   {p['ticker']:5s} ${p['price']:>8.2f} | Recovery: {p['recovery']:.0f}% | RSI: {p['rsi']:.0f} (overbought)")

# ALL PREDICTIONS: one table row per ticker; '*' marks rows with a special signal
print(f'\nüìä ALL {len(predictions)} PREDICTIONS:')
print(divider)
print(f"{'':3} {'TICKER':<6} {'PRICE':>9} {'ACTION':<5} {'BUY':>5} {'SELL':>5} {'RSI':>4} {'5D':>6} {'STRATEGY':>6}")
print(divider)

for p in predictions:
    sym = _action_dot(p['action'])
    sig = '*' if p['signal'] else ' '
    print(f"{sig}{sym} {p['ticker']:<5} ${p['price']:>8.2f} {p['action']:<5} "
          f"{p['buy_prob']:>4.0%} {p['sell_prob']:>4.0%} {p['rsi']:>4.0f} {p['ret5']:>+5.1f}% {p['dominant_strategy']:>6}")

# SUMMARY: bucket rows by action in one pass, preserving the sorted order
by_action = {'BUY': [], 'SELL': [], 'HOLD': []}
for p in predictions:
    if p['action'] in by_action:
        by_action[p['action']].append(p)
buys, sells, holds = by_action['BUY'], by_action['SELL'], by_action['HOLD']

print('\n' + '='*70)
print(f'üìä SUMMARY: {len(buys)} BUYs | {len(sells)} SELLs | {len(holds)} HOLDs')
print(f'   üéØ Dip Buys: {len(dips)} | üí∞ Profit Takes: {len(profit_signals)}')

# First 8 BUY rows in the current list order
if buys:
    print(f'\nüü¢ TOP BUY SIGNALS:')
    for p in buys[:8]:
        sig_note = f" ‚Üê {p['signal']}" if p['signal'] else ""
        print(f"   {p['ticker']:5s} ${p['price']:>7.2f} | {p['buy_prob']:.0%} conf | {p['dominant_strategy']}{sig_note}")

# First 5 SELL rows in the current list order
if sells:
    print(f'\nüî¥ SELL/AVOID:')
    for p in sells[:5]:
        sig_note = f" ‚Üê {p['signal']}" if p['signal'] else ""
        print(f"   {p['ticker']:5s} ${p['price']:>7.2f} | {p['sell_prob']:.0%} conf | {p['dominant_strategy']}{sig_note}")

print(f'\n‚úÖ {len(predictions)} predictions ready with SECRET SAUCE encoded')

In [None]:
# CELL 10: 💾 SAVE THE SECRET SAUCE
import json

print('='*70)
print('üíæ SAVING THE SECRET SAUCE')
print('='*70)

# 1. Save the trained brain
# NOTE: this writes whatever weights `brain` currently holds. If the predictions
# cell loaded a best_brain_*.pt checkpoint into it, this file is a copy of that
# checkpoint rather than the weights from the final training episode.
torch.save(brain.state_dict(), 'alphago_trader_brain.pt')
print('‚úÖ Saved: alphago_trader_brain.pt')

# Also report the best-model checkpoints if the training loop wrote them
if os.path.exists('best_brain_winrate.pt'):
    print('‚úÖ Best Win Rate model: best_brain_winrate.pt')
if os.path.exists('best_brain_value.pt'):
    print('‚úÖ Best Value model: best_brain_value.pt')

# 2. Save training results
# default=float matches the other dumps in this cell: NumPy scalars that json
# cannot encode natively are converted instead of raising TypeError.
with open('training_results.json', 'w') as f:
    json.dump(results, f, indent=2, default=float)
print('‚úÖ Saved: training_results.json')

# 3. Save predictions with all features
with open('todays_predictions.json', 'w') as f:
    json.dump(predictions, f, indent=2, default=float)
print('‚úÖ Saved: todays_predictions.json')

# 4. Save Strategy DNA (the secret sauce rules)
# The walk-forward cell is optional, so its outputs are looked up in the notebook
# namespace with neutral fallbacks instead of being referenced directly.
wf_folds = globals().get('fold_results', [])
strategy_dna = {
    'dip_buy': StrategyDNA.DIP_BUY,
    'profit_take': StrategyDNA.PROFIT_TAKE,
    'cut_loss': StrategyDNA.CUT_LOSS,
    'cash_management': StrategyDNA.CASH,
    'discovered_at': datetime.now().isoformat(),
    'validation': {
        'walk_forward_returns': [fold['return'] for fold in wf_folds],
        'avg_oos_return': globals().get('avg_return', 0),
        'consistency': globals().get('consistency', 0),
    }
}

with open('strategy_dna.json', 'w') as f:
    json.dump(strategy_dna, f, indent=2, default=float)
print('‚úÖ Saved: strategy_dna.json (THE SECRET SAUCE)')

# 5. Create summary report (training stats, prediction counts, rules, top picks)
summary = {
    'training_date': datetime.now().isoformat(),
    'episodes': len(results),
    'best_portfolio_value': best_value,
    'best_win_rate': best_win_rate,
    'final_predictions': {
        'total': len(predictions),
        'buys': len([p for p in predictions if p['action'] == 'BUY']),
        'sells': len([p for p in predictions if p['action'] == 'SELL']),
        'dip_opportunities': len([p for p in predictions if p['dip_buy']]),
        'profit_take_signals': len([p for p in predictions if p['profit_take']]),
    },
    'strategy_dna': strategy_dna,
    # First 10 BUY rows in the current order of `predictions`
    'top_buys': [{'ticker': p['ticker'], 'price': p['price'], 'confidence': p['buy_prob'], 'signal': p['signal']} 
                 for p in predictions if p['action'] == 'BUY'][:10],
    'dip_buys': [{'ticker': p['ticker'], 'price': p['price'], 'drawdown': p['drawdown'], 'rsi': p['rsi']} 
                 for p in predictions if p['dip_buy']],
}

with open('alphago_summary.json', 'w') as f:
    json.dump(summary, f, indent=2, default=float)
print('‚úÖ Saved: alphago_summary.json')

# Summary display: list every artefact this notebook writes
for listing_line in (
    '\n' + '='*70,
    'üì¶ FILES READY FOR DOWNLOAD:',
    '='*70,
    '   1. alphago_trader_brain.pt    - The trained neural network',
    '   2. best_brain_winrate.pt      - Best win rate model',
    '   3. best_brain_value.pt        - Best value model',
    '   4. training_results.json      - Learning curve data',
    '   5. todays_predictions.json    - Today\'s signals',
    '   6. strategy_dna.json          - THE SECRET SAUCE rules',
    '   7. alphago_summary.json       - Full summary report',
    '   8. training_curves.png        - Visual learning curves',
    '='*70,
):
    print(listing_line)

# Recap of the rule thresholds exactly as stored in strategy_dna
print('\nüß¨ SECRET SAUCE ENCODED:')
for rule_line in (
    f'   DIP BUY: Drop ‚â§ {strategy_dna["dip_buy"]["min_drop"]*100:.0f}%, RSI ‚â§ {strategy_dna["dip_buy"]["max_rsi"]}, Vol ‚â• {strategy_dna["dip_buy"]["min_vol"]}x',
    f'   PROFIT: Target {strategy_dna["profit_take"]["target"]*100:.0f}%+, RSI ‚â• {strategy_dna["profit_take"]["max_rsi"]}',
    f'   CUT LOSS: Max {strategy_dna["cut_loss"]["max_loss"]*100:.0f}% loss, Max {strategy_dna["cut_loss"]["max_days"]} days red',
    f'   CASH: Keep {strategy_dna["cash_management"]["min_reserve"]*100:.0f}% reserve for dips',
):
    print(rule_line)

# Download files (Colab only)
# Only a failed import means "not running in Colab". The previous bare `except:`
# printed that message for any failure, including a failed download or Ctrl-C.
try:
    from google.colab import files
except ImportError:
    print('\nüí° Not in Colab - files saved locally')
else:
    print('\nüì• Downloading files...')
    for artifact in (
        'alphago_trader_brain.pt',
        'todays_predictions.json',
        'strategy_dna.json',
        'alphago_summary.json',
        'training_results.json',
        'training_curves.png',
    ):
        # Best effort per file: one missing or failed download must not stop the rest
        if not os.path.exists(artifact):
            print(f'   Skipping {artifact} (file not found)')
            continue
        try:
            files.download(artifact)
        except Exception as err:
            print(f'   Could not download {artifact}: {err}')
    print('‚úÖ Downloads started!')

print('\nüöÄ ALPHAGO TRADER V4 COMPLETE!')
print('   Load these files into your local system to use the SECRET SAUCE.')