# Batch-test the MultiRocket + Hydra (mr+hydra) Donchian breakout strategies

# In [None]:
# batch_donchian_backtest_monte_carlo.py

import os
import glob
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

def load_model(model_path):
    """Read a saved model bundle from disk with joblib.

    Args:
        model_path: Path of the joblib file to read.

    Returns:
        The object reconstructed by ``joblib.load``.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
    """
    model_exists = os.path.exists(model_path)
    if model_exists:
        return joblib.load(model_path)
    raise FileNotFoundError(f"Model not found: {model_path}")

def load_and_preprocess(filename):
    """Read a stock CSV, attach the next-day direction label and drop bad rows.

    Args:
        filename: Path of a CSV file with at least ``Date`` and ``Close``
            columns.

    Returns:
        DataFrame with an integer ``Direction`` column (1 when the next
        row's close is higher than the current close, else 0). Rows holding
        NaN or +/-inf in any column are removed; the original index labels
        are kept.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        raise FileNotFoundError(f"Data file not found: {filename}")

    frame = pd.read_csv(filename, parse_dates=["Date"])

    # shift(-1) leaves no next close for the final row, so the comparison is
    # False there and that row's Direction is 0.
    next_close = frame["Close"].shift(-1)
    frame["Direction"] = (next_close > frame["Close"]).astype(int)

    # Treat infinities as missing so one dropna() removes both kinds of row.
    cleaned = frame.replace([np.inf, -np.inf], np.nan).dropna()
    return cleaned

def rolling_windows(df, window_size=20, drop_cols=None):
    """Cut a frame into overlapping fixed-length feature windows.

    Window ``k`` holds the feature rows ``k .. k + window_size - 1`` and is
    paired with the ``Direction`` value of row ``k + window_size``.

    Args:
        df: DataFrame containing a ``Direction`` column plus feature columns.
        window_size: Number of consecutive rows per window.
        drop_cols: Columns excluded from the features; columns that are not
            present are ignored. Defaults to the date, price and label
            columns listed below.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` stacks one
        ``(window_size, n_features)`` block per window; ``y`` holds the
        matching labels. Both are empty 1-D arrays when ``df`` has no more
        than ``window_size`` rows.
    """
    if drop_cols is None:
        drop_cols = ["Date", "Close", "Target", "Direction",
                     "High", "Low", "Open", "High_lag1", "Low_lag1", "Open_lag1"]

    feature_frame = df.drop(columns=drop_cols, errors='ignore')
    labels = df["Direction"].values

    # Each stop index is the row being labelled; its window ends just before it.
    stops = range(window_size, len(df))
    windows = [feature_frame.iloc[stop - window_size:stop].values for stop in stops]
    targets = [labels[stop] for stop in stops]
    return np.array(windows), np.array(targets)

def reshape_for_aeon(X_3d):
    """Swap the last two axes of a 3-D array for the aeon transformers.

    Args:
        X_3d: Array indexed as ``(sample, axis_1, axis_2)``.

    Returns:
        The same data indexed as ``(sample, axis_2, axis_1)``.
    """
    axis_order = (0, 2, 1)
    swapped = np.transpose(X_3d, axes=axis_order)
    return swapped

def get_model_predictions(df, model_path, test_ratio=0.2):
    """Score the trailing test slice of ``df`` with a saved model bundle.

    The bundle loaded from ``model_path`` is read as a mapping with the keys
    ``window_size``, ``scaler``, ``hydra``, ``multirocket`` and
    ``classifier``, plus an optional ``drop_cols``.

    Args:
        df: Preprocessed DataFrame to window and score.
        model_path: Path of the saved bundle.
        test_ratio: Fraction of the windows, taken from the end, that is
            scored.

    Returns:
        Tuple ``(test_df, predictions, confidence_scores)``: a copy of the
        rows of ``df`` lined up with the scored windows, the classifier's
        ``predict`` output and its ``decision_function`` output.

    Raises:
        FileNotFoundError: If the bundle file does not exist.
        ValueError: If windowing yields no samples or the test slice is empty.
    """
    bundle = load_model(model_path)

    window_size = bundle['window_size']
    fallback_drop_cols = ["Date", "Close", "Target", "Direction",
                          "High", "Low", "Open", "High_lag1", "Low_lag1", "Open_lag1"]
    drop_cols = bundle.get('drop_cols', fallback_drop_cols)

    windows, _labels = rolling_windows(df, window_size=window_size, drop_cols=drop_cols)
    if len(windows) == 0:
        raise ValueError("No data available after creating rolling windows")

    # The saved scaler works on 2-D input: flatten the windows to rows,
    # scale them, then restore the 3-D layout.
    n_windows, n_steps, n_features = windows.shape
    flat_rows = windows.reshape(n_windows * n_steps, n_features)
    scaled_rows = bundle['scaler'].transform(flat_rows).astype("float32")
    model_input = reshape_for_aeon(scaled_rows.reshape(n_windows, n_steps, n_features))

    # Only the windows after the split point are scored.
    split_idx = int((1 - test_ratio) * n_windows)
    X_test = model_input[split_idx:]
    if len(X_test) == 0:
        raise ValueError("No test data available")

    # Both transforms see the same input; their outputs are joined column-wise.
    hydra_features = bundle['hydra'].transform(X_test)
    multirocket_features = bundle['multirocket'].transform(X_test)
    combined_features = np.hstack([hydra_features, multirocket_features])

    classifier = bundle['classifier']
    predictions = classifier.predict(combined_features)
    confidence_scores = classifier.decision_function(combined_features)

    # Window k is labelled by row window_size + k, so the first test window
    # maps to this row of df.
    first_row = window_size + split_idx
    last_row = first_row + len(predictions)
    test_df = df.iloc[first_row:last_row].copy()

    return test_df, predictions, confidence_scores

def donchian_breakout_strategy(df, predictions, confidence_scores,
                               donchian_period=20, confidence_threshold=0.0):
    """Combine Donchian channel breakouts with model predictions.

    A long signal fires when the close is above the previous bar's channel
    high, the prediction is 1 and the confidence exceeds the threshold. A
    short signal fires when the close is below the previous bar's channel
    low, the prediction is 0 and the confidence exceeds the threshold.

    Args:
        df: DataFrame with ``High``, ``Low`` and ``Close`` columns. It is
            copied, not modified.
        predictions: One class prediction (0 or 1) per row of ``df``.
        confidence_scores: One score per row of ``df``; the absolute value
            is used as the confidence.
        donchian_period: Rolling window length of the channel.
        confidence_threshold: Confidence a signal must strictly exceed.

    Returns:
        Copy of ``df`` with ``Predictions``, ``Confidence``,
        ``Donchian_High``, ``Donchian_Low``, ``Long_Signal``,
        ``Short_Signal``, ``Position``, ``Market_Return`` and
        ``Strategy_Return`` columns added. Rows containing NaN in any
        column are dropped, which removes the channel warm-up rows.
    """
    df = df.copy()
    df['Predictions'] = predictions
    df['Confidence'] = np.abs(confidence_scores)

    # Calculate Donchian Channels
    df['Donchian_High'] = df['High'].rolling(window=donchian_period).max()
    df['Donchian_Low'] = df['Low'].rolling(window=donchian_period).min()

    # Compare against the previous bar's channel so the current bar's own
    # high/low cannot make a breakout impossible.
    previous_high = df['Donchian_High'].shift(1)
    previous_low = df['Donchian_Low'].shift(1)
    confident = df['Confidence'] > confidence_threshold

    df['Long_Signal'] = ((df['Close'] > previous_high) &
                         (df['Predictions'] == 1) &
                         confident)

    df['Short_Signal'] = ((df['Close'] < previous_low) &
                          (df['Predictions'] == 0) &
                          confident)

    # Calculate positions
    df['Position'] = 0
    df.loc[df['Long_Signal'], 'Position'] = 1
    df.loc[df['Short_Signal'], 'Position'] = -1
    # Series.ffill() replaces fillna(method='ffill'), whose ``method``
    # keyword is deprecated in pandas.
    # NOTE(review): Position starts at 0 and so holds no NaN; this forward
    # fill changes nothing and a position is only non-zero on the bar that
    # raised the signal. Confirm whether positions were meant to be held
    # between signals.
    df['Position'] = df['Position'].ffill()

    # The position taken on one bar earns the following bar's return.
    df['Market_Return'] = df['Close'].pct_change()
    df['Strategy_Return'] = df['Position'].shift(1) * df['Market_Return']

    # Remove NaN values
    df = df.dropna()

    return df

def calculate_profit_factor(returns):
    """Ratio of summed gains to the absolute value of summed losses.

    Args:
        returns: NumPy array or pandas Series of per-period returns.

    Returns:
        ``gains / losses`` when there are losses. Without losses the result
        is ``np.inf`` if there are gains and ``0`` otherwise.
    """
    total_gain = returns[returns > 0].sum()
    total_loss = abs(returns[returns < 0].sum())

    if total_loss != 0:
        return total_gain / total_loss

    # No losing period: the ratio is unbounded unless nothing was gained either.
    if total_gain > 0:
        return np.inf
    return 0

def monte_carlo_permutation_test(df, predictions, confidence_scores, n_permutations=1000,
                                 random_state=None):
    """Permutation test of the strategy's profit factor.

    The strategy is run once with the real predictions and then
    ``n_permutations`` times with the predictions shuffled (confidence
    scores stay in place). The p-value is the fraction of shuffled runs
    whose profit factor is at least the real one (one-tailed).

    Args:
        df: Price frame passed to ``donchian_breakout_strategy``.
        predictions: Model predictions, one per row of ``df``.
        confidence_scores: Model confidence scores, one per row of ``df``.
        n_permutations: Number of shuffled runs.
        random_state: Optional seed (or NumPy ``Generator``) for
            reproducible shuffles. When ``None`` the global ``np.random``
            state is used, exactly as before this parameter existed.

    Returns:
        Tuple ``(actual_profit_factor, random_profit_factors, p_value)``
        where ``random_profit_factors`` is a NumPy array with one entry per
        permutation. If the real run produces no returns, the result is
        ``(0.0, np.array([0.0]), 1.0)``.
    """
    # Calculate actual strategy profit factor
    actual_strategy_df = donchian_breakout_strategy(df, predictions, confidence_scores)
    actual_returns = actual_strategy_df['Strategy_Return'].dropna()

    if len(actual_returns) == 0:
        return 0.0, np.array([0.0]), 1.0

    actual_profit_factor = calculate_profit_factor(actual_returns)

    # Keep the legacy global-state behaviour unless a seed is requested.
    if random_state is None:
        permute = np.random.permutation
    else:
        permute = np.random.default_rng(random_state).permutation

    random_profit_factors = []

    for _ in range(n_permutations):
        shuffled_predictions = permute(predictions)

        # Best effort: a failed permutation counts as a profit factor of 0.
        # Only ordinary errors are absorbed, so Ctrl-C and interpreter exit
        # still propagate.
        try:
            random_strategy_df = donchian_breakout_strategy(df, shuffled_predictions, confidence_scores)
            random_returns = random_strategy_df['Strategy_Return'].dropna()

            if len(random_returns) > 0:
                random_profit_factors.append(calculate_profit_factor(random_returns))
            else:
                random_profit_factors.append(0.0)
        except Exception:
            random_profit_factors.append(0.0)

    random_profit_factors = np.array(random_profit_factors)

    # Calculate p-value (one-tailed test)
    p_value = np.sum(random_profit_factors >= actual_profit_factor) / n_permutations

    return actual_profit_factor, random_profit_factors, p_value

def run_single_backtest(data_file, model_file, stock_name):
    """Backtest one stock and gather its performance metrics.

    Loads the data, scores the test slice with the saved model, runs the
    Donchian strategy and a 500-permutation Monte Carlo test, and prints a
    one-line status.

    Args:
        data_file: Path of the stock's feature CSV.
        model_file: Path of the stock's saved model bundle.
        stock_name: Label used in messages and in the result record.

    Returns:
        Dict of metrics for the stock, or ``None`` when the test slice has
        fewer than 50 rows, the strategy yields no returns, or any
        exception is raised (the error is printed, not propagated).
    """
    def _annualized_sharpe(returns):
        # 0 whenever the standard deviation is not strictly positive
        # (a NaN standard deviation also lands here).
        spread = returns.std()
        if spread > 0:
            return returns.mean() / spread * np.sqrt(252)
        return 0

    try:
        print(f"Processing {stock_name}...")

        # Load data and generate predictions
        prices = load_and_preprocess(data_file)
        test_df, predictions, confidence_scores = get_model_predictions(prices, model_file)

        n_test = len(test_df)
        if n_test < 50:  # Minimum test samples
            print(f"  ⚠️  Insufficient test data for {stock_name} ({len(test_df)} samples)")
            return None

        # Run Donchian strategy
        strategy_df = donchian_breakout_strategy(test_df, predictions, confidence_scores)

        strategy_returns = strategy_df['Strategy_Return'].dropna()
        market_returns = strategy_df['Market_Return'].dropna()

        if len(strategy_returns) == 0:
            print(f"  ⚠️  No strategy returns for {stock_name}")
            return None

        # Compounded return over the whole test slice
        strategy_growth = (1 + strategy_returns).cumprod()
        market_growth = (1 + market_returns).cumprod()
        total_strategy_return = strategy_growth.iloc[-1] - 1
        total_market_return = market_growth.iloc[-1] - 1

        strategy_sharpe = _annualized_sharpe(strategy_returns)
        market_sharpe = _annualized_sharpe(market_returns)

        # Profit factor
        profit_factor = calculate_profit_factor(strategy_returns)
        market_profit_factor = calculate_profit_factor(market_returns)

        # Monte Carlo test (reduced permutations for batch processing);
        # only the p-value is used here.
        _actual_pf, _random_pfs, p_value = monte_carlo_permutation_test(
            test_df, predictions, confidence_scores, n_permutations=500
        )

        # Share of bars with a positive strategy return
        win_rate = (strategy_returns > 0).mean()
        # Total absolute position change, halved
        num_trades = strategy_df['Position'].diff().abs().sum() / 2

        realised_direction = test_df['Direction'].values[:len(predictions)]
        prediction_accuracy = (predictions == realised_direction).mean()

        results = {
            'stock': stock_name,
            'test_samples': len(test_df),
            'total_strategy_return': total_strategy_return,
            'total_market_return': total_market_return,
            'excess_return': total_strategy_return - total_market_return,
            'strategy_sharpe': strategy_sharpe,
            'market_sharpe': market_sharpe,
            'profit_factor': profit_factor,
            'market_profit_factor': market_profit_factor,
            'p_value': p_value,
            'win_rate': win_rate,
            'num_trades': num_trades,
            'prediction_accuracy': prediction_accuracy,
            'test_start': test_df['Date'].min(),
            'test_end': test_df['Date'].max()
        }

        print(f"  ✓ {stock_name}: Return={total_strategy_return:.3f}, PF={profit_factor:.3f}, p={p_value:.3f}")
        return results

    except Exception as exc:
        # One failing stock must not stop the batch.
        print(f"  ❌ Error processing {stock_name}: {str(exc)}")
        return None

def batch_backtest_all_stocks(data_dir="data/processed/stock_data/",
                              models_dir="models/",
                              results_dir="backtest_results/"):
    """Run backtests for all stocks with saved models.

    Every ``*_daily_features.csv`` file in ``data_dir`` is paired with
    ``<stock>_mr_hydra_model.pkl`` in ``models_dir``. Successful results are
    written to ``batch_backtest_results.csv`` in ``results_dir``, a summary
    is printed and summary plots are created.

    Args:
        data_dir: Directory holding the per-stock feature CSV files.
        models_dir: Directory holding the saved model bundles.
        results_dir: Output directory; created if it does not exist.

    Returns:
        DataFrame with one row per successfully backtested stock, or
        ``None`` when no data files are found or no backtest succeeds.
    """
    # Create results directory
    os.makedirs(results_dir, exist_ok=True)

    # glob returns paths in arbitrary order; sorting keeps the processing
    # order and the rows of the results file stable between runs.
    data_files = sorted(glob.glob(os.path.join(data_dir, "*_daily_features.csv")))

    if not data_files:
        print(f"No data files found in {data_dir}")
        return None

    print(f"Found {len(data_files)} data files")

    # Process each stock
    all_results = []
    successful = 0
    failed = 0

    for i, data_file in enumerate(data_files, 1):
        # Extract stock name
        filename = os.path.basename(data_file)
        stock_name = filename.replace('_daily_features.csv', '')

        # Check if corresponding model exists
        model_file = os.path.join(models_dir, f"{stock_name}_mr_hydra_model.pkl")

        if not os.path.exists(model_file):
            print(f"[{i}/{len(data_files)}] ❌ No model found for {stock_name}")
            failed += 1
            continue

        print(f"[{i}/{len(data_files)}] Processing {stock_name}...")

        # Run backtest; None signals a skipped or failed stock.
        result = run_single_backtest(data_file, model_file, stock_name)

        if result is not None:
            all_results.append(result)
            successful += 1
        else:
            failed += 1

    if not all_results:
        print("No successful backtests completed!")
        return None

    # Convert to DataFrame and save
    results_df = pd.DataFrame(all_results)
    results_file = os.path.join(results_dir, "batch_backtest_results.csv")
    results_df.to_csv(results_file, index=False)

    # Summary statistics
    print(f"\n{'='*80}")
    print("BATCH BACKTEST SUMMARY")
    print(f"{'='*80}")
    print(f"Total stocks processed: {len(data_files)}")
    print(f"Successful backtests: {successful}")
    print(f"Failed backtests: {failed}")

    # A stock with gains and no losing bar has an infinite profit factor;
    # one such stock would turn the plain average into inf, so the average
    # is taken over the finite values only.
    profit_factors = results_df['profit_factor'].astype(float)
    infinite_pf = np.isinf(profit_factors)
    finite_pf_mean = profit_factors[~infinite_pf].mean()

    # Performance summary
    print(f"\nPERFORMance STATISTICS:")
    print(f"Average Strategy Return: {results_df['total_strategy_return'].mean():.4f}")
    print(f"Average Market Return: {results_df['total_market_return'].mean():.4f}")
    print(f"Average Excess Return: {results_df['excess_return'].mean():.4f}")
    print(f"Average Profit Factor: {finite_pf_mean:.4f}")
    if infinite_pf.any():
        print(f"  (excluding {int(infinite_pf.sum())} stock(s) with an infinite profit factor)")
    print(f"Average Sharpe Ratio: {results_df['strategy_sharpe'].mean():.4f}")
    print(f"Average Win Rate: {results_df['win_rate'].mean():.4f}")
    print(f"Average P-value: {results_df['p_value'].mean():.4f}")

    # Statistical significance
    significant_stocks = results_df[results_df['p_value'] < 0.05]
    print(f"\nStatistically significant strategies (p < 0.05): {len(significant_stocks)}/{len(results_df)}")

    if len(significant_stocks) > 0:
        print(f"Significant stocks:")
        for _, row in significant_stocks.iterrows():
            print(f"  - {row['stock']}: Return={row['total_strategy_return']:.3f}, PF={row['profit_factor']:.3f}, p={row['p_value']:.4f}")

    # Top performers by return
    top_performers = results_df.nlargest(5, 'total_strategy_return')
    print(f"\nTop 5 performers by return:")
    for _, row in top_performers.iterrows():
        print(f"  - {row['stock']}: {row['total_strategy_return']:.4f} ({row['total_strategy_return']*100:.2f}%)")

    # Top performers by profit factor
    top_pf = results_df.nlargest(5, 'profit_factor')
    print(f"\nTop 5 performers by profit factor:")
    for _, row in top_pf.iterrows():
        print(f"  - {row['stock']}: {row['profit_factor']:.4f}")

    print(f"\nDetailed results saved to: {results_file}")

    # Create summary plots
    create_summary_plots(results_df, results_dir)

    return results_df

def create_summary_plots(results_df, results_dir):
    """Create summary plots for batch backtest results.

    Draws a 2x2 figure (return distributions, profit factor distribution,
    p-value distribution, return vs p-value scatter), saves it as
    ``batch_backtest_summary.png`` in ``results_dir`` and shows it.

    Args:
        results_df: DataFrame with ``total_strategy_return``,
            ``total_market_return``, ``profit_factor`` and ``p_value``
            columns.
        results_dir: Existing directory the PNG is written to.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

    # Returns distribution
    ax1.hist(results_df['total_strategy_return'], bins=20, alpha=0.7, label='Strategy')
    ax1.hist(results_df['total_market_return'], bins=20, alpha=0.7, label='Market')
    ax1.set_title('Returns Distribution')
    ax1.set_xlabel('Total Return')
    ax1.legend()
    ax1.grid(True)

    # Profit Factor distribution. A stock without a losing bar has an
    # infinite profit factor, and a histogram cannot bin a non-finite
    # range, so only finite values are drawn.
    profit_factors = results_df['profit_factor'].astype(float)
    finite_profit_factors = profit_factors[np.isfinite(profit_factors)]
    ax2.hist(finite_profit_factors, bins=20, alpha=0.7)
    ax2.set_title('Profit Factor Distribution')
    ax2.set_xlabel('Profit Factor')
    ax2.grid(True)

    # P-value distribution
    ax3.hist(results_df['p_value'], bins=20, alpha=0.7)
    ax3.axvline(0.05, color='red', linestyle='--', label='p=0.05')
    ax3.set_title('P-value Distribution')
    ax3.set_xlabel('P-value')
    ax3.legend()
    ax3.grid(True)

    # Scatter: Return vs P-value (red marks p < 0.05)
    colors = ['red' if p < 0.05 else 'blue' for p in results_df['p_value']]
    ax4.scatter(results_df['p_value'], results_df['total_strategy_return'], c=colors, alpha=0.6)
    ax4.axvline(0.05, color='red', linestyle='--', alpha=0.5)
    ax4.set_xlabel('P-value')
    ax4.set_ylabel('Total Strategy Return')
    ax4.set_title('Return vs Statistical Significance')
    ax4.grid(True)

    plt.tight_layout()
    plt.savefig(os.path.join(results_dir, 'batch_backtest_summary.png'), dpi=300, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

if __name__ == "__main__":
    # Script entry point: backtest every stock using the default project
    # layout, then confirm completion when a results table came back.
    results = batch_backtest_all_stocks(
        data_dir="data/processed/stock_data/",
        models_dir="models/",
        results_dir="backtest_results/",
    )

    batch_succeeded = results is not None
    if batch_succeeded:
        print("\nBatch processing complete! Results saved to backtest_results/")