In [2]:
import ta_functions as ta
import yfinance as yf
import pandas as pd
import numpy as np

from scipy.stats import norm

from datetime import datetime, timedelta
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from xgboost import XGBRegressor # Keep if you plan to use it, currently unused
from sklearn.pipeline import Pipeline # Keep if you plan to use it, currently unused

from tabulate import tabulate
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

# --- Global Configurations ---
TICKERS = ["COIN", "TSLA", "GOOGL", "NVDA", "AAPL", "NKE", "SMCI", "XPEV", "NIO", "UNH", "XYZ"]
YEARS_OF_DATA = 2 # For initial data fetch for live predictions
PROFIT_TARGET = 0.05
STOP_LOSS = 0.04
FORWARD_DAYS = 14 # For prediction window
future_window = 14 # For backtesting trade evaluation

tolerance = 1.07 # Allows entry if current price is within 7% of predicted entry

# Time window for current predictions (latest data)
end_date = datetime.now()
start_date = end_date - timedelta(days=365 * YEARS_OF_DATA)

# Shared model components
FEATURES = ['RSI', 'RSI_SMA', 'SMA1', 'SMA2', 'SMA3',
            'SMA_Ratio', 'MACD', 'Signal_Line', 'Upper_Band', 'Lower_Band', 'Volume_MA20',
            '5_day_return', '10_day_return', 'Volatility', 'CCI', 'OBV', '+DI',
            '-DI', 'ADX', 'ATR', 'VWMA', 'VI+','VI-', 'KCu','KCl', 'STu', 'STl', 'Candlesticks', # Changed 'VI+' to 'VI-' to match calculation
            'Bear', 'Bull', 'vSpike', 'DD'] # Added 'Candlesticks' from add_technical_indicators

results = [] # To store live prediction results

# --- Data Acquisition and Feature Engineering Functions ---
def get_stock_data(ticker, start_date, end_date):
    """Downloads stock data from Yahoo Finance."""
    df = yf.download(ticker, start=start_date, end=end_date + timedelta(days=1), # +1 day to ensure end_date is included
                     interval='1d', auto_adjust=False, progress=False)
    df = df.reset_index()
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    df.columns = [col[0] if isinstance(col, tuple) else col for col in df.columns] # Handle multi-level columns
    df = df.dropna() # Drop rows with any NaN after initial download
    return df

def add_technical_indicators(df):
    """Adds various technical indicators to the DataFrame."""
    # Ensure all necessary columns exist before calculations
    if not all(col in df.columns for col in ['Open', 'High', 'Low', 'Close', 'Volume']):
        raise ValueError("DataFrame must contain 'Open', 'High', 'Low', 'Close', 'Volume' columns.")

    df['SMA1'] = df['Close'].rolling(window=12).mean()
    df['SMA2'] = df['Close'].rolling(window=24).mean()
    df['SMA3'] = df['Close'].rolling(window=52).mean()
    df['SMA_Ratio'] = df['SMA1'] / df['SMA2']
    
    df['Bear'] = (df['SMA1'] < df['SMA2']).astype(int)
    df['Bull'] = (df['SMA2'] < df['SMA1']).astype(int)
    
    df['RSI'] = ta.calculate_rsi(df)
    df['RSI_SMA'] = df['RSI'] / df['RSI'].rolling(14).mean() # Corrected to avoid division by zero if RSI_SMA is used as a feature
    
    ema12 = df['Close'].ewm(span=12, adjust=False).mean()
    ema26 = df['Close'].ewm(span=24, adjust=False).mean()
    df['MACD'] = ema12 - ema26
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    
    df['Upper_Band'] = df['SMA1'] + (2 * df['Close'].rolling(20).std())
    df['Lower_Band'] = df['SMA1'] - (2 * df['Close'].rolling(20).std())
    
    df['Volume_MA20'] = df['Volume'].rolling(window=20).mean()
    df['vSpike'] = (df['Volume'] > 2 * df['Volume_MA20']).astype(int)
    
    df['CCI'] = ta.calculate_cci(df)
    df['OBV'] = ta.calculate_obv(df)
    
    dmi_results = ta.calculate_dmi(df, n=14)
    if dmi_results is not None and len(dmi_results) == 3: # Ensure 3 columns are returned
        df[['+DI', '-DI', 'ADX']] = dmi_results
    else:
        # Handle cases where dmi might return unexpected output or NaNs
        df[['+DI', '-DI', 'ADX']] = np.nan, np.nan, np.nan # Fill with NaN if not calculated
        
    df['ATR'] = ta.calculate_atr(high=df.High, low=df.Low, close=df.Close)
    
    df['VWMA'] = ta.calculate_vwma(df)
    
    keltner_results = ta.calculate_keltner(df)
    if keltner_results is not None and len(keltner_results) == 3:
        df[['KCm', 'KCu', 'KCl']] = keltner_results
    else:
        df[['KCm', 'KCu', 'KCl']] = np.nan, np.nan, np.nan
        
    vortex_results = ta.calculate_vortex(df)
    if vortex_results is not None and len(vortex_results) == 2:
        df[['VI+', 'VI-']] = vortex_results
    else:
        df[['VI+', 'VI-']] = np.nan, np.nan
        
    supertrend_results = ta.calculate_supertrend(df)
    if supertrend_results is not None and len(supertrend_results) == 2:
        df[['STu', 'STl']] = supertrend_results
    else:
        df[['STu', 'STl']] = np.nan, np.nan
    
    # Add Candlestick Patterns
    df = ta.add_candlestickpatterns(df) # This function usually adds columns like 'Candlesticks'
    if 'Candlesticks' not in df.columns: # Ensure 'Candlesticks' column is created
        df['Candlesticks'] = 0 # Default to 0 if no patterns found or function fails

    df['DD'] = df['Close'].where(df['Close'] < df['Close'].shift(1)).std() # Daily Drawdown std

    df['5_day_return'] = df['Close'].pct_change(5)
    df['10_day_return'] = df['Close'].pct_change(10)
    df['Volatility'] = df['Close'].rolling(14).std()
    
    return df

# --- Target Variable Calculation Functions (Look-ahead for training, not for prediction features) ---
def compute_expected_return(df, forward_days=FORWARD_DAYS):
    """Calculates the maximum future return within a forward_days window."""
    df['Expected_Return'] = np.nan
    close_prices = df['Close'].values
    for i in range(len(close_prices) - forward_days):
        current_price = close_prices[i]
        future_max = np.nanmax(close_prices[i + 1:i + 1 + forward_days])
        expected_return = (future_max - current_price) / current_price
        df.iloc[i, df.columns.get_loc('Expected_Return')] = expected_return
    return df

def compute_expected_loss(df, forward_days=FORWARD_DAYS):
    """Calculates the minimum future return (max loss) within a forward_days window."""
    df['Expected_Loss'] = np.nan
    close_prices = df['Close'].values
    for i in range(len(close_prices) - forward_days):
        current_price = close_prices[i]
        future_min = np.nanmin(close_prices[i + 1:i + 1 + forward_days])
        expected_loss = (future_min - current_price) / current_price
        df.iloc[i, df.columns.get_loc('Expected_Loss')] = expected_loss
    return df

def compute_expected_entry(df, n=3):
    """
    Predicts a potential optimal entry price, e.g., the minimum low in the next 'n' days.
    This is for training the entry model.
    """
    # Shift(-n) looks 'n' days into the future for the current row
    df['Expected_Entry'] = df['Low'].rolling(window=n, min_periods=1).min().shift(-(n-1)) # shifted by -(n-1) to align with actual next day, e.g., n=3, shift(-2)
    return df

def label_tp_hit(df, window=FORWARD_DAYS, profit_target=PROFIT_TARGET, stop_loss=STOP_LOSS):
    """
    Labels each row:
    1 = TP hit before SL
    0 = SL hit before TP or neither hit (or if TP is hit but SL is hit earlier or simultaneously)
    """
    labels = []
    close_prices = df['Close'].values
    high_prices = df['High'].values # Use High for TP check
    low_prices = df['Low'].values   # Use Low for SL check

    for i in range(len(close_prices) - window):
        current_price = close_prices[i]
        tp_level = current_price * (1 + profit_target)
        sl_level = current_price * (1 - stop_loss) # Stop loss is a positive value, so 1 - SL

        # Future window for evaluation
        future_highs = high_prices[i + 1:i + 1 + window]
        future_lows = low_prices[i + 1:i + 1 + window]

        tp_hit_day = next((j for j, price in enumerate(future_highs) if price >= tp_level), None)
        sl_hit_day = next((j for j, price in enumerate(future_lows) if price <= sl_level), None)

        if tp_hit_day is not None and (sl_hit_day is None or tp_hit_day < sl_hit_day):
            labels.append(1) # TP hit before or without SL
        else:
            labels.append(0) # SL hit before TP, or neither hit, or SL hit first

    labels += [np.nan] * window
    df['TP_Hit_Label'] = labels
    return df

# --- Live Prediction Function ---
def make_live_predictions(tickers, start_date, end_date, features):
    """
    Generates live predictions for a list of tickers using models trained on
    the entire available historical data.
    """
    all_results = []
    
    for ticker in tickers:
        try:
            df = get_stock_data(ticker, start_date, end_date)
            if df.empty:
                print(f"Skipping {ticker}: No data downloaded.")
                continue

            df['Volume'] = pd.to_numeric(df['Volume'], errors='coerce')
            df = add_technical_indicators(df)
            
            # These are for training targets, we calculate them on the full dataset
            # up to the second to last day. The last day is for actual prediction.
            df_for_training = df.iloc[:-1].copy() # Exclude the very last day for target calculation

            df_for_training = compute_expected_return(df_for_training, FORWARD_DAYS)
            df_for_training = compute_expected_loss(df_for_training, FORWARD_DAYS)
            df_for_training = compute_expected_entry(df_for_training, 3) # Using n=3 for entry prediction

            # Label for classification model
            df_for_training = label_tp_hit(df_for_training, FORWARD_DAYS, PROFIT_TARGET, STOP_LOSS)

            # Prepare data for model training
            # We need to ensure FEATURES are available and valid in the training data
            required_features_and_labels = features + ['Expected_Return', 'Expected_Loss', 'Expected_Entry', 'TP_Hit_Label']
            df_model = df_for_training.dropna(subset=required_features_and_labels)
            
            if len(df_model) < 20:
                print(f"Skipping {ticker}: Not enough clean data for training ({len(df_model)} rows).")
                continue

            X = df_model[features]
            
            # --- Scalers for each model ---
            scaler_return = StandardScaler()
            scaler_loss = StandardScaler()
            scaler_entry = StandardScaler()
            scaler_class = StandardScaler()

            X_scaled_return = scaler_return.fit_transform(X)
            X_scaled_loss = scaler_loss.fit_transform(X)
            X_scaled_entry = scaler_entry.fit_transform(X)
            X_scaled_class = scaler_class.fit_transform(X) # Can reuse scaler_return if features are the same

            # --- Train Return Model ---
            y_return = df_model['Expected_Return']
            model_return = RandomForestRegressor(
                n_estimators=200, max_depth=10, min_samples_leaf=5, max_features='sqrt', ccp_alpha=0.01, random_state=42
            )
            model_return.fit(X_scaled_return, y_return)

            # --- Train Loss Model ---
            y_loss = df_model['Expected_Loss']
            model_loss = RandomForestRegressor(
                n_estimators=200, max_depth=10, min_samples_leaf=5, max_features='sqrt', ccp_alpha=0.01, random_state=42
            )
            model_loss.fit(X_scaled_loss, y_loss)

            # --- Train Entry Model ---
            y_entry = df_model['Expected_Entry']
            model_entry = RandomForestRegressor(
                n_estimators=200, max_depth=10, min_samples_leaf=5, max_features='sqrt', ccp_alpha=0.01, random_state=42
            )
            model_entry.fit(X_scaled_entry, y_entry)

            # --- Train Classification Model ---
            y_class = df_model['TP_Hit_Label'].astype(int)
            model_class = RandomForestClassifier(
                n_estimators=200, max_depth=10, min_samples_leaf=5, random_state=42
            )
            model_class.fit(X_scaled_class, y_class)

            # --- Make Predictions for the latest day ---
            latest = df.iloc[[-1]] # The actual latest row of the original df
            if latest[features].isnull().values.any():
                print(f"Skipping {ticker}: Latest data point has missing features.")
                continue
            
            # Scale latest features for prediction
            latest_scaled_return = scaler_return.transform(latest[features])
            latest_scaled_loss = scaler_loss.transform(latest[features])
            latest_scaled_entry = scaler_entry.transform(latest[features])
            latest_scaled_class = scaler_class.transform(latest[features])
            
            predicted_return = model_return.predict(latest_scaled_return)[0]
            predicted_loss = model_loss.predict(latest_scaled_loss)[0]
            predicted_entry = model_entry.predict(latest_scaled_entry)[0]
            
            tp_hit_prob = model_class.predict_proba(latest_scaled_class)[0][1] # Probability of TP hit

            current_price = latest['Close'].values[0]
            
            # Determine the 'best_entry' as the predicted entry
            best_entry = predicted_entry
            
            # Adjust TP/SL based on this best_entry
            predicted_tp = best_entry * (1 + predicted_return)
            
            # Ensure predicted_loss is negative for SL calculation
            # Use min(predicted_loss, -STOP_LOSS) to ensure the predicted loss is at least the predefined STOP_LOSS
            predicted_sl_value = predicted_loss # raw prediction from model
            final_sl_factor = min(predicted_sl_value, -STOP_LOSS) # Use the more conservative (larger absolute) stop loss
            predicted_sl = best_entry * (1 + final_sl_factor) # predicted_loss from model is already negative
            
            entry_discount_pct = ((best_entry - current_price) / current_price) * 100

            # Confidence score calculation
            # Avoid division by zero and handle cases where predicted_loss might be positive
            if predicted_loss < 0: # Ensure loss is indeed a loss (negative)
                rr = predicted_return / abs(predicted_loss)
            else:
                rr = 0 # No risk (or positive loss, which is wrong), so RR is zero or undefined
            
            confidence_score = tp_hit_prob * max(rr, 0) # Only consider positive risk-reward

            # Technical condition check for "Signal"
            sma1 = latest['SMA1'].values[0]
            sma2 = latest['SMA2'].values[0]
            rsi = latest['RSI'].values[0]

            signal = "⚠️ Neutral"
            # Using current_price relative to SMA for signal
            if (current_price > sma1 and sma1 > sma2 and rsi > 52):
                signal = "✅ Bullish"
            elif (current_price < sma1 and sma1 < sma2 and rsi < 48):
                 signal = "🔻 Bearish"
            
            # Risk assessment
            risk_label = "🟢 Low Risk" if abs(predicted_loss) <= STOP_LOSS else "🔴 High Risk"

            all_results.append({
                "Ticker": ticker,
                "Date": latest.index[-1].date(),
                "Current_Price": round(current_price, 2), # Changed from "Price" to "Current_Price"
                "Predicted_Entry": round(best_entry, 2), # Renamed "Entry" to "Predicted_Entry"
                "Entry_Diff_Pct": round(entry_discount_pct, 2), # Renamed "Entry%"
                "Predicted_Max_Return_Pct": round(predicted_return * 100, 2), # Renamed "Max (%)"
                "Predicted_TP_Price": round(predicted_tp, 2), # Renamed "TP"
                "Predicted_SL_Price": round(predicted_sl, 2), # Renamed "SL"
                "Predicted_Max_Loss_Pct": round(predicted_loss * 100, 2), # Renamed "Loss (%)"
                "Technical_Signal": signal, # Renamed "Signal"
                "Risk_Assessment": risk_label, # Renamed "Risk"
                "TP_Hit_Probability": round(tp_hit_prob * 100, 2), # Renamed "TP_Prob"
                "Confidence_Score": round(confidence_score * 100, 2), # Renamed "Confidence"
            })

        except Exception as e:
            print(f"Error processing {ticker} for live prediction: {e}")
            
    return pd.DataFrame(all_results)

# --- Backtesting Function (Revised for Rolling Window) ---
def train_and_backtest_revised(ticker="", initial_train_years=2, backtest_days=252, show_every_n=1):
    """
    Performs a rolling window backtest for a given ticker.
    initial_train_years: Number of years for the initial training set.
    backtest_days: Number of trading days to backtest (approx 252 days per year).
    show_every_n: For plotting, shows every Nth trade annotation to avoid clutter.
    """
    print(f"Starting backtest for {ticker}...")
    
    # 1. Get and prepare full data
    # Download more data than strictly needed to ensure enough training history
    end_date_full = datetime.now()
    # Ensure enough historical data for initial training and indicator calculation
    start_date_full = end_date_full - timedelta(days=365 * (initial_train_years + (backtest_days / 365) + 0.5)) # Added 0.5 years buffer
    
    df_full = get_stock_data(ticker, start_date_full, end_date_full)
    if df_full.empty:
        print(f"[{ticker}] No data downloaded for backtest.")
        return pd.DataFrame(), {}
    
    df_full['Volume'] = pd.to_numeric(df_full['Volume'], errors='coerce')
    df_full = add_technical_indicators(df_full)
    
    signals = [] # To store all generated signals (including no trades)

    # Determine the actual start of the backtesting period
    # Ensure there's at least initial_train_years worth of data before backtest_start_date
    min_data_points_for_initial_train = int(365 * initial_train_years) + 52 # approx days for 2 years + longest SMA window
    
    if len(df_full) <= min_data_points_for_initial_train + FORWARD_DAYS + 1: # +1 for current day, +FORWARD_DAYS for future window
        print(f"[{ticker}] Not enough data for backtest. Requires at least {min_data_points_for_initial_train + FORWARD_DAYS + 1} data points.")
        return pd.DataFrame(), {}

    # Iterate day by day through the backtesting period
    # The loop starts from where enough training data is available
    start_idx_for_backtest_loop = min_data_points_for_initial_train 
    
    for i in range(start_idx_for_backtest_loop, len(df_full) - FORWARD_DAYS): # Loop up to last day minus FORWARD_DAYS to allow future window
        current_date_idx = i
        
        # Define the training window (all data *before* current_date_idx)
        train_window_df = df_full.iloc[:current_date_idx].copy()
        
        # Define the current day for prediction
        current_day_df = df_full.iloc[current_date_idx:current_date_idx+1].copy()
        
        if current_day_df.empty or current_day_df[FEATURES].isnull().values.any():
            # If current day's features are incomplete, skip this day
            signals.append({
                'Date': df_full.index[current_date_idx], # Still record the date
                'Signal_Generated': False,
                'Result': "Skipped (Missing Features)"
            })
            continue

        # Compute future-based labels for the TRAINING DATA ONLY
        train_window_df = compute_expected_return(train_window_df, FORWARD_DAYS)
        train_window_df = compute_expected_loss(train_window_df, FORWARD_DAYS)
        train_window_df = compute_expected_entry(train_window_df, 3) # Use n=3 for entry

        # Label for classification model
        train_window_df = label_tp_hit(train_window_df, FORWARD_DAYS, PROFIT_TARGET, STOP_LOSS)

        # Drop NaNs after calculating all features and labels for the training window
        required_train_cols = FEATURES + ['Expected_Return', 'Expected_Loss', 'Expected_Entry', 'TP_Hit_Label']
        train_window_df_cleaned = train_window_df.dropna(subset=required_train_cols)

        if len(train_window_df_cleaned) < 50: # Ensure sufficient training data
            print(f"[{ticker}] Not enough clean training data at {df_full.index[current_date_idx].date()}. ({len(train_window_df_cleaned)} rows). Skipping.")
            signals.append({
                'Date': df_full.index[current_date_idx],
                'Signal_Generated': False,
                'Result': "Skipped (Not Enough Training Data)"
            })
            continue
        
        # Define features and targets for training
        X_train = train_window_df_cleaned[FEATURES]
        y_return = train_window_df_cleaned['Expected_Return']
        y_loss = train_window_df_cleaned['Expected_Loss']
        y_entry = train_window_df_cleaned['Expected_Entry']
        y_class = train_window_df_cleaned['TP_Hit_Label'].astype(int)

        # Initialize and train models
        # Using a Pipeline with StandardScaler for consistent scaling within the loop
        pipeline_return = Pipeline([('scaler', StandardScaler()), ('regressor', RandomForestRegressor(n_estimators=200, max_depth=7, min_samples_leaf=5, max_features='sqrt', ccp_alpha=0.01, random_state=42))])
        pipeline_loss = Pipeline([('scaler', StandardScaler()), ('regressor', RandomForestRegressor(n_estimators=200, max_depth=7, min_samples_leaf=5, max_features='sqrt', ccp_alpha=0.01, random_state=42))])
        pipeline_entry = Pipeline([('scaler', StandardScaler()), ('regressor', RandomForestRegressor(n_estimators=200, max_depth=7, min_samples_leaf=5, max_features='sqrt', ccp_alpha=0.01, random_state=42))])
        pipeline_class = Pipeline([('scaler', StandardScaler()), ('classifier', RandomForestClassifier(n_estimators=200, max_depth=10, min_samples_leaf=5, random_state=42))])

        pipeline_return.fit(X_train, y_return)
        pipeline_loss.fit(X_train, y_loss)
        pipeline_entry.fit(X_train, y_entry)
        pipeline_class.fit(X_train, y_class)

        # Predict for the current day (using only its features)
        pred_return = pipeline_return.predict(current_day_df[FEATURES])[0]
        pred_loss = pipeline_loss.predict(current_day_df[FEATURES])[0]
        pred_entry = pipeline_entry.predict(current_day_df[FEATURES])[0]
        tp_hit_prob = pipeline_class.predict_proba(current_day_df[FEATURES])[0][1]

        current_price = current_day_df['Close'].values[0] 
        date = current_day_df.index[0]

        # Use the predicted entry price for trade calculations
        entry_price_for_trade = pred_entry 
        
        # Basic sanity check for predicted entry price (e.g., must be somewhat realistic)
        today_low = current_day_df['Low'].values[0]
        if not np.isnan(today_low) and (entry_price_for_trade < today_low * 0.8 or entry_price_for_trade > today_low * 1.2):
             signals.append({
                'Date': date,
                'Signal_Generated': False,
                'Result': "Skipped (Unrealistic Predicted Entry)"
            })
             continue # Skip unreasonable entry predictions
        
        # Calculate TP/SL levels based on the predicted entry price
        # Ensure predicted_loss is a negative value for SL calculation
        sl_price = entry_price_for_trade * (1 + min(pred_loss, -STOP_LOSS)) # Take the min of predicted loss and -STOP_LOSS (e.g., if pred_loss is -0.03 and STOP_LOSS is 0.04, then -0.04 is used)
        tp_price = entry_price_for_trade * (1 + pred_return)

        # Look into the actual future (relative to the current_date_idx) for trade outcome
        future_window_data = df_full.iloc[current_date_idx + 1 : current_date_idx + 1 + FORWARD_DAYS].copy()
        
        # Check if we can actually get into the trade based on the current market price
        # This simulates whether the predicted entry price was achievable
        if current_price <= entry_price_for_trade * tolerance: 
            # Trade is entered
            exit_price = np.nan
            result = "No Exit"
            holding_days = 0

            if not future_window_data.empty:
                # Find the first day TP or SL is hit
                tp_hit_day_idx = None
                sl_hit_day_idx = None

                for k, (fut_date, fut_row) in enumerate(future_window_data.iterrows()):
                    if fut_row['High'] >= tp_price:
                        tp_hit_day_idx = k
                    if fut_row['Low'] <= sl_price:
                        sl_hit_day_idx = k
                    
                    if tp_hit_day_idx is not None or sl_hit_day_idx is not None:
                        break # Exit loop once either hit is detected

                if tp_hit_day_idx is not None and (sl_hit_day_idx is None or tp_hit_day_idx <= sl_hit_day_idx):
                    exit_price = tp_price 
                    result = "TP Hit"
                    holding_days = tp_hit_day_idx + 1
                elif sl_hit_day_idx is not None:
                    exit_price = sl_price
                    result = "SL Hit"
                    holding_days = sl_hit_day_idx + 1
                else: # No TP or SL hit within the window
                    exit_price = future_window_data['Close'].iloc[-1]
                    result = "EOD Exit"
                    holding_days = FORWARD_DAYS
            else: # If future_window_data is empty for some reason
                exit_price = current_price # No exit, effectively
                result = "No Future Data"
                holding_days = 0

            trade_return = (exit_price - entry_price_for_trade) / entry_price_for_trade if not pd.isna(exit_price) else np.nan

            signals.append({
                'Date': date,
                'Price_at_Signal': current_price,
                'Predicted_Entry': entry_price_for_trade,
                'TP': tp_price,
                'SL': sl_price,
                'Exit': exit_price,
                'Result': result,
                'Holding_Days': holding_days,
                'Return': trade_return,
                'Signal_Generated': True,
                'Predicted_Return': pred_return, # For internal check
                'Predicted_Loss': pred_loss,     # For internal check
                'TP_Hit_Prob': tp_hit_prob       # For internal check
            })
        else:
            # Signal generated, but trade not taken due to price not reaching predicted entry within tolerance
            signals.append({
                'Date': date,
                'Price_at_Signal': current_price,
                'Predicted_Entry': entry_price_for_trade,
                'TP': tp_price,
                'SL': sl_price,
                'Exit': np.nan,
                'Result': "No Trade (Entry Price Too High)",
                'Holding_Days': np.nan,
                'Return': np.nan,
                'Signal_Generated': False,
                'Predicted_Return': pred_return,
                'Predicted_Loss': pred_loss,
                'TP_Hit_Prob': tp_hit_prob
            })

    # Convert signals to DataFrame and filter for actual trades for statistics
    signals_df = pd.DataFrame(signals).set_index('Date')
    trades = signals_df[signals_df['Signal_Generated']].copy()
    trades = trades[trades['Return'].notna()] # Ensure return is calculated

    if len(trades) == 0:
        print(f"[{ticker}] No valid trades generated after backtesting and filtering.")
        return pd.DataFrame(), {}

    # 4. Calculate Statistics
    successful = trades[trades['Return'] > 0]
    failed = trades[trades['Return'] <= 0]
    
    # Avoid division by zero for metrics like Win Rate, Avg Win/Loss, Profit Factor
    total_trades = len(trades)
    num_successful = len(successful)
    num_failed = len(failed)

    win_rate = num_successful / total_trades if total_trades > 0 else 0
    avg_return = trades['Return'].mean()
    avg_win = successful['Return'].mean() if num_successful > 0 else 0
    avg_loss = failed['Return'].mean() if num_failed > 0 else 0 # Avg Loss should be negative
    
    # Median return/win/loss can be zero if no such trades occurred
    median_return = trades['Return'].median()
    median_win = successful['Return'].median() if num_successful > 0 else 0
    median_loss = failed['Return'].median() if num_failed > 0 else 0

    max_gain = trades['Return'].max()
    max_loss = trades['Return'].min()
    
    avg_holding_days = trades['Holding_Days'].mean()

    tp_hit_rate = (trades['Result'] == 'TP Hit').mean()
    sl_hit_rate = (trades['Result'] == 'SL Hit').mean()
    
    # Return/Risk should be positive, so abs(avg_loss)
    return_risk = -avg_win / avg_loss if avg_loss < 0 else (float('inf') if avg_win > 0 else 0)
    
    # Profit Factor
    total_gross_profit = successful['Return'].sum()
    total_gross_loss = abs(failed['Return'].sum()) # Sum of absolute losses
    profit_factor = total_gross_profit / total_gross_loss if total_gross_loss > 0 else float('inf')

    stats = {
        'Total Trades': total_trades,
        'Win Rate': win_rate,
        'Avg Return': avg_return,
        'Median Return': median_return,
        'Avg Win': avg_win,
        'Median Win': median_win,
        'Max Gain': max_gain,
        'Avg Loss': avg_loss,
        'Median Loss': median_loss,
        'Max Loss': max_loss,
        'Avg Holding Days': avg_holding_days,
        'TP Hit Rate': tp_hit_rate,
        'SL Hit Rate': sl_hit_rate,
        'Return/Risk': return_risk,
        'Profit Factor': profit_factor
    }

    # 5. Enhanced Visualization
    plt.figure(figsize=(16, 12))
    grid = plt.GridSpec(4, 1, height_ratios=[3, 1, 1, 1])
    
    # Price Chart
    ax1 = plt.subplot(grid[0])
    # Plot the full dataframe data (including training portion)
    plt.plot(df_full.index, df_full['Close'], label=ticker, color='grey', alpha=0.6)
    plt.plot(df_full.index, df_full['SMA1'], label='MA12', color='orange', alpha=0.6)
    plt.plot(df_full.index, df_full['SMA2'], label='MA24', color='red', alpha=0.6)
    
    plt.fill_between(df_full.index, df_full['SMA1'], df_full['SMA2'],
                     where=(df_full['SMA1'] > df_full['SMA2']), interpolate=True,
                     color='limegreen', alpha=0.2, label='Bullish', zorder=0)
    
    plt.fill_between(df_full.index, df_full['SMA1'], df_full['SMA2'],
                     where=(df_full['SMA1'] < df_full['SMA2']), interpolate=True,
                     color='tomato', alpha=0.2, label='Bearish', zorder=0)
    
    # Plot actual trades (filtered for Signal_Generated=True)
    sample_trades_to_plot = trades.iloc[::show_every_n] # Sample to avoid clutter
    
    # Entry points (Predicted_Entry is the target price)
    plt.scatter(sample_trades_to_plot.index, sample_trades_to_plot['Predicted_Entry'], color='green', 
                marker='^', s=50, label='Predicted Entry', zorder=3, alpha = 0.7)
    
    # Exit points (color by result)
    colors = {'TP Hit':'blue', 'SL Hit':'red', 'EOD Exit':'gray'}
    for res_type, group in sample_trades_to_plot.groupby('Result'):
        if res_type in colors: # Only plot known result types
            plt.scatter(group.index, group['Exit'], 
                        color=colors[res_type], marker='o', 
                        s=50, label=f'{res_type}', zorder=2, alpha = 0.7)
    
    # Annotations for sampled trades
    for date, row in sample_trades_to_plot.iterrows():
        # Only annotate if Exit price is not NaN
        if not pd.isna(row['Exit']):
            if row['Return'] > 0:  # Winning trade
                plt.annotate(f"TP: {row['TP']:.1f}\n({row['Return']:.1%})",
                             (date, row['TP']), 
                             xytext=(0,10), textcoords='offset points', ha='center', 
                             va='bottom', color='green', fontsize=8,
                             bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8, edgecolor='none'))
            else:  # Losing trade
                plt.annotate(f"SL: {row['SL']:.1f}\n({row['Return']:.1%})",
                             (date, row['SL']), 
                             xytext=(0,-10), textcoords='offset points', ha='center', 
                             va='top', color='red', fontsize=8,
                             bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8, edgecolor='none'))

    ax1.text(0.5, 0.5, f'{ticker}', transform=ax1.transAxes, 
             fontsize=50, color='grey', alpha=0.2,
             horizontalalignment='center', verticalalignment='center',
             rotation=0, weight='bold', style='italic')
    
    plt.title(f"{ticker} Backtest Results | {df_full.index[start_idx_for_backtest_loop].date()} to {df_full.index[-1].date()}")
    plt.legend()
    plt.grid(True)
    
    # Add RSI with curved fill
    axRSI = plt.subplot(grid[1])
    plt.plot(df_full.index, df_full['RSI'], label='RSI', color='grey', alpha=0.6, zorder=2)
    
    plt.fill_between(df_full.index, df_full['RSI'], 50, where=(df_full['RSI'] >= 50),
                     interpolate=True, color='limegreen', alpha=0.2, zorder=1)
    
    plt.fill_between(df_full.index, df_full['RSI'], 50, where=(df_full['RSI'] <= 50),
                     interpolate=True, color='tomato', alpha=0.2, zorder=1)
    
    plt.axhline(50, color='red', linestyle='-', alpha=0.2, zorder=0)
    plt.legend()
    plt.grid(False)
    plt.ylabel('RSI')
            
    # Returns Distribution
    ax2 = plt.subplot(grid[2])
    plt.hist(trades['Return'], bins=20, color='skyblue', edgecolor='black')
    plt.axvline(x=0, color='red', linestyle='--')
    plt.title('Returns Distribution')
    plt.xlabel(f'{FORWARD_DAYS}-day Return')
    plt.ylabel('Frequency')
    plt.grid(True)
    
    # Cumulative Returns
    CAPITAL = 1000
    ax3 = plt.subplot(grid[3])
    # Ensure trades are sorted by date for cumulative returns
    cumulative_returns = (1 + trades.sort_index()['Return']).cumprod()
    (cumulative_returns * CAPITAL).plot(color='green', label='Strategy')
    plt.axhline(y=CAPITAL, color='red', linestyle='--')
    plt.title('Cumulative Returns')
    plt.ylabel(f'Growth of ${CAPITAL}')
    plt.grid(True)
    plt.yscale("log")  # Use log scale for better visualization of compounded returns
    plt.tight_layout()
    plt.show()
    
    # 6. Print Statistics
    print(f"\n=== {ticker} Backtest Results ===")
    if not trades.empty:
        print(f"Backtest Period: {trades.index[0].date()} to {trades.index[-1].date()}")
    else:
        print("No trades executed in the backtest period.")
    print(f"Total Trades: {stats['Total Trades']}")
    print(f"Win Rate: {stats['Win Rate']:.1%}")
    print(f"Avg Return: {stats['Avg Return']:.2%}")
    print(f"Avg Win: {stats['Avg Win']:.2%}")
    print(f"Avg Loss: {stats['Avg Loss']:.2%}")
    print(f"Median Return: {stats['Median Return']:.2%}")
    print(f"Avg Holding Days: {stats['Avg Holding Days']:.1f}")
    print(f"TP Hit Rate: {stats['TP Hit Rate']:.1%}")
    print(f"SL Hit Rate: {stats['SL Hit Rate']:.1%}")
    print(f"Max Gain: {stats['Max Gain']:.2%}")
    print(f"Max Loss: {stats['Max Loss']:.2%}")
    print(f"Return/Risk Ratio: {stats['Return/Risk']:.2f}")
    print(f"Profit Factor: {stats['Profit Factor']:.2f}")
    print(f"\nAvg Win/Loss {stats['Avg Win']:.2%} to {stats['Avg Loss']:.2%}")
    return trades, stats

# --- Main Execution Block ---

# 1. Run Live Predictions
print("--- Running Live Predictions ---")
live_predictions_df = make_live_predictions(TICKERS, start_date, end_date, FEATURES)

print("\n=== Live Predictions Summary ===")
if not live_predictions_df.empty:
    print(tabulate(live_predictions_df.set_index('Ticker'), headers='keys', tablefmt='psql', floatfmt=".2f"))

    # Plot Live Predictions
    df_plot = live_predictions_df.copy()
    fig, ax1 = plt.subplots(figsize=(12, 6), dpi=100)

    sns.barplot(x="Ticker", y="Predicted_Max_Return_Pct", data=df_plot, palette="Spectral", ax=ax1)
    ax1.set_ylabel('Predicted Max Return (%)', fontsize=12)
    ax1.set_xlabel('Ticker', fontsize=12)
    ax1.tick_params(axis='x', rotation=45)
    ax1.grid(True, axis='y', linestyle='--', alpha=0.7)

    ax2 = ax1.twinx()
    sns.lineplot(x="Ticker", y="Predicted_Max_Loss_Pct", data=df_plot, color='red', marker='o', 
                 ax=ax2, linewidth=2, markersize=8, label='Predicted Max Loss')
    ax2.set_ylabel('Predicted Max Loss (%)', fontsize=12, color='red')
    ax2.tick_params(axis='y', labelcolor='red')
    ax2.invert_yaxis()
    ax2.legend(loc='upper right')

    for i, (_, row) in enumerate(df_plot.iterrows()):
        # Values for plotting (ensure they are numerical)
        max_return_val = row["Predicted_Max_Return_Pct"]
        loss_val = row["Predicted_Max_Loss_Pct"]
        
        fcolor = 'green' if row.Technical_Signal == "✅ Bullish" else ('red' if row.Technical_Signal == "🔻 Bearish" else 'yellow')
        prob_color = 'green' if (row.Confidence_Score > 40 and row.TP_Hit_Probability > 40) else 'white'
                
        ax1.text(i, max_return_val + 0.5, f'{max_return_val:.1f}%', ha='center', va='bottom', fontsize=9)
        ax2.text(i, loss_val - 0.5, f'{loss_val:.1f}%', ha='center', va='top', color='red', fontsize=9)
        
        vertical_offset = 5 # Base offset in percentage points
        off1 = vertical_offset + 3 # Additional space for Entry box
        off2 = vertical_offset # TP/SL at base offset

        ax1.text(i, -off2,
                 f'Cur: ${row["Current_Price"]:.1f}\nPred Entry: ${row["Predicted_Entry"]:.1f}\n{row["Entry_Diff_Pct"]:.1f}%\n{row["Technical_Signal"]}',
                 ha='center', va='top', fontsize=8, 
                 bbox=dict(facecolor=fcolor, alpha=0.1, linewidth=0.3))
        
        ax1.text(i, -off1,
                 f'TP: ${row["Predicted_TP_Price"]:.1f}\nSL: ${row["Predicted_SL_Price"]:.1f}\n\nProb: {row["TP_Hit_Probability"]:.0f}\nConf: {row["Confidence_Score"]:.0f}', 
                 ha='center', va='top', fontsize=8, 
                 bbox=dict(facecolor=prob_color, alpha=0.1, linewidth=0.3))

    textbox = AnchoredText(
        "Hint: Buy closer to predicted Entry price for better risk-reward.",
        loc='lower left', frameon=True, borderpad=1.5,
        prop=dict(size=10, color='gray', weight='bold')
    )
    ax1.add_artist(textbox)
    textbox.patch.set_facecolor('honeydew')
    textbox.patch.set_edgecolor('darkgreen')
    textbox.patch.set_alpha(0.8)

    plt.title("ML Predictions (Return/Loss %)", fontsize=16, pad=20)
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.3)
    plt.show()
else:
    print("No live predictions generated.")


# 2. Run Backtests
print("\n--- Running Backtests ---")
all_stats = []

for tick in TICKERS:
    # Use the revised backtesting function
    trades, stats = train_and_backtest_revised(tick, initial_train_years=2, backtest_days=252*1, show_every_n=10) # show every 10th trade on plot
    
    if trades.empty:
        print(f"{tick} has no trades after filtering in backtest.\n")
        continue
    
    stats_row = {
        'Ticker': tick,
        'Total Trades': stats['Total Trades'], # Added total trades to summary
        'Win Rate': stats['Win Rate'],
        'TP Hit Rate': stats['TP Hit Rate'],
        'SL Hit Rate': stats['SL Hit Rate'],
        'Avg Return': stats['Avg Return'],
        'Avg Win': stats['Avg Win'],
        'Avg Loss': stats['Avg Loss'],
        'Profit Factor': stats['Profit Factor'],
        'Avg Holding Days': stats['Avg Holding Days']
    }
    all_stats.append(stats_row)

summary_df = pd.DataFrame(all_stats)
summary_df = summary_df.sort_values(by='Profit Factor', ascending=False)

# Format for display
summary_df['Win Rate'] = (summary_df['Win Rate'] * 100).round(1).astype(str) + '%'
summary_df['TP Hit Rate'] = (summary_df['TP Hit Rate'] * 100).round(1).astype(str) + '%'
summary_df['SL Hit Rate'] = (summary_df['SL Hit Rate'] * 100).round(1).astype(str) + '%'
summary_df['Avg Return'] = (summary_df['Avg Return'] * 100).round(2).astype(str) + '%'
summary_df['Avg Win'] = (summary_df['Avg Win'] * 100).round(2).astype(str) + '%'
summary_df['Avg Loss'] = (summary_df['Avg Loss'] * 100).round(2).astype(str) + '%'
summary_df['Profit Factor'] = summary_df['Profit Factor'].round(2).astype(str)
summary_df['Avg Holding Days'] = summary_df['Avg Holding Days'].round(1).astype(str)

print("\n=== Ticker Backtest Stats Summary ===")
print(tabulate(summary_df, headers='keys', tablefmt='psql', showindex=False))

--- Running Live Predictions ---
Skipping COIN: Not enough clean data for training (0 rows).
Skipping TSLA: Not enough clean data for training (0 rows).
Skipping GOOGL: Not enough clean data for training (0 rows).
Skipping NVDA: Not enough clean data for training (0 rows).
Skipping AAPL: Not enough clean data for training (0 rows).
Skipping NKE: Not enough clean data for training (0 rows).
Skipping SMCI: Not enough clean data for training (0 rows).
Skipping XPEV: Not enough clean data for training (0 rows).
Skipping NIO: Not enough clean data for training (0 rows).
Skipping UNH: Not enough clean data for training (0 rows).
Skipping XYZ: Not enough clean data for training (0 rows).

=== Live Predictions Summary ===
No live predictions generated.

--- Running Backtests ---
Starting backtest for COIN...


KeyError: 'Return'