In [None]:
import ccxt
import time
import pandas as pd
import numpy as np
from datetime import datetime, timezone
from ta.momentum import RSIIndicator
from ta.trend import MACD
from ta.volatility import BollingerBands, AverageTrueRange
from tensorflow.keras.models import load_model
import joblib
from collections import Counter
import warnings

# Silence the noisy, non-actionable warning categories so the live
# prediction output stays readable on the console.
for _ignored_category in (FutureWarning, UserWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

# ----------------------------
# Load saved models and preprocessors
# ----------------------------
# Ensure these files are present in the directory
# NOTE(security): joblib.load unpickles its input, which can execute arbitrary
# code. Only load artifacts that were produced by a trusted training run.
try:
    forest_model = joblib.load('forest_model.pkl')
    xgb_model = joblib.load('xgb_model.pkl')
    label_encoder = joblib.load('label_encoder.pkl')
    scaler_tabular = joblib.load('scaler_tabular.pkl')
    scaler_long = joblib.load('scaler_long.pkl')
    # Use compile=False when loading a model only for inference
    neural_model = load_model('neural_model.keras', compile=False)
except FileNotFoundError as e:
    print(f"Error: One or more model files not found. Please ensure all required files are in the directory. Missing: {e}")
    # Exit with a non-zero status so callers/shells can detect the failure.
    # The bare `exit()` helper is injected by the `site` module for interactive
    # use and reports success (status 0), so it is not used here.
    raise SystemExit(1)

# ----------------------------
# Define Activity Helpers (DUMMY IMPLEMENTATION)
# -> REPLACE THESE DUMMIES with your actual logic from utils.py
#    AFTER ENSURING they DO NOT reference 'next_high' or 'next_low'.
# ----------------------------

def buy_col(row):
    """Return 1 when the row's 6-period RSI is below 30, otherwise 0.

    Placeholder buy-signal strength. Rows without an 'rsi_6' entry yield 0.
    """
    is_oversold = 'rsi_6' in row and row['rsi_6'] < 30
    return 1 if is_oversold else 0

def sell_col(row):
    """Return 1 when the row's 6-period RSI is above 70, otherwise 0.

    Placeholder sell-signal strength. Rows without an 'rsi_6' entry yield 0.
    """
    is_overbought = 'rsi_6' in row and row['rsi_6'] > 70
    return 1 if is_overbought else 0

def define_activity(row):
    """Map a row's buy/sell indices to 'BUY', 'SELL' or 'HOLD'.

    Placeholder for the final trading activity: the larger index wins and
    anything else (a tie, or values that do not compare) is 'HOLD'.
    """
    buy_strength, sell_strength = row['buy_index'], row['sell_index']
    if buy_strength > sell_strength:
        return 'BUY'
    if sell_strength > buy_strength:
        return 'SELL'
    return 'HOLD'


# ----------------------------
# Configuration and Helper Functions
# ----------------------------
# Exchange client created without credentials; the visible code only calls
# fetch_ohlcv and fetch_ticker on it.
binance = ccxt.binance()
symbol = 'BTC/USDT'
timeframe = '15m'
# Used both as the per-request candle limit and as the number of rows kept.
limit_historical = 200 # Sufficient history for SMA_50 and other indicators
prediction_interval_sec = 5 * 60 # Check price and re-predict every 5 minutes (300 seconds)

# Helper to calculate the start timestamp of the current candle
def get_current_candle_start_ms(tf, now_ms=None):
    """Return the start time of the candle that contains ``now_ms``.

    Candles are assumed to be aligned to the Unix epoch, so the start is the
    reference time rounded down to a multiple of the timeframe length.

    Args:
        tf: Timeframe string made of a positive integer and a unit suffix:
            ``'s'`` (seconds), ``'m'`` (minutes) or ``'h'`` (hours),
            e.g. ``'30s'``, ``'15m'``, ``'4h'``.
        now_ms: Reference time in milliseconds since the Unix epoch (UTC).
            Defaults to the current UTC time.

    Returns:
        Candle start as integer milliseconds since the Unix epoch.

    Raises:
        ValueError: If ``tf`` is empty, has an unsupported unit, or its
            numeric part is missing, non-numeric or not positive.
    """
    # Only units whose candles are plain epoch multiples are supported;
    # week/month candles do not line up with a simple modulo.
    unit_ms = {'s': 1000, 'm': 60 * 1000, 'h': 60 * 60 * 1000}

    unit = tf[-1:]  # slice (not index) so an empty string fails cleanly below
    if unit not in unit_ms:
        raise ValueError(f"Timeframe {tf} not supported for calculation.")

    try:
        value = int(tf[:-1])
    except ValueError:
        raise ValueError(f"Timeframe {tf} not supported for calculation.") from None
    if value <= 0:
        # A zero interval would divide by zero; a negative one is meaningless.
        raise ValueError(f"Timeframe {tf} not supported for calculation.")

    interval_ms = value * unit_ms[unit]

    if now_ms is None:
        now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)

    # Round down to the start of the current interval
    return now_ms - (now_ms % interval_ms)

# ----------------------------
# 1. Fetch Historical Context (Closed Candles Only)
# ----------------------------
since_dt = datetime.now(timezone.utc) - pd.Timedelta(days=7)
since = int(since_dt.timestamp() * 1000)

max_fetch_retries = 3   # consecutive failures tolerated before giving up
fetch_failures = 0
all_candles = []
while True:
    try:
        # Page forward from `since`; each request returns at most `limit_historical` candles
        candles = binance.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=limit_historical)
    except Exception as e:
        fetch_failures += 1
        if fetch_failures > max_fetch_retries:
            # Best effort: continue with whatever was collected so far.
            print(f"Error fetching historical data: {e}. Giving up after {max_fetch_retries} retries.")
            break
        print(f"Error fetching historical data: {e}. Retrying in 5 seconds.")
        time.sleep(5)
        continue  # actually retry the same page

    fetch_failures = 0
    if not candles:
        break
    all_candles.extend(candles)
    # Next page starts just after the last candle received
    since = candles[-1][0] + 1
    if len(candles) < limit_historical:
        break  # short page: caught up with the present
    time.sleep(0.1)  # small pause between pages to stay friendly to rate limits

# The newest candle returned by the exchange is normally the one still being
# formed. Keep only candles that opened before the current interval so the
# frame really holds closed candles only.
forming_candle_start_ms = get_current_candle_start_ms(timeframe)
closed_candles = [candle for candle in all_candles if candle[0] < forming_candle_start_ms]

historical_df = pd.DataFrame(closed_candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
historical_df['timestamp'] = pd.to_datetime(historical_df['timestamp'], unit='ms')
historical_df = historical_df.tail(limit_historical)

if historical_df.empty:
    # Without any history no indicator can be computed; fail loudly here
    # instead of letting the live loop error on every iteration.
    print("Error: no historical candles could be fetched. Cannot start predictions.")
    raise SystemExit(1)


# =========================================================
# LIVE PREDICTION LOOP
# =========================================================
def _fetch_closed_candles(forming_start_ms):
    """Return the most recent closed candles for `symbol` / `timeframe`.

    Requests `limit_historical + 1` candles without a `since` (relying on the
    exchange to answer with the latest ones), discards every candle that
    opened at or after `forming_start_ms`, and keeps at most
    `limit_historical` rows.

    Args:
        forming_start_ms: Start of the currently forming candle in epoch ms.

    Returns:
        DataFrame with columns timestamp/open/high/low/close/volume, where
        `timestamp` is converted with ``pd.to_datetime(..., unit='ms')``.

    Raises:
        RuntimeError: If the exchange returned no closed candle.
    """
    candles = binance.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit_historical + 1)
    closed = [candle for candle in candles if candle[0] < forming_start_ms]
    if not closed:
        raise RuntimeError("Exchange returned no closed candles.")
    closed_df = pd.DataFrame(closed, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    closed_df['timestamp'] = pd.to_datetime(closed_df['timestamp'], unit='ms')
    return closed_df.tail(limit_historical).reset_index(drop=True)


def _engineer_features(df):
    """Add the indicator/feature columns to `df` and index it by timestamp.

    `df` must hold timestamp/open/high/low/close/volume columns sorted by
    time. The frame is modified in place; rows with any missing or infinite
    feature value are dropped before it is returned.
    """
    df['rsi_6'] = RSIIndicator(df['close'], window=6).rsi()
    df['rsi_12'] = RSIIndicator(df['close'], window=12).rsi()

    macd_indicator = MACD(close=df['close'], window_slow=26, window_fast=12, window_sign=9)
    df['macd'] = macd_indicator.macd()
    df['macd_signal'] = macd_indicator.macd_signal()
    df['macd_hist'] = macd_indicator.macd_diff()

    df['ema_21'] = df['close'].ewm(span=21, adjust=False).mean()
    df['sma_50'] = df['close'].rolling(window=50).mean()
    df['ema/sma crossover'] = df['ema_21'] - df['sma_50']
    df['trends with sma'] = df['close'] / df['sma_50'] - 1

    # NOTE: 'next_high' / 'next_low' are intentionally absent: they are
    # future-looking targets and cannot exist at prediction time.

    df['price change'] = np.log(df['close'] / df['close'].shift(1))

    # Utility columns derived from the current candle's features only
    df['buy_index'] = df.apply(buy_col, axis=1)
    df['sell_index'] = df.apply(sell_col, axis=1)
    scale = 1  # Placeholder if 'signal' is not a column
    df['signal_scaled'] = 0.5 * (np.tanh((df['buy_index'] - df['sell_index']) / scale) + 1)
    df['activity'] = df.apply(define_activity, axis=1)

    bollinger = BollingerBands(close=df['close'], window=20, window_dev=2)
    df['bollinger_hband'] = bollinger.bollinger_hband()
    df['bollinger_lband'] = bollinger.bollinger_lband()
    df['bollinger_mavg'] = bollinger.bollinger_mavg()
    df['bollinger_bandwidth'] = bollinger.bollinger_wband()

    atr = AverageTrueRange(high=df['high'], low=df['low'], close=df['close'], window=14)
    df['atr'] = atr.average_true_range()

    df['volume_change'] = df['volume'].pct_change()
    df['volume_sma_10'] = df['volume'].rolling(window=10).mean()

    # pct_change yields +/-inf when the previous volume is 0; treat those
    # like missing values so they never reach the scalers.
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(inplace=True)
    df.set_index('timestamp', inplace=True)
    return df


print("="*60)
print(f"🚀 Starting Real-Time Prediction for {symbol} / {timeframe}")
print(f"   Checking price and re-predicting every {prediction_interval_sec} seconds...")
print("   Press Ctrl+C to stop.")
print("="*60)

# Start (epoch ms) of the forming candle for which `historical_df` was last
# refreshed. None forces a refresh on the first pass, so the loop does not
# depend on how the initial history was assembled.
synced_candle_start_ms = None

# The outer try makes Ctrl+C stop the loop cleanly even while it is sleeping
# in the error back-off below.
try:
    while True:
        try:
            # --- 1. Keep the closed-candle history current ---
            current_candle_start_ms = get_current_candle_start_ms(timeframe)
            current_candle_start_dt = pd.to_datetime(current_candle_start_ms, unit='ms')

            if current_candle_start_ms != synced_candle_start_ms:
                # A new candle started: the previous one is now closed and
                # must become part of the history.
                historical_df = _fetch_closed_candles(current_candle_start_ms)
                synced_candle_start_ms = current_candle_start_ms

            # --- 2. Get Live Price and Construct Forming Candle Data ---
            last_closed_candle = historical_df.iloc[-1]

            ticker = binance.fetch_ticker(symbol)
            live_price = ticker['last']
            if live_price is None:
                raise RuntimeError("Ticker did not include a last price.")

            # The forming candle is synthesized: it opens at the previous
            # close, and high/low are approximated from that open and the
            # live price only (intra-candle extremes are not tracked).
            current_open = last_closed_candle['close']
            current_high = max(live_price, current_open)
            current_low = min(live_price, current_open)

            # Volume placeholder
            current_volume = 0

            new_row = pd.DataFrame({
                'timestamp': [current_candle_start_dt],
                'open': [current_open],
                'high': [current_high],
                'low': [current_low],
                'close': [live_price],
                'volume': [current_volume]
            })

            # --- 3. Combine Data and Recalculate Features ---
            df = pd.concat([historical_df, new_row]).drop_duplicates(subset=['timestamp'], keep='last').sort_values('timestamp').reset_index(drop=True)

            # Indicators must be re-run on the entire frame every time
            df = _engineer_features(df)

            # --- 4. Select features and Predict on the Latest Row ---
            if df.empty:
                print("Dataframe is empty after dropping NaNs. Waiting for more data.")
                time.sleep(prediction_interval_sec)
                continue

            if df.index[-1] != current_candle_start_dt:
                # The forming candle's row was dropped; never silently
                # predict on an older candle instead.
                print("Features for the forming candle are incomplete. Waiting for more data.")
                time.sleep(prediction_interval_sec)
                continue

            # Explicitly exclude the target/utility columns and the old future-looking columns from the feature set
            feature_cols = [c for c in df.columns if c not in ['date', 'next_high', 'next_low', 'buy_index', 'sell_index', 'signal', 'signal_scaled', 'activity']]
            X = df[feature_cols]
            X_latest = X.iloc[[-1]]

            # Scale features
            X_tabular_scaled = scaler_tabular.transform(X_latest)
            X_long_scaled = scaler_long.transform(X_latest)

            # Get probabilities
            proba_xgb = xgb_model.predict_proba(X_tabular_scaled)
            proba_nn = neural_model.predict(X_long_scaled)

            # Get predicted classes
            pred_rf = label_encoder.inverse_transform(forest_model.predict(X_tabular_scaled))
            pred_xgb = label_encoder.inverse_transform(np.argmax(proba_xgb, axis=1))
            pred_nn = label_encoder.inverse_transform(np.argmax(proba_nn, axis=1))

            # Majority voting
            votes = [pred_rf[0], pred_xgb[0], pred_nn[0]]
            voted_pred = Counter(votes).most_common(1)[0][0]

            # Prepare for printing probabilities
            class_names = label_encoder.classes_

            # --- 5. Print Results ---
            print("\n" + "—"*50)
            print(f"PREDICTING for CANDLE: {timeframe} / {symbol}")
            print(f"  Current UTC Time:  {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"  Candle Start Time: {current_candle_start_dt.strftime('%H:%M:%S')} (Forming)")
            print(f"  Current Price:   {live_price:.2f} USDT")
            print("—"*50)

            print(f"RandomForest Prediction: {pred_rf[0]}")

            print("\n— XGBoost Results —")
            print(f"XGBoost Prediction: {pred_xgb[0]}")
            print("XGBoost Probabilities:")
            for i, name in enumerate(class_names):
                print(f"  {name}: {proba_xgb[0][i]:.4f}")

            print("\n— NeuralNet Results —")
            print(f"NeuralNet Prediction: {pred_nn[0]}")
            print("NeuralNet Probabilities:")
            for i, name in enumerate(class_names):
                print(f"  {name}: {proba_nn[0][i]:.4f}")

            print("\n" + "="*50)
            print(f"🚨 MAJORITY VOTED SIGNAL: {voted_pred} 🚨")
            print("="*50)

            # --- 6. Wait for the next prediction interval ---
            time.sleep(prediction_interval_sec)

        except Exception as e:
            # Top-level boundary of a long-running loop: report and back off
            # rather than terminate on a transient network or data error.
            print(f"\nAn error occurred in the prediction loop: {e}")
            time.sleep(prediction_interval_sec * 3)
except KeyboardInterrupt:
    print("\nPrediction loop stopped by user.")

🚀 Starting Real-Time Prediction for BTC/USDT / 15m
   Checking price and re-predicting every 300 seconds...
   Press Ctrl+C to stop.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step

——————————————————————————————————————————————————
PREDICTING for CANDLE: 15m / BTC/USDT
  Current UTC Time:  2025-10-13 11:53:33
  Candle Start Time: 11:45:00 (Forming)
  Current Price:   114149.99 USDT
——————————————————————————————————————————————————
RandomForest Prediction: Buy

— XGBoost Results —
XGBoost Prediction: Buy
XGBoost Probabilities:
  Buy: 0.5090
  Sell: 0.4910

— NeuralNet Results —
NeuralNet Prediction: Buy
NeuralNet Probabilities:
  Buy: 0.6284
  Sell: 0.3716

🚨 MAJORITY VOTED SIGNAL: Buy 🚨
