# Crypto Trading AI Agent Simulation v12
Phase 1: Feature Enrichment & Realistic Backtest (No dropna subset)

In [13]:
!pip install yfinance scikit-learn matplotlib pandas --quiet

In [14]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from IPython.display import display
import warnings
warnings.filterwarnings('ignore')

# Parameters
symbols = ['BTC-USD']
period = '1y'
profit_target = 0.02    # 2%
stop_loss = 0.015       # 1.5%
confidence_threshold = 65
initial_capital = 10000  # starting capital
risk_pct = 0.01          # 1% of capital per trade
spread = 0.0005          # 0.05% round-trip spread
commission = 0.001       # 0.1% round-trip commission

In [15]:
import requests
import pandas as pd

def fetch_data(symbol='BTCUSDT', period=None, interval='1h', profit_target=2.0, limit=1000):
    # Binance API interval mapping
    interval_map = {
        '1h': '1h',
        '1d': '1d',
        '15m': '15m',
        '5m': '5m',
        '1m': '1m',
    }
    binance_interval = interval_map.get(interval, '1h')
    url = f"https://api.binance.com/api/v3/klines"
    params = {
        'symbol': symbol,
        'interval': binance_interval,
        'limit': limit
    }
    r = requests.get(url, params=params)
    data = r.json()
    if isinstance(data, dict) and data.get('code'):
        print("Failed to fetch data:", data.get('msg', 'Unknown error'))
        return None
    df = pd.DataFrame(data, columns=[
        'open_time', 'Open', 'High', 'Low', 'Close', 'Volume',
        'close_time', 'Quote_asset_volume', 'Number_of_trades',
        'Taker_buy_base', 'Taker_buy_quote', 'Ignore'
    ])
    df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
    df.set_index('open_time', inplace=True)
    for col in ['Open', 'High', 'Low', 'Close', 'Volume']:
        df[col] = df[col].astype(float)

    # Feature engineering (same as before)
    m = df['Close'].rolling(20).mean()
    s = df['Close'].rolling(20).std()
    df['Bollinger_Dist'] = (df['Close'] - m) / (s + 1e-9)
    df['EMA50'] = df['Close'].ewm(span=50).mean()
    df['EMA200'] = df['Close'].ewm(span=200).mean()
    df['EMA50_above_EMA200'] = (df['EMA50'] > df['EMA200']).astype(int)
    df['MACD_diff'] = df['Close'].ewm(span=12).mean() - df['Close'].ewm(span=26).mean()
    delta = df['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.ewm(com=14-1, adjust=False).mean()
    avg_loss = loss.ewm(com=14-1, adjust=False).mean()
    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + rs))
    df['Volume_Spike'] = (df['Volume'] > df['Volume'].rolling(20).mean()).astype(int)
    df['Future_Return'] = df['Close'].shift(-1) / df['Close'] - 1
    df['Target'] = (df['Future_Return'] >= profit_target / 100).astype(int)
    df.dropna(inplace=True)
    return df

In [16]:
def run_simulation(
    symbols,
    profit_target,
    stop_loss,
    confidence_threshold,
    period,
    initial_capital,
    risk_pct,
    spread,
    commission
):
    results = []

    for symbol in symbols:
        df = fetch_data(symbol, period=period, profit_target=profit_target * 100)
        if df is None or df.empty:
            print(f"No data for {symbol}")
            continue

        features = ['RSI', 'MACD_diff', 'EMA50_above_EMA200', 'Volume_Spike', 'Bollinger_Dist']
        X = df[features]
        y = df['Target']

        model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
        if y.nunique() > 1:
            X_train, _, y_train, _ = train_test_split(
                X, y, test_size=0.2, random_state=42, stratify=y
            )
            model.fit(X_train, y_train)
        else:
            model.fit(X, y)


        df['Confidence'] = model.predict_proba(X)[:, 1] * 100
        df['Buy_Signal'] = (df['Confidence'] >= confidence_threshold).astype(int)

        capital = float(initial_capital) # Initialize capital as float
        equity = [capital]
        trades = []
        position = None

        for idx in range(len(df) - 1):
            # Use .iloc for integer-based indexing
            buy_signal = df.iloc[idx]['Buy_Signal'].item() # Access the scalar value
            row = df.iloc[idx]
            nxt = df.iloc[idx + 1]

            if buy_signal == 1 and position is None:
                risk_amount = capital * risk_pct
                entry_price = row['Close'] * (1 + spread)
                stop_price = entry_price * (1 - stop_loss)
                risk_per_unit = entry_price - stop_price
                qty = risk_amount / (risk_per_unit + 1e-9)

                nxt_high = float(nxt['High'])
                nxt_low = float(nxt['Low'])
                
                if nxt_high >= entry_price * (1 + profit_target):
                    exit_price = entry_price * (1 + profit_target) * (1 - spread)
                    result = 'profit'
                elif nxt_low <= stop_price:
                    exit_price = stop_price * (1 - spread)
                    result = 'loss'
                else:
                    continue

                pnl = (exit_price - entry_price) * qty
                fee_cost = commission * (entry_price + exit_price) * qty
                net_pnl = pnl - fee_cost

                capital += net_pnl
                equity.append(capital)
                trades.append({
                    'entry_time': row.name,
                    'exit_time': nxt.name,
                    'entry_price': entry_price,
                    'exit_price': exit_price,
                    'qty': qty,
                    'net_pnl': net_pnl,
                    'result': result,
                    'confidence': row['Confidence']
                })
                position = None

        df_trades = pd.DataFrame(trades)
        total_trades = len(df_trades)
        win_rate = df_trades['result'].value_counts(normalize=True).get('profit', 0) * 100
        total_return = (capital / initial_capital - 1) * 100
        returns = df_trades['net_pnl'] / initial_capital
        sharpe = returns.mean() / returns.std() if len(returns) > 1 else np.nan
        peaks = np.maximum.accumulate(equity)
        drawdowns = (equity - peaks) / peaks
        max_dd = drawdowns.min()

        print(f"=== {symbol} ===")
        # Convert to float before formatting
        capital_float = float(capital)
        total_return_float = float(total_return)
        sharpe_float = float(sharpe)
        max_dd_float = float(max_dd)


        print(f"Final Capital: ${capital_float:.2f}")
        print(f"Return: {total_return_float:.2f}% | Trades: {total_trades} | Win Rate: {win_rate:.2f}%")
        print(f"Sharpe: {sharpe_float:.2f} | Max Drawdown: {max_dd_float:.2%}")

        plt.figure(figsize=(10, 4))
        plt.plot(equity, label='Equity Curve')
        plt.title(f'{symbol} Equity Over Trades')
        plt.ylabel('Capital ($)')
        plt.legend()
        plt.show()

        results.append({
            'symbol': symbol,
            'final_capital': capital,
            'total_return_pct': total_return,
            'total_trades': total_trades,
            'win_rate_pct': win_rate,
            'sharpe': sharpe,
            'max_drawdown_pct': max_dd
        })

    return pd.DataFrame(results)

In [17]:
# Run the enriched simulation
metrics = run_simulation(symbols, profit_target, stop_loss,
                         confidence_threshold, period,
                         initial_capital, risk_pct,
                         spread, commission)
display(metrics)

Failed to fetch data: Invalid symbol.
No data for BTC-USD


In [18]:
import requests
import pandas as pd

def fetch_btc_data(interval='1h', limit=1000):
    symbol = 'BTCUSDT'
    url = "https://api.binance.com/api/v3/klines"
    params = {
        'symbol': symbol,
        'interval': interval,
        'limit': limit
    }
    r = requests.get(url, params=params)
    data = r.json()
    if isinstance(data, dict) and data.get('code'):
        print("Failed to fetch data:", data.get('msg', 'Unknown error'))
        return None
    df = pd.DataFrame(data, columns=[
        'open_time', 'Open', 'High', 'Low', 'Close', 'Volume',
        'close_time', 'Quote_asset_volume', 'Number_of_trades',
        'Taker_buy_base', 'Taker_buy_quote', 'Ignore'
    ])
    df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
    df.set_index('open_time', inplace=True)
    for col in ['Open', 'High', 'Low', 'Close', 'Volume']:
        df[col] = df[col].astype(float)
    return df

# Example usage:
btc_df = fetch_btc_data()
if btc_df is not None:
    print(btc_df.head())
else:
    print("Failed to fetch BTCUSDT data.")

                          Open       High        Low      Close     Volume  \
open_time                                                                    
2025-06-20 20:00:00  103307.21  103828.00  103300.11  103693.50  712.51715   
2025-06-20 21:00:00  103693.51  103719.99  103353.06  103532.00  311.76605   
2025-06-20 22:00:00  103531.99  103532.00  102980.00  103107.11  435.14640   
2025-06-20 23:00:00  103107.12  103339.99  102963.67  103297.99  295.95141   
2025-06-21 00:00:00  103297.98  103339.99  103127.48  103248.07  202.14606   

                        close_time Quote_asset_volume  Number_of_trades  \
open_time                                                                 
2025-06-20 20:00:00  1750453199999  73807178.75044590            108512   
2025-06-20 21:00:00  1750456799999  32276280.35361600             72265   
2025-06-20 22:00:00  1750460399999  44906297.05060590            102543   
2025-06-20 23:00:00  1750463999999  30542243.14020910             61390   
202