# Implimeting ML model into trade strategy

In [35]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from ta import momentum, volatility, trend
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import resample

In [37]:
import asyncio
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from alpaca.data.live import StockDataStream
from alpaca.trading.client import TradingClient
from alpaca.trading.requests import MarketOrderRequest
from alpaca.trading.enums import OrderSide, TimeInForce
from alpaca.data.enums import DataFeed
from alpaca.data.timeframe import TimeFrame
import matplotlib.pyplot as plt
from datetime import datetime
from alpaca.data.historical import StockHistoricalDataClient
from alpaca.data.requests import StockBarsRequest

# Load environment variables
load_dotenv()  # Load .env file

API_KEY = os.getenv("ALPACA_API_KEY")
API_SECRET = os.getenv("ALPACA_SECRET_KEY")

# Initialize TradingClient
Trading_Client = TradingClient(API_KEY, API_SECRET, paper=True)  # paper=True for paper trading
data_client = StockHistoricalDataClient(API_KEY, API_SECRET)
SYMBOL = "MSFT"

In [41]:
# === Parameters ===
STOP_LOSS_PCT = 0.8
TAKE_PROFIT_PCT = 1.5
LOOKAHEAD_BARS = 30
BREAKOUT_WINDOW = 60
MIN_BREAKOUT_RATIO = 0.0001  # 0.01% above recent high

# === Load historical data ===
print(f"Fetching historical data for {SYMBOL}...")
data_client = StockHistoricalDataClient(API_KEY, API_SECRET)

request = StockBarsRequest(
    symbol_or_symbols=[SYMBOL],
    timeframe=TimeFrame.Minute,
    start=datetime(2024, 1, 1),
    end=datetime(2024, 12, 30),
    adjustment='all'
)
bars = data_client.get_stock_bars(request).df

# === Clean dataframe ===
if isinstance(bars.index, pd.MultiIndex):
    df = bars.xs(SYMBOL, level='symbol').reset_index()
else:
    df = bars.reset_index()

df['timestamp'] = pd.to_datetime(df['timestamp'])
df['date'] = df['timestamp'].dt.date

# === Feature engineering ===
print("Generating features...")
df['rsi'] = momentum.RSIIndicator(df['close'], window=14).rsi()
df['atr'] = volatility.AverageTrueRange(df['high'], df['low'], df['close'], window=14).average_true_range()
df['macd_diff'] = trend.MACD(df['close']).macd_diff()
df['vol_rolling'] = df['volume'].rolling(30).mean()
df['atr_ratio'] = df['atr'] / df['close']

# Daily trend using SMA
df['daily_close'] = df.groupby('date')['close'].transform('last')
df['sma_trend'] = df['daily_close'].rolling(20).mean()
df['daily_trend'] = np.where(df['daily_close'] > df['sma_trend'], 'up', 'down')
df['trend_num'] = df['daily_trend'].map({'up': 1, 'down': -1})

# Breakout features
df['recent_high'] = df['high'].rolling(BREAKOUT_WINDOW).max()
df['recent_low'] = df['low'].rolling(BREAKOUT_WINDOW).min()
df['breakout_strength'] = df['close'] - df['recent_high']
df['dist_from_sma'] = df['close'] - df['sma_trend']

# === Breakout-Aware Label Generation ===
def generate_breakout_labels(df, sl_pct=1.0, tp_pct=2.0, lookahead=30, breakout_window=20, min_breakout_ratio=0.01):
    """
    Generates labels for breakout strategy:
    - Only label trades where a breakout occurred (close > recent high + threshold)
    - Label 1 if TP is hit first
    - Label 0 if SL is hit first
    - np.nan if neither
    """
    labels = pd.Series([np.nan] * len(df), index=df.index)

    for i in range(lookahead, len(df) - lookahead):
        recent_high = df['high'].iloc[i - breakout_window:i].max()
        current_close = df['close'].iloc[i]

        if current_close < recent_high * (1 + min_breakout_ratio):
            continue  # Not a valid breakout

        entry_price = current_close
        tp_price = entry_price * (1 + tp_pct / 100)
        sl_price = entry_price * (1 - sl_pct / 100)

        for j in range(1, lookahead + 1):
            future_high = df['high'].iloc[i + j]
            future_low = df['low'].iloc[i + j]

            if future_high >= tp_price:
                labels.iloc[i] = 1
                break
            if future_low <= sl_price:
                labels.iloc[i] = 0
                break

    return labels

print("Labeling data...")
df['label'] = generate_breakout_labels(df, STOP_LOSS_PCT, TAKE_PROFIT_PCT, LOOKAHEAD_BARS, BREAKOUT_WINDOW, MIN_BREAKOUT_RATIO)

# === Prepare training data ===
features = [
    'rsi', 'macd_diff', 'vol_rolling', 'atr', 'atr_ratio', 'trend_num',
    'breakout_strength', 'dist_from_sma'
]

df = df.dropna(subset=features + ['label'])

X = df[features]
y = df['label']

print("\nLabel distribution before split:")
print(y.value_counts(dropna=False))

# === Train/test split ===
print("Splitting and training model...")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

# === Balance training data BEFORE fitting ===
train_data = pd.concat([X_train, y_train], axis=1)
winners = train_data[train_data['label'] == 1.0]
losers = train_data[train_data['label'] == 0.0]

if len(winners) > 0:
    winners_upsampled = resample(winners, 
                                 replace=True,
                                 n_samples=len(losers),
                                 random_state=42)

    train_balanced = pd.concat([losers, winners_upsampled]).sample(frac=1, random_state=42)
    X_train = train_balanced[features]
    y_train = train_balanced['label']
else:
    print("⚠️ No winning trades to balance. Model will remain biased.")

# === Model training ===
model = RandomForestClassifier(n_estimators=200, max_depth=6, random_state=42)
model.fit(X_train, y_train)

# === Evaluation ===
y_pred = model.predict(X_test)
print("\n=== Classification Report ===")
print(classification_report(y_test, y_pred, zero_division=0))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# === Output predictions for strategy use ===
df.loc[X_test.index, 'predicted_label'] = y_pred
df.loc[X_test.index, 'predicted_prob'] = model.predict_proba(X_test)[:, 1]

# Save predictions
df[['timestamp', 'close', 'label', 'predicted_label', 'predicted_prob']].to_csv("ml_predictions.csv", index=False)
print("\nSaved predictions to ml_predictions.csv")

Fetching historical data for MSFT...
Generating features...
Labeling data...

Label distribution before split:
label
0.0    198
1.0      8
Name: count, dtype: int64
Splitting and training model...

=== Classification Report ===
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99        61
         1.0       0.00      0.00      0.00         1

    accuracy                           0.98        62
   macro avg       0.49      0.50      0.50        62
weighted avg       0.97      0.98      0.98        62


Confusion Matrix:
[[61  0]
 [ 1  0]]

Saved predictions to ml_predictions.csv
