# Implimeting ML model into trade strategy

In [34]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from ta import momentum, volatility, trend
from sklearn.metrics import classification_report, confusion_matrix

In [36]:
import asyncio
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from alpaca.data.live import StockDataStream
from alpaca.trading.client import TradingClient
from alpaca.trading.requests import MarketOrderRequest
from alpaca.trading.enums import OrderSide, TimeInForce
from alpaca.data.enums import DataFeed
from alpaca.data.timeframe import TimeFrame
import matplotlib.pyplot as plt
from datetime import datetime
from alpaca.data.historical import StockHistoricalDataClient
from alpaca.data.requests import StockBarsRequest

# Load environment variables
load_dotenv()  # Load .env file

API_KEY = os.getenv("ALPACA_API_KEY")
API_SECRET = os.getenv("ALPACA_SECRET_KEY")

# Initialize TradingClient
Trading_Client = TradingClient(API_KEY, API_SECRET, paper=True)  # paper=True for paper trading
data_client = StockHistoricalDataClient(API_KEY, API_SECRET)
SYMBOL = "MSFT"

In [38]:
# === Parameters ===
STOP_LOSS_PCT = 2.1
TAKE_PROFIT_PCT = 3.5
LOOKAHEAD_BARS = 30

# === Load historical data ===
client = StockHistoricalDataClient(API_KEY, API_SECRET)
print(f"Fetching historical data for {SYMBOL}...")

request = StockBarsRequest(
    symbol_or_symbols=[SYMBOL],
    timeframe=TimeFrame.Minute,
    start=datetime(2024, 1, 1),
    end=datetime(2024, 6, 30),
    adjustment='all'
)
bars = client.get_stock_bars(request).df

# === Clean dataframe ===
if isinstance(bars.index, pd.MultiIndex):
    df = bars.xs(SYMBOL, level='symbol').reset_index()
else:
    df = bars.reset_index()

df['timestamp'] = pd.to_datetime(df['timestamp'])
df['date'] = df['timestamp'].dt.date

# === Feature engineering ===
print("Generating features...")
df['rsi'] = momentum.RSIIndicator(df['close'], window=14).rsi()
df['atr'] = volatility.AverageTrueRange(df['high'], df['low'], df['close'], window=14).average_true_range()
df['macd_diff'] = trend.MACD(df['close']).macd_diff()
df['vol_rolling'] = df['volume'].rolling(30).mean()
df['atr_ratio'] = df['atr'] / df['close']

# Create a dummy trend column
df['daily_close'] = df.groupby('date')['close'].transform('last')
df['sma_trend'] = df['daily_close'].rolling(20).mean()
df['daily_trend'] = np.where(df['daily_close'] > df['sma_trend'], 'up', 'down')
df['trend_num'] = df['daily_trend'].map({'up': 1, 'down': -1})

# === Label generation ===
def generate_labels(df, sl_pct=2.1, tp_pct=3.5, lookahead=30):
    labels = [np.nan] * len(df)
    for i in range(len(df) - lookahead):
        entry = df.iloc[i]['close']
        future_high = df.iloc[i+1:i+1+lookahead]['high'].max()
        future_low = df.iloc[i+1:i+1+lookahead]['low'].min()

        tp_price = entry * (1 + tp_pct / 100)
        sl_price = entry * (1 - sl_pct / 100)

        if future_high >= tp_price:
            labels[i] = 1  # profitable trade
        elif future_low <= sl_price:
            labels[i] = 0  # loss trade
        else:
            labels[i] = 0  # flat trade

    return pd.Series(labels, index=df.index)

print("Labeling data...")
df['label'] = generate_labels(df, STOP_LOSS_PCT, TAKE_PROFIT_PCT, LOOKAHEAD_BARS)

# === Prepare training data ===
features = ['rsi', 'macd_diff', 'vol_rolling', 'atr', 'atr_ratio', 'trend_num']
df = df.dropna(subset=features + ['label'])
X = df[features]
y = df['label']

# === Train/test split ===
print("Splitting and training model...")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

# === Model training ===
model = RandomForestClassifier(n_estimators=200, max_depth=6, random_state=42)
model.fit(X_train, y_train)

# === Evaluation ===
y_pred = model.predict(X_test)
print("\n=== Classification Report ===")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# === Output predictions for strategy use ===
df.loc[X_test.index, 'predicted_label'] = y_pred
df.loc[X_test.index, 'predicted_prob'] = model.predict_proba(X_test)[:, 1]

# Save to file (optional)
df[['timestamp', 'close', 'label', 'predicted_label', 'predicted_prob']].to_csv("ml_predictions.csv", index=False)
print("\nSaved predictions to ml_predictions.csv")

Fetching historical data for MSFT...
Generating features...
Labeling data...
Splitting and training model...

=== Classification Report ===
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     23040

    accuracy                           1.00     23040
   macro avg       1.00      1.00      1.00     23040
weighted avg       1.00      1.00      1.00     23040


Confusion Matrix:
[[23040]]





Saved predictions to ml_predictions.csv
