In [1]:
!pip install alpaca-trade-api pandas numpy scikit-learn matplotlib

Collecting alpaca-trade-api
  Downloading alpaca_trade_api-3.2.0-py3-none-any.whl.metadata (29 kB)
Collecting urllib3<2,>1.24 (from alpaca-trade-api)
  Downloading urllib3-1.26.20-py2.py3-none-any.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.1/50.1 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting websockets<11,>=9.0 (from alpaca-trade-api)
  Downloading websockets-10.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.4 kB)
Collecting msgpack==1.0.3 (from alpaca-trade-api)
  Downloading msgpack-1.0.3.tar.gz (123 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.8/123.8 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting PyYAML==6.0.1 (from alpaca-trade-api)
  Downloading PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)
Collecting deprecation==2.1.0 (fr

In [None]:
# Stock Market ML Model with Alpaca API
# This notebook demonstrates how to build a model that predicts next-bar price direction from historical market data

# Import necessary libraries
import os
import time
from datetime import datetime, timedelta

import alpaca_trade_api as tradeapi
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Set API credentials. The keys are read from the environment so that real
# credentials never have to be saved inside the notebook; the "##" placeholders
# are used only when the corresponding variable is not set.
API_KEY = os.environ.get("APCA_API_KEY_ID", "##")
SECRET_KEY = os.environ.get("APCA_API_SECRET_KEY", "##")
BASE_URL = "https://paper-api.alpaca.markets"  # Use paper trading URL for testing

# Initialize Alpaca API
api = tradeapi.REST(API_KEY, SECRET_KEY, base_url=BASE_URL, api_version='v2')

# Function to get historical stock data and calculate technical indicators
def get_historical_data(symbols, timeframe='1D', start_date=None, end_date=None, limit=1000):
    """
    Fetch historical bars for each symbol and attach technical indicators.

    Parameters:
    - symbols: List of stock symbols
    - timeframe: Time interval for bars (1D, 1H, 15Min, etc.); passed through
      unchanged to ``api.get_bars``
    - start_date: datetime marking the start of the window
      (default: 365 days before ``end_date``)
    - end_date: datetime marking the end of the window (default: now)
    - limit: Maximum number of bars to retrieve

    Naive datetimes are interpreted as local time and sent with an explicit
    UTC offset.

    Returns:
    - Dictionary mapping each symbol to a DataFrame of bars plus indicator
      columns and a ``target`` column, with rows containing NaN removed.
      Symbols for which no bars were returned are omitted.
    """
    # Set default dates if not provided
    if end_date is None:
        end_date = datetime.now()
    if start_date is None:
        start_date = end_date - timedelta(days=365)  # 1 year of data

    print(f"Fetching data from {start_date} to {end_date}")

    # The window is the same for every symbol, so serialize it once.
    start_param = _to_rfc3339(start_date)
    end_param = _to_rfc3339(end_date)

    data_dict = {}

    for symbol in symbols:
        print(f"Processing {symbol}...")

        # Get historical bars
        bars = api.get_bars(symbol, timeframe, start=start_param,
                            end=end_param, limit=limit).df

        # Skip if no data
        if bars.empty:
            print(f"No data available for {symbol}")
            continue

        data_dict[symbol] = _add_indicators(bars)

    return data_dict


def _to_rfc3339(moment):
    """Serialize a date/datetime for the bars request, adding a UTC offset if missing."""
    # A naive datetime (e.g. datetime.now()) serializes without an offset,
    # which is not a valid RFC 3339 timestamp. astimezone() with no argument
    # treats the naive value as local time and attaches the local offset.
    if isinstance(moment, datetime) and moment.tzinfo is None:
        moment = moment.astimezone()
    return moment.isoformat()


def _add_indicators(bars):
    """Return a copy of ``bars`` with feature and target columns added and NaN rows dropped."""
    bars = bars.copy()
    close = bars['close']

    # Basic features
    bars['return'] = close.pct_change()
    bars['range'] = bars['high'] - bars['low']
    bars['daily_volatility'] = bars['return'].rolling(window=20).std()

    # Simple moving averages; the 200-bar average is NaN for the first 199
    # rows, so those rows are removed by the final dropna().
    for window in (5, 10, 20, 50, 200):
        bars[f'ma_{window}'] = close.rolling(window=window).mean()

    # MACD: 12/26-span EMA difference, 9-span signal line, and histogram
    bars['ema_12'] = close.ewm(span=12, adjust=False).mean()
    bars['ema_26'] = close.ewm(span=26, adjust=False).mean()
    bars['macd'] = bars['ema_12'] - bars['ema_26']
    bars['macd_signal'] = bars['macd'].ewm(span=9, adjust=False).mean()
    bars['macd_hist'] = bars['macd'] - bars['macd_signal']

    # RSI (Relative Strength Index) using 14-bar simple rolling means of
    # gains and losses
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    bars['rsi'] = 100 - (100 / (1 + gain / loss))

    # Target variable (next bar's movement): 1 if the next close is higher,
    # otherwise 0.
    # NOTE: the final bar has no next close, so the comparison is False and
    # its target is 0 regardless of what happens next. The row is kept because
    # it carries the newest features; training code should exclude it.
    bars['target'] = np.where(close.shift(-1) > close, 1, 0)

    return bars.dropna()

# Function to train ML model
def train_model(data, features, test_size=0.2, random_state=42):
    """
    Fit a random-forest classifier on ``data`` and report hold-out metrics.

    Parameters:
    - data: DataFrame holding the feature columns and a 'target' column
    - features: List of feature column names
    - test_size: Proportion of rows reserved for testing
    - random_state: Seed forwarded to the split and to the classifier

    Returns:
    - Tuple ``(model, X_test, y_test)``: the fitted classifier and the
      held-out features and labels
    """
    # shuffle=False keeps the original row order when splitting.
    X_train, X_test, y_train, y_test = train_test_split(
        data[features],
        data['target'],
        test_size=test_size,
        random_state=random_state,
        shuffle=False,
    )

    model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=random_state)
    model.fit(X_train, y_train)

    # Score the classifier on the held-out rows
    y_pred = model.predict(X_test)
    print(f"Model accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Rank the features by the importance the forest assigned to them
    importance_table = pd.DataFrame(
        {'Feature': features, 'Importance': model.feature_importances_}
    )
    importance_table = importance_table.sort_values('Importance', ascending=False)

    print("\nFeature Importance:")
    print(importance_table)

    return model, X_test, y_test

# Function to make predictions on latest data
def predict_market_direction(model, symbols, features, timeframe='1D', lookback_days=None):
    """
    Predict the direction of the next bar for multiple symbols.

    Parameters:
    - model: Trained classifier exposing ``predict`` and ``predict_proba``
    - symbols: List of stock symbols
    - features: List of features used by the model
    - timeframe: Time interval for bars
    - lookback_days: Calendar days of history to fetch before computing
      indicators. Defaults to 400 for daily bars and 60 otherwise.

    Returns:
    - DataFrame with one row per symbol that had usable data, with columns
      Symbol, Prediction ('UP'/'DOWN'), Confidence (the model's probability
      for the predicted class), Current_Price and Timestamp. Symbols without
      enough data are skipped with a printed message.
    """
    if lookback_days is None:
        # get_historical_data drops every row with a NaN indicator, and the
        # 200-bar moving average needs 200 bars. Sixty calendar days of daily
        # bars can never satisfy that, so daily timeframes get a window long
        # enough to leave rows after the warm-up period.
        lookback_days = 400 if str(timeframe) in ('1D', '1Day') else 60

    # The fetch window is identical for every symbol.
    end_date = datetime.now()
    start_date = end_date - timedelta(days=lookback_days)

    # Column order of predict_proba; falls back to (0, 1) when the model does
    # not expose classes_.
    class_labels = list(getattr(model, 'classes_', (0, 1)))

    predictions = []

    for symbol in symbols:
        # get_historical_data omits symbols that returned no bars.
        latest_data = get_historical_data(
            [symbol], timeframe=timeframe,
            start_date=start_date, end_date=end_date
        ).get(symbol)

        if latest_data is None or latest_data.empty:
            print(f"Skipping {symbol}: not enough data to compute features")
            continue

        # Keep the newest row as a one-row DataFrame so the model receives
        # the same column names it was fitted with.
        latest_features = latest_data[features].iloc[[-1]]

        # Make prediction
        prediction = model.predict(latest_features)[0]
        class_probabilities = model.predict_proba(latest_features)[0]
        # Report the probability of the class that was actually predicted,
        # so a DOWN call is not labelled with the probability of UP.
        confidence = class_probabilities[class_labels.index(prediction)]

        predictions.append({
            'Symbol': symbol,
            'Prediction': 'UP' if prediction == 1 else 'DOWN',
            'Confidence': confidence,
            'Current_Price': latest_data['close'].iloc[-1],
            'Timestamp': datetime.now()
        })

    # Explicit columns keep the schema stable even when every symbol was skipped.
    return pd.DataFrame(
        predictions,
        columns=['Symbol', 'Prediction', 'Confidence', 'Current_Price', 'Timestamp']
    )

# Run an example
if __name__ == "__main__":
    # Define parameters
    symbols = ['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'TSLA']
    timeframe = '1D'
    symbol_to_train = 'AAPL'  # Let's use Apple for model training

    # Define features to use
    features = [
        'return', 'range', 'daily_volatility',
        'ma_5', 'ma_10', 'ma_20', 'ma_50', 'ma_200',
        'macd', 'macd_hist', 'rsi'
    ]

    # Get historical data
    data_dict = get_historical_data(symbols, timeframe=timeframe)

    # The newest bar has no following close yet, so its 'target' is 0 by
    # construction rather than an observed outcome. Leave it out so it is
    # neither learned from nor scored against.
    train_data = data_dict[symbol_to_train].iloc[:-1]

    # Train model
    model, X_test, y_test = train_model(train_data, features)

    # Make predictions for all symbols
    predictions = predict_market_direction(model, symbols, features, timeframe=timeframe)

    print("\nNext day predictions:")
    print(predictions)

    # Visualization of actual vs predicted for test data
    y_pred = model.predict(X_test)
    actual = y_test.to_numpy()

    # Closing prices for exactly the rows that were held out for testing
    test_close = train_data.loc[X_test.index, 'close']

    # Plot closing prices with buy/sell signals
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(test_close.index, test_close, label='Close Price')

    # (mask, color, marker, legend label) for each class of outcome
    marker_specs = [
        ((y_pred == 1) & (actual == 1), 'green', '^', 'Buy Signal (Correct)'),
        ((y_pred == 0) & (actual == 0), 'red', 'v', 'Sell Signal (Correct)'),
        (y_pred != actual, 'orange', 'x', 'Incorrect Prediction'),
    ]
    for mask, color, marker, label in marker_specs:
        ax.scatter(
            test_close.index[mask], test_close[mask],
            color=color, marker=marker, s=100, label=label
        )

    ax.set_title(f'Model Predictions for {symbol_to_train}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    ax.legend()
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()