In [54]:
import json
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

import alpaca
from alpaca.trading.client import TradingClient
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
from alpaca.data.historical.stock import StockHistoricalDataClient
from alpaca.trading.stream import TradingStream
from alpaca.data.live.stock import StockDataStream

from alpaca.data.requests import (
    StockBarsRequest,
    StockTradesRequest,
    StockQuotesRequest
)
from alpaca.trading.requests import (
    GetAssetsRequest, 
    MarketOrderRequest, 
    LimitOrderRequest, 
    StopOrderRequest, 
    StopLimitOrderRequest, 
    TakeProfitRequest, 
    StopLossRequest, 
    TrailingStopOrderRequest, 
    GetOrdersRequest, 
    ClosePositionRequest
)
from alpaca.trading.enums import ( 
    AssetStatus, 
    AssetExchange, 
    OrderSide, 
    OrderType, 
    TimeInForce, 
    OrderClass, 
    QueryOrderStatus
)
from alpaca.common.exceptions import APIError

from sklearn.preprocessing import StandardScaler
import pandas as pd

In [127]:
import os

# Credentials come from the environment so they are never saved with the notebook.
# When the variables are unset the values fall back to empty strings.
api_key = os.environ.get("APCA_API_KEY_ID", "")
secret_key = os.environ.get("APCA_API_SECRET_KEY", "")

paper = True

# The variables below exist for development of this document.
# Please do not change them.
trade_api_url = None
trade_api_wss = None
data_api_url = None
stream_data_wss = None

# Fetching Market Data

In [128]:
# Client used for every historical stock-bar request in this notebook
stock_historical_data_client = StockHistoricalDataClient(
    api_key,
    secret_key,
    url_override=data_api_url,
)

In [129]:
# Tickers to download and the historical window requested for each of them.
# NOTE(review): start/end are naive datetimes while the returned bars are indexed in UTC — confirm the intended timezone.
symbols = ['META', 'AAPL', 'MSFT', 'AMZN', 'GOOG', 'TSLA']
now = datetime.now(ZoneInfo("America/New_York"))  # NOTE(review): not referenced in the visible cells — confirm it is still needed
start_date = datetime(2022, 9, 1)  # Starting from September 1, 2022
end_date = datetime(2023, 3, 1)    # Ending on March 1, 2023

In [126]:
# Download 5-minute bars for each ticker, keeping only the close price and volume columns.
market_data_dict = {}
five_minute_bars = TimeFrame(amount=5, unit=TimeFrameUnit.Minute)
for symbol in symbols:
    print(f"Fetching {symbol} data...")
    req = StockBarsRequest(
        symbol_or_symbols=[symbol],
        timeframe=five_minute_bars,
        start=start_date,
        end=end_date,
    )
    bars = stock_historical_data_client.get_stock_bars(req).df
    market_data_dict[symbol] = bars.loc[:, ["close", "volume"]]

Fetching META data...
Fetching AAPL data...
Fetching MSFT data...
Fetching AMZN data...
Fetching GOOG data...
Fetching TSLA data...


In [130]:
# Preview the downloaded GOOG bars (close and volume, indexed by symbol and timestamp)
market_data_dict['GOOG']

Unnamed: 0_level_0,Unnamed: 1_level_0,close,volume
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
GOOG,2022-09-01 08:00:00+00:00,108.40,4977.0
GOOG,2022-09-01 08:05:00+00:00,108.62,703.0
GOOG,2022-09-01 08:10:00+00:00,108.41,672.0
GOOG,2022-09-01 08:15:00+00:00,108.53,652.0
GOOG,2022-09-01 08:20:00+00:00,108.50,2147.0
GOOG,...,...,...
GOOG,2023-02-28 23:20:00+00:00,90.02,302.0
GOOG,2023-02-28 23:40:00+00:00,90.02,401.0
GOOG,2023-02-28 23:45:00+00:00,90.07,295.0
GOOG,2023-02-28 23:50:00+00:00,89.94,5694.0


# Technical Indicator Functions

In [131]:
def calculate_bollinger_bands(data, window=20, num_of_std=2):
    """Return the price's position relative to its Bollinger Bands.

    The result is the distance of each value from its rolling mean, divided by
    ``num_of_std`` rolling standard deviations. Rows that do not yet have a
    full ``window`` of observations are NaN.
    """
    roller = data.rolling(window=window)
    band_half_width = roller.std() * num_of_std
    return (data - roller.mean()) / band_half_width


In [132]:
def calculate_rsi(data, window=20):
    """Compute a Relative Strength Index from simple rolling averages.

    Gains and losses are the positive and negative parts of the one-step
    difference of ``data``. Each is averaged over ``window`` rows (at least one
    observation is enough), and the ratio of the two averages is mapped onto
    the 0-100 scale.
    """
    price_change = data.diff()
    ups = price_change.clip(lower=0)
    downs = -price_change.clip(upper=0)

    mean_up = ups.rolling(window=window, min_periods=1).mean()
    mean_down = downs.rolling(window=window, min_periods=1).mean()

    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))

In [133]:
def calculate_sma(data, window=20):
    """Return the fractional deviation of each value from its simple moving average.

    A result of 0.01 means the value sits 1% above the mean of the last
    ``window`` rows. Rows without a full window are NaN.
    """
    moving_average = data.rolling(window=window).mean()
    return data.div(moving_average).sub(1)

In [134]:
def calculate_obv(data):
    """Calculate On-Balance Volume (OBV) for a table of closes and volumes.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide 'close' and 'volume' columns. Rows are processed in their
        existing order.

    Returns
    -------
    list
        Running OBV with exactly one entry per row of ``data``. The first entry
        is 0. Each later entry adds the row's volume when the close rose,
        subtracts it when the close fell, and carries the previous value
        forward when the close was unchanged. An empty frame yields an empty
        list, so the result can always be assigned back as a column.
    """
    close_change = data['close'].diff()
    volume = data['volume']

    # Comparisons against NaN are False, so the first row (whose diff is NaN)
    # and any row with a missing close contribute 0, as in a row-by-row walk.
    signed_volume = volume.where(close_change > 0, 0) - volume.where(close_change < 0, 0)

    # skipna=False lets a missing volume propagate instead of being silently skipped.
    return signed_volume.cumsum(skipna=False).tolist()


In [135]:
def calculate_ema(data, window=20):
    """Return the fractional deviation of each value from its exponential moving average.

    The average uses ``span=window`` with the recursive (``adjust=False``) form.
    """
    smoothed = data.ewm(span=window, adjust=False).mean()
    return data.div(smoothed) - 1

In [136]:
def calculate_macd(prices, short_window=12, long_window=26, signal_window=9):
    """
    Build the MACD line, its signal line and their difference.

    Returns a DataFrame with the columns 'MACD' (short EMA minus long EMA),
    'Signal' (EMA of the MACD line) and 'Histogram' (MACD minus Signal),
    indexed like ``prices``.
    """
    def _ewm_mean(series, span):
        # Recursive exponential mean shared by all three smoothing steps
        return series.ewm(span=span, adjust=False).mean()

    macd_line = _ewm_mean(prices, short_window) - _ewm_mean(prices, long_window)
    signal_line = _ewm_mean(macd_line, signal_window)

    return pd.DataFrame({
        'MACD': macd_line,
        'Signal': signal_line,
        'Histogram': macd_line - signal_line,
    })
    

# Label Generation Functions

In [137]:
def bollinger_detection(bb, thresh=1):
    """
    Label band re-entries in a Bollinger ratio series.

    1 marks a row where the ratio rises back above ``-thresh`` after being at
    or below it; 2 marks a row where it drops back below ``thresh`` after
    being at or above it; every other row (including the first) is 0.
    """
    signals = [0] * len(bb)

    # Walk consecutive (previous, current) pairs; the first row has no predecessor
    consecutive = zip(bb.iloc[:-1], bb.iloc[1:])
    for pos, (previous, current) in enumerate(consecutive, start=1):
        if previous <= -thresh and current > -thresh:
            # Re-entered from below the lower band
            signals[pos] = 1
        elif previous >= thresh and current < thresh:
            # Re-entered from above the upper band
            signals[pos] = 2

    return pd.Series(signals, index=bb.index)

In [138]:
def rsi_detection(rsi, low_thresh=30, high_thresh=70):
    """
    Label threshold re-crossings in an RSI series.

    1 marks a row where RSI rises above ``low_thresh`` after being at or below
    it; 2 marks a row where RSI falls below ``high_thresh`` after being at or
    above it; every other row (including the first) is 0.
    """
    signals = [0] * len(rsi)

    # Walk consecutive (previous, current) pairs; the first row has no predecessor
    consecutive = zip(rsi.iloc[:-1], rsi.iloc[1:])
    for pos, (previous, current) in enumerate(consecutive, start=1):
        if previous <= low_thresh and current > low_thresh:
            # Climbed back above the low threshold
            signals[pos] = 1
        elif previous >= high_thresh and current < high_thresh:
            # Dropped back below the high threshold
            signals[pos] = 2

    return pd.Series(signals, index=rsi.index)

In [139]:
def generate_labels(df):
    """
    Generate trading labels for algorithm. 0 = hold, 1 = buy, 2 = sell.

    A row keeps a non-zero label only when the Bollinger and RSI detectors
    emit the same label on that row; any disagreement becomes 0.
    """
    bb_signals = bollinger_detection(df["bb"])
    rsi_signals = rsi_detection(df["rsi"])

    agreed = [
        rsi_value if rsi_value == bb_value else 0
        for rsi_value, bb_value in zip(rsi_signals, bb_signals)
    ]
    return pd.Series(agreed, index=rsi_signals.index)
    

# Generating Feature Tables

In [140]:
# Build the feature/label table for every symbol.
# Each table is rebuilt from the raw close/volume columns, so re-running this cell
# cannot stack a second copy of the MACD columns onto an already-processed frame.
for symbol in symbols:
    features = market_data_dict[symbol].loc[:, ["close", "volume"]].copy()
    close = features["close"]

    # Calc indicators / features
    features["bb"] = calculate_bollinger_bands(close)
    features["sma"] = calculate_sma(close)
    features["rsi"] = calculate_rsi(close)
    features["obv"] = calculate_obv(features)
    features["ema"] = calculate_ema(close)
    macd_df = calculate_macd(close)
    features = pd.concat([features, macd_df], axis=1)

    # Generate labels from the indicator values two rows ahead of each row
    labels_df = features.shift(periods=-2)
    features["label"] = generate_labels(labels_df)

    # Remove NaN rows (indicator warm-up rows at the start)
    market_data_dict[symbol] = features.dropna()


In [142]:
# Stack the per-symbol tables into one frame; ignore_index replaces the original index with 0..n-1
training_df = pd.concat(list(market_data_dict.values()), ignore_index=True)


In [145]:
# Count the rows carrying each actionable label (1 = buy, 2 = sell)
n_buy = training_df['label'].eq(1).sum()
n_sell = training_df['label'].eq(2).sum()
print(f"{n_buy} total buy labels")
print(f"{n_sell} total sell labels")

431 total buy labels
456 total sell labels


In [146]:
# Summary statistics for every column of the stacked training frame
training_df.describe()

Unnamed: 0,close,volume,bb,sma,rsi,obv,ema,MACD,Signal,Histogram,label
count,128362.0,128362.0,128362.0,128362.0,128362.0,128362.0,128362.0,128362.0,128362.0,128362.0,128362.0
mean,153.766798,388736.3,0.009011,-4.9e-05,50.213937,45922810.0,-5.8e-05,-0.006528,-0.00652,-8e-06,0.010463
std,57.864781,1158702.0,0.654897,0.005824,14.672793,136743300.0,0.004993,0.427451,0.403243,0.126833,0.132129
min,81.56,100.0,-2.12412,-0.169462,1.810865,-312133700.0,-0.161479,-5.899816,-4.91741,-2.541669,0.0
25%,102.11,4017.25,-0.494125,-0.002017,40.140845,-43384630.0,-0.001792,-0.154753,-0.150038,-0.042766,0.0
50%,139.24,62527.5,0.014101,3.8e-05,50.26178,24279230.0,5.2e-05,0.004764,0.004602,0.000508,0.0
75%,194.77,421427.2,0.513262,0.002128,60.462038,111663500.0,0.001835,0.154356,0.147407,0.044751,0.0
max,313.1282,114007600.0,2.122992,0.155642,97.254902,404622600.0,0.137252,8.038458,7.229975,3.19888,2.0


In [None]:
# Normalize

In [120]:
# Number of GOOG rows labelled 2 (sell)
(market_data_dict['GOOG']['label'] == 2).sum()

np.int64(78)

In [141]:
# Inspect the GOOG table after indicators and labels have been added
market_data_dict['GOOG']

Unnamed: 0_level_0,Unnamed: 1_level_0,close,volume,bb,sma,rsi,obv,ema,MACD,Signal,Histogram,label
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
GOOG,2022-09-01 10:30:00+00:00,108.49,517.0,0.572612,0.001713,52.036199,-5233.0,0.001764,-0.018940,-0.042885,0.023945,0
GOOG,2022-09-01 10:50:00+00:00,108.55,2854.0,0.700370,0.002197,53.303965,-2379.0,0.002097,0.003144,-0.033679,0.036823,0
GOOG,2022-09-01 10:55:00+00:00,108.88,9120.0,1.375872,0.005123,55.462185,6741.0,0.004651,0.046735,-0.017596,0.064331,0
GOOG,2022-09-01 11:00:00+00:00,109.00,8654.0,1.282179,0.005957,62.882096,15395.0,0.005207,0.089928,0.003909,0.086019,0
GOOG,2022-09-01 11:05:00+00:00,109.15,8061.0,1.246520,0.007054,63.362069,23456.0,0.005959,0.134709,0.030069,0.104641,0
GOOG,...,...,...,...,...,...,...,...,...,...,...,...
GOOG,2023-02-28 23:20:00+00:00,90.02,302.0,-0.350084,-0.000588,49.206349,-10240484.0,-0.001094,-0.081578,-0.115461,0.033883,0
GOOG,2023-02-28 23:40:00+00:00,90.02,401.0,-0.333014,-0.000566,48.391259,-10240484.0,-0.000990,-0.080448,-0.108459,0.028010,0
GOOG,2023-02-28 23:45:00+00:00,90.07,295.0,0.006611,0.000011,48.387097,-10240189.0,-0.000394,-0.074658,-0.101698,0.027041,0
GOOG,2023-02-28 23:50:00+00:00,89.94,5694.0,-0.756311,-0.001366,45.454545,-10245883.0,-0.001662,-0.079640,-0.097287,0.017646,0


In [82]:
# Inspect the TSLA table.
# NOTE(review): the saved output below stops at the 'obv' column and still shows NaN rows,
# so it appears to predate the current feature-generation cell — re-run to refresh it.
market_data_dict['TSLA']

Unnamed: 0_level_0,Unnamed: 1_level_0,close,volume,bb,sma,rsi,obv
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TSLA,2022-09-01 08:00:00+00:00,270.5400,36561.0,,,,0.0
TSLA,2022-09-01 08:05:00+00:00,270.9000,10580.0,,,100.000000,10580.0
TSLA,2022-09-01 08:10:00+00:00,270.8500,8035.0,,,87.804878,2545.0
TSLA,2022-09-01 08:15:00+00:00,270.8000,8216.0,,,78.260870,-5671.0
TSLA,2022-09-01 08:20:00+00:00,270.6600,10561.0,,,60.000000,-16232.0
TSLA,...,...,...,...,...,...,...
TSLA,2023-02-28 23:40:00+00:00,205.3001,17423.0,-0.641543,-0.000885,42.725011,170190623.0
TSLA,2023-02-28 23:45:00+00:00,205.2500,21761.0,-0.727643,-0.001068,43.961353,170168862.0
TSLA,2023-02-28 23:50:00+00:00,205.3500,17879.0,-0.366146,-0.000545,46.543779,170186741.0
TSLA,2023-02-28 23:55:00+00:00,205.3600,18899.0,-0.298774,-0.000441,44.497608,170205640.0


In [74]:
# Summary statistics for the META table.
# NOTE(review): the saved output below only covers close/volume/bb/sma/rsi,
# so it appears to come from an earlier run — re-run to refresh it.
market_data_dict['META'].describe()

Unnamed: 0,close,volume,bb,sma,rsi
count,20788.0,20788.0,20769.0,20769.0,20786.0
mean,135.203584,242595.0,0.005751,4.6e-05,50.032532
std,24.815798,562468.4,0.650518,0.006762,14.249304
min,88.3,100.0,-2.113733,-0.097785,2.631579
25%,116.465,2234.25,-0.496115,-0.002225,40.440438
50%,132.99595,41456.5,0.009261,3e-05,50.10002
75%,148.01,291926.2,0.506198,0.00237,59.913657
max,197.15,23522030.0,2.122992,0.155642,100.0
