In [54]:
import json
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

import alpaca
from alpaca.trading.client import TradingClient
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
from alpaca.data.historical.stock import StockHistoricalDataClient
from alpaca.trading.stream import TradingStream
from alpaca.data.live.stock import StockDataStream

from alpaca.data.requests import (
    StockBarsRequest,
    StockTradesRequest,
    StockQuotesRequest
)
from alpaca.trading.requests import (
    GetAssetsRequest, 
    MarketOrderRequest, 
    LimitOrderRequest, 
    StopOrderRequest, 
    StopLimitOrderRequest, 
    TakeProfitRequest, 
    StopLossRequest, 
    TrailingStopOrderRequest, 
    GetOrdersRequest, 
    ClosePositionRequest
)
from alpaca.trading.enums import ( 
    AssetStatus, 
    AssetExchange, 
    OrderSide, 
    OrderType, 
    TimeInForce, 
    OrderClass, 
    QueryOrderStatus
)
from alpaca.common.exceptions import APIError

from sklearn.preprocessing import StandardScaler

In [71]:
# Alpaca API credentials (blank placeholders in this notebook).
api_key = ""
secret_key = ""

# Flag selecting the paper account (presumably consumed by trading-client cells; verify).
paper = True

# The variables below exist only for development of this document.
# Please do not change them.
trade_api_url = trade_api_wss = data_api_url = stream_data_wss = None

# Fetching Market Data

In [44]:
# Build the client used for all historical stock-data requests below;
# `data_api_url` is forwarded as the URL override.
stock_historical_data_client = StockHistoricalDataClient(
    api_key=api_key,
    secret_key=secret_key,
    url_override=data_api_url,
)

In [45]:
# Tickers whose bars are downloaded by the fetch cell.
symbols = ['META', 'AAPL', 'MSFT', 'AMZN', 'GOOG', 'TSLA']
# Current wall-clock time in New York. NOTE(review): not referenced by the cells visible here.
now = datetime.now(ZoneInfo("America/New_York"))
# Request window bounds. These are naive datetimes (no tzinfo);
# NOTE(review): presumably interpreted as UTC by the data API — confirm.
start_date = datetime(2022, 9, 1)  # Starting from September 1, 2022
end_date = datetime(2023, 3, 1)    # Ending on March 1, 2023

In [60]:
# Maps each ticker to a DataFrame holding only its `close` and `volume` columns.
market_data_dict = {}

# Every request uses the same 5-minute bar size, so build it once.
five_minute_bars = TimeFrame(amount=5, unit=TimeFrameUnit.Minute)

for symbol in symbols:
    print(f"Fetching {symbol} data...")
    req = StockBarsRequest(
        symbol_or_symbols=[symbol],
        timeframe=five_minute_bars,
        start=start_date,
        end=end_date,
    )
    bars = stock_historical_data_client.get_stock_bars(req)
    # Keep just the columns the indicator functions need.
    market_data_dict[symbol] = bars.df.loc[:, ["close", "volume"]]

Fetching META data...
Fetching AAPL data...
Fetching MSFT data...
Fetching AMZN data...
Fetching GOOG data...
Fetching TSLA data...


In [64]:
# Preview the GOOG frame.
# NOTE(review): the saved output already shows `bb`/`sma` columns, which the fetch cell
# does not create (it keeps only `close` and `volume`) — the output is stale relative
# to cell order; re-run top to bottom to refresh it.
market_data_dict['GOOG']

Unnamed: 0_level_0,Unnamed: 1_level_0,close,volume,bb,sma
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
GOOG,2022-09-01 08:00:00+00:00,108.40,4977.0,,
GOOG,2022-09-01 08:05:00+00:00,108.62,703.0,,
GOOG,2022-09-01 08:10:00+00:00,108.41,672.0,,
GOOG,2022-09-01 08:15:00+00:00,108.53,652.0,,
GOOG,2022-09-01 08:20:00+00:00,108.50,2147.0,,
GOOG,...,...,...,...,...
GOOG,2023-02-28 23:20:00+00:00,90.02,302.0,-0.350084,-0.000588
GOOG,2023-02-28 23:40:00+00:00,90.02,401.0,-0.333014,-0.000566
GOOG,2023-02-28 23:45:00+00:00,90.07,295.0,0.006611,0.000011
GOOG,2023-02-28 23:50:00+00:00,89.94,5694.0,-0.756311,-0.001366


In [65]:
# Largest SMA ratio observed for GOOG.
# NOTE(review): the `sma` column is only added by the "Add TI Features" cell further
# down, so on a fresh top-to-bottom run this cell would presumably raise KeyError —
# consider moving it below that cell.
market_data_dict['GOOG']['sma'].max()

np.float64(0.038333017916611256)

# Technical Indicator Functions

In [68]:
def calculate_bollinger_bands(data, window=20, num_of_std=2):
    """Return the price's offset from its rolling mean, in units of band half-width.

    Args:
        data: Series of prices.
        window: Number of observations in the rolling window.
        num_of_std: How many rolling standard deviations make up one band half-width.

    Returns:
        Series of ``(price - rolling mean) / (rolling std * num_of_std)``; the
        first ``window - 1`` entries are NaN because the window is not yet full.
    """
    roller = data.rolling(window=window)
    band_half_width = roller.std() * num_of_std
    return (data - roller.mean()) / band_half_width


In [67]:
def calculate_rsi(data, window=20):
    """Compute a Relative Strength Index from simple rolling averages.

    Args:
        data: Series of prices.
        window: Rolling window length for averaging gains and losses.

    Returns:
        Series of RSI values. Averages use ``min_periods=1``, so values are
        produced as soon as one price change is available; the first entry is
        NaN because there is no prior price to diff against.
    """
    change = data.diff()

    def _rolling_average(moves):
        # Partial windows are allowed so early rows still get a value.
        return moves.rolling(window=window, min_periods=1).mean()

    mean_up = _rolling_average(change.clip(lower=0))
    mean_down = _rolling_average(-change.clip(upper=0))
    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))

In [62]:
def calculate_sma(data, window=20):
    """Return each price's relative deviation from its simple moving average.

    Args:
        data: Series of prices.
        window: Number of observations in the moving average.

    Returns:
        Series of ``price / rolling mean - 1``; the first ``window - 1``
        entries are NaN because the window is not yet full.
    """
    moving_average = data.rolling(window=window).mean()
    return data.div(moving_average).sub(1)

In [79]:
def calculate_obv(data):
    """Compute On-Balance Volume (OBV) for a frame of bars.

    The running total starts at 0. For each later row the row's volume is
    added when ``close`` rose versus the previous row, subtracted when it
    fell, and ignored when it was unchanged.

    Args:
        data: DataFrame with ``close`` and ``volume`` columns, in time order.

    Returns:
        list: One OBV value per row of ``data``. An empty frame yields an
        empty list, so the result can always be assigned back as a column.
    """
    volume = data['volume']
    delta = data['close'].diff()

    # Signed contribution of each row: +volume on an up move, -volume on a
    # down move, 0 otherwise. The first row has a NaN delta, so both
    # comparisons are False and it contributes 0.
    signed_volume = volume.where(delta > 0, 0) - volume.where(delta < 0, 0)

    # skipna=False keeps the loop-style behaviour where a NaN volume on an
    # up/down row poisons every later total instead of being silently skipped.
    return signed_volume.cumsum(skipna=False).tolist()


# Add TI Features

In [80]:
for symbol in symbols:
    frame = market_data_dict[symbol]
    close = frame['close']

    # Indicators derived from the closing price alone
    frame['bb'] = calculate_bollinger_bands(close)
    frame['sma'] = calculate_sma(close)
    frame['rsi'] = calculate_rsi(close)

    # OBV reads both `close` and `volume`, so it takes the whole frame
    frame['obv'] = calculate_obv(frame)
    # ADD EMA
    # TODO: Remove NaN rows
    # TODO: Generate Labels
    # TODO: Stack all dataframes
    # TODO: Normalize

In [81]:
# Inspect the GOOG frame after the indicator columns have been added.
market_data_dict['GOOG']

Unnamed: 0_level_0,Unnamed: 1_level_0,close,volume,bb,sma,rsi,obv
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GOOG,2022-09-01 08:00:00+00:00,108.40,4977.0,,,,0.0
GOOG,2022-09-01 08:05:00+00:00,108.62,703.0,,,100.000000,703.0
GOOG,2022-09-01 08:10:00+00:00,108.41,672.0,,,51.162791,31.0
GOOG,2022-09-01 08:15:00+00:00,108.53,652.0,,,61.818182,683.0
GOOG,2022-09-01 08:20:00+00:00,108.50,2147.0,,,58.620690,-1464.0
GOOG,...,...,...,...,...,...,...
GOOG,2023-02-28 23:20:00+00:00,90.02,302.0,-0.350084,-0.000588,49.206349,-10240484.0
GOOG,2023-02-28 23:40:00+00:00,90.02,401.0,-0.333014,-0.000566,48.391259,-10240484.0
GOOG,2023-02-28 23:45:00+00:00,90.07,295.0,0.006611,0.000011,48.387097,-10240189.0
GOOG,2023-02-28 23:50:00+00:00,89.94,5694.0,-0.756311,-0.001366,45.454545,-10245883.0


In [82]:
# Inspect the TSLA frame after the indicator columns have been added.
market_data_dict['TSLA']

Unnamed: 0_level_0,Unnamed: 1_level_0,close,volume,bb,sma,rsi,obv
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TSLA,2022-09-01 08:00:00+00:00,270.5400,36561.0,,,,0.0
TSLA,2022-09-01 08:05:00+00:00,270.9000,10580.0,,,100.000000,10580.0
TSLA,2022-09-01 08:10:00+00:00,270.8500,8035.0,,,87.804878,2545.0
TSLA,2022-09-01 08:15:00+00:00,270.8000,8216.0,,,78.260870,-5671.0
TSLA,2022-09-01 08:20:00+00:00,270.6600,10561.0,,,60.000000,-16232.0
TSLA,...,...,...,...,...,...,...
TSLA,2023-02-28 23:40:00+00:00,205.3001,17423.0,-0.641543,-0.000885,42.725011,170190623.0
TSLA,2023-02-28 23:45:00+00:00,205.2500,21761.0,-0.727643,-0.001068,43.961353,170168862.0
TSLA,2023-02-28 23:50:00+00:00,205.3500,17879.0,-0.366146,-0.000545,46.543779,170186741.0
TSLA,2023-02-28 23:55:00+00:00,205.3600,18899.0,-0.298774,-0.000441,44.497608,170205640.0


In [74]:
# Summary statistics for the META frame.
# NOTE(review): the saved output has no `obv` column, so it appears to predate the
# latest run of the indicator cell — re-run to refresh.
market_data_dict['META'].describe()

Unnamed: 0,close,volume,bb,sma,rsi
count,20788.0,20788.0,20769.0,20769.0,20786.0
mean,135.203584,242595.0,0.005751,4.6e-05,50.032532
std,24.815798,562468.4,0.650518,0.006762,14.249304
min,88.3,100.0,-2.113733,-0.097785,2.631579
25%,116.465,2234.25,-0.496115,-0.002225,40.440438
50%,132.99595,41456.5,0.009261,3e-05,50.10002
75%,148.01,291926.2,0.506198,0.00237,59.913657
max,197.15,23522030.0,2.122992,0.155642,100.0
