In [158]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pendulum

In [159]:
from alpaca.data.historical.stock import StockHistoricalDataClient
from alpaca.data.requests import StockBarsRequest 
from alpaca.data.timeframe import TimeFrame
from alpaca.trading.client import TradingClient
from alpaca.trading.requests import GetAssetsRequest
from alpaca.trading.enums import AssetStatus
from alpaca.trading.enums import AssetClass
from alpaca.data.enums import Adjustment
from alpaca.data.timeframe import TimeFrameUnit

In [160]:
# Environment Variables
api_key = os.getenv("ALPACA_API_KEY")
secret_key = os.getenv("ALPACA_SECRET_KEY")

# Clients
data_client = StockHistoricalDataClient(api_key,secret_key)
trading_client = TradingClient(api_key,secret_key)

In [161]:
# asset_request = GetAssetsRequest(
#     asset_class = AssetClass.US_EQUITY,
#     status=AssetStatus.ACTIVE,
# )

# all_assets = trading_client.get_all_assets(asset_request)

# all_assets = [vars(asset) for asset in all_assets]

# df = pd.DataFrame(data=all_assets)

# df = df[df['tradable'] & df['shortable'] & df['fractionable']]

# data_path = "assets.csv"

# df.to_csv(data_path,index=False)

In [162]:
# # Parameters
# end = pendulum.datetime(2023,12,31)
# start = pendulum.datetime(2020,12,31)

# symbols = pd.read_csv('assets.csv')
# symbols = symbols['symbol'].to_list()

# bars_request = StockBarsRequest(
#     symbol_or_symbols=symbols,
#     timeframe=TimeFrame(1,TimeFrameUnit.Day),
#     start=start,
#     end=end,
#     adjustment=Adjustment.ALL,
# )

# bars = data_client.get_stock_bars(bars_request)

# df = bars.df

# df = df.reset_index()


# data_path = "data.csv"

# df.to_csv(data_path,index=False)

# df

In [163]:
df = pd.read_csv('data.csv')

df

Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap
0,AAAU,2020-12-31 05:00:00+00:00,18.90,18.9400,18.8300,18.94,283545.0,455.0,18.880037
1,AAAU,2021-01-04 05:00:00+00:00,19.33,19.3600,19.2243,19.35,772807.0,1191.0,19.313978
2,AAAU,2021-01-05 05:00:00+00:00,19.42,19.4400,19.3110,19.41,626939.0,768.0,19.389445
3,AAAU,2021-01-06 05:00:00+00:00,19.27,19.2734,18.9310,19.10,1427801.0,2218.0,19.108362
4,AAAU,2021-01-07 05:00:00+00:00,19.07,19.1000,18.9910,19.05,334353.0,631.0,19.060624
...,...,...,...,...,...,...,...,...,...
2712021,Z,2023-12-22 05:00:00+00:00,59.04,59.2900,58.4000,58.79,2631384.0,31006.0,58.832141
2712022,Z,2023-12-26 05:00:00+00:00,59.10,59.1000,58.3100,58.68,2753810.0,29849.0,58.656603
2712023,Z,2023-12-27 05:00:00+00:00,59.00,59.6843,58.5700,59.47,2038048.0,30214.0,59.364864
2712024,Z,2023-12-28 05:00:00+00:00,59.10,59.2300,58.4200,58.85,2872784.0,37481.0,58.837350


In [164]:
# Cleaning

df['caldt'] = pd.to_datetime(df['timestamp']).dt.strftime("%Y-%m-%d")
df['mdt'] = pd.to_datetime(df['timestamp']).dt.strftime("%Y-%m")

df = df.query("'2021-01-01' <= caldt <= '2023-12-31'")

df = df[['symbol','caldt','mdt','close']].copy()

df['ret'] = df.groupby('symbol')['close'].pct_change()

df

Unnamed: 0,symbol,caldt,mdt,close,ret
1,AAAU,2021-01-04,2021-01,19.35,
2,AAAU,2021-01-05,2021-01,19.41,0.003101
3,AAAU,2021-01-06,2021-01,19.10,-0.015971
4,AAAU,2021-01-07,2021-01,19.05,-0.002618
5,AAAU,2021-01-08,2021-01,18.41,-0.033596
...,...,...,...,...,...
2712021,Z,2023-12-22,2023-12,58.79,0.000681
2712022,Z,2023-12-26,2023-12,58.68,-0.001871
2712023,Z,2023-12-27,2023-12,59.47,0.013463
2712024,Z,2023-12-28,2023-12,58.85,-0.010425


In [165]:
# Features

df['logret'] = np.log(1+df['ret'])

df['mom'] = df.groupby('symbol')['logret'].rolling(11,11).sum().reset_index(drop=True)

df

Unnamed: 0,symbol,caldt,mdt,close,ret,logret,mom
1,AAAU,2021-01-04,2021-01,19.35,,,
2,AAAU,2021-01-05,2021-01,19.41,0.003101,0.003096,
3,AAAU,2021-01-06,2021-01,19.10,-0.015971,-0.016100,
4,AAAU,2021-01-07,2021-01,19.05,-0.002618,-0.002621,
5,AAAU,2021-01-08,2021-01,18.41,-0.033596,-0.034173,
...,...,...,...,...,...,...,...
2712021,Z,2023-12-22,2023-12,58.79,0.000681,0.000681,
2712022,Z,2023-12-26,2023-12,58.68,-0.001871,-0.001873,
2712023,Z,2023-12-27,2023-12,59.47,0.013463,0.013373,
2712024,Z,2023-12-28,2023-12,58.85,-0.010425,-0.010480,


In [166]:
df['bin'] = df.groupby('mdt')['mom'].transform(lambda x: pd.qcut(x, 2, labels=False))

df

Unnamed: 0,symbol,caldt,mdt,close,ret,logret,mom,bin
1,AAAU,2021-01-04,2021-01,19.35,,,,
2,AAAU,2021-01-05,2021-01,19.41,0.003101,0.003096,,
3,AAAU,2021-01-06,2021-01,19.10,-0.015971,-0.016100,,
4,AAAU,2021-01-07,2021-01,19.05,-0.002618,-0.002621,,
5,AAAU,2021-01-08,2021-01,18.41,-0.033596,-0.034173,,
...,...,...,...,...,...,...,...,...
2712021,Z,2023-12-22,2023-12,58.79,0.000681,0.000681,,
2712022,Z,2023-12-26,2023-12,58.68,-0.001871,-0.001873,,
2712023,Z,2023-12-27,2023-12,59.47,0.013463,0.013373,,
2712024,Z,2023-12-28,2023-12,58.85,-0.010425,-0.010480,,


In [167]:
df[df['symbol'] == 'Z']

Unnamed: 0,symbol,caldt,mdt,close,ret,logret,mom,bin
2711273,Z,2021-01-04,2021-01,130.770,,,,
2711274,Z,2021-01-05,2021-01,135.645,0.037279,0.036601,,
2711275,Z,2021-01-06,2021-01,134.680,-0.007114,-0.007140,,
2711276,Z,2021-01-07,2021-01,141.990,0.054277,0.052855,,
2711277,Z,2021-01-08,2021-01,144.610,0.018452,0.018284,,
...,...,...,...,...,...,...,...,...
2712021,Z,2023-12-22,2023-12,58.790,0.000681,0.000681,,
2712022,Z,2023-12-26,2023-12,58.680,-0.001871,-0.001873,,
2712023,Z,2023-12-27,2023-12,59.470,0.013463,0.013373,,
2712024,Z,2023-12-28,2023-12,58.850,-0.010425,-0.010480,,
