In [None]:
from quantrion import settings

# Override the library's default bar timeframe for this notebook.
# NOTE(review): this assignment sits before the remaining quantrion imports,
# presumably because those modules read the setting at import time — confirm
# before reordering the imports below.
settings.DEFAULT_TIMEFRAME = "5min"

from quantrion.asset.alpaca import AlpacaUSStockListProvider, AlpacaUSStock
from quantrion.data.alpaca import _data_to_df, BAR_FIELDS_TO_NAMES
from quantrion.utils import retry_request

import gc
from pathlib import Path

import httpx
import pandas as pd
import pytz

# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
stocks = await AlpacaUSStockListProvider().list_assets()

In [None]:
end = stocks[0].localize(pd.Timestamp.utcnow()).floor("D")
start = end - pd.Timedelta(days=30)
good_stocks = []

for i, stock in enumerate(stocks, 1):
    async with httpx.AsyncClient() as client:
        url = f"https://data.alpaca.markets/v2/stocks/{stock.symbol}/bars"
        headers = {
            "APCA-API-KEY-ID": settings.ALPACA_API_KEY_ID,
            "APCA-API-SECRET-KEY": settings.ALPACA_API_KEY_SECRET,
        }
        start = start.astimezone(pytz.UTC)
        end = end.astimezone(pytz.UTC)
        params = {
            "timeframe": "1Day",
            "start": start.isoformat(),
            "adjustment": "all",
        }
        response = await retry_request(
            client, "get", url, params=params, headers=headers
        )
        response.raise_for_status()
        data = response.json().get("bars", []) or []
        if len(data) == 0:
            continue
    df = _data_to_df(data, BAR_FIELDS_TO_NAMES, stock)
    volume = df["volume"].fillna(0).mean()
    if volume < 1_000_000:
        continue
    print("%s. Interesting stock (%s, %s)" % (i, stock.symbol, volume))
    good_stocks.append((stock.symbol, volume))

good_stocks = pd.DataFrame(good_stocks, columns=["symbol", "volume"]).sort_values(
    "volume", ascending=False
)
print("Found %d interesting stocks" % len(good_stocks))

In [None]:
# Persist the screened symbols. `to_csv` does not create missing directories,
# so make sure the target folder exists first.
Path("files").mkdir(parents=True, exist_ok=True)
good_stocks.to_csv("files/good_stocks.csv", index=False)

In [None]:
import gc

market_open, market_close = pd.Timestamp("09:30").time(), pd.Timestamp("16:00").time()

for n, (_, row) in enumerate(good_stocks.iterrows(), 1):
    df = None
    stock = AlpacaUSStock(row["symbol"])
    for i in range(4, -1, -1):
        year = stock.localize(pd.Timestamp.utcnow()).year
        start = stock.localize(pd.Timestamp(year - i, 1, 1))
        end = stock.localize(pd.Timestamp(year - i + 1, 1, 1))
        new_df = await stock.bars.get(start, end)
        new_df = new_df.between_time(market_open, market_close).dropna()
        if df is None:
            df = new_df
        else:
            df = pd.concat([df, new_df])
    if n % 10 == 0:
        print("Processed %d stocks" % n)
    df.to_csv("files/stocks/%s.csv" % stock.symbol)
    del stock
    gc.collect()