In [1]:
import asyncio
import nest_asyncio
nest_asyncio.apply()

import pandas as pd
from typing import List, Dict, Any, Optional, Tuple, Union

from lib.utils import get_polygon_root, get_nyse_calendar, get_93, get_polygon_key
from lib.fetcher import HttpRequestFetcher, BatchRequestExecutor

In [2]:
# Notebook-wide configuration, resolved through the project's lib.utils helpers.
base_path = get_polygon_root()  # presumably the root location for Polygon data — not used in the cells shown here
api_key = get_polygon_key()  # Polygon API key; never display this value in a cell output
tickers = get_93()  # list of ticker symbols to work with (sliced with tickers[:2] further down)

## Get URLs

In [4]:
def get_nyse_date_tups(start: str, end: str = 'today', time_detail=True):
    """Build one (open, close) string pair per row of the NYSE calendar.

    Args:
        start: Start of the range, in any form ``pd.Timestamp`` accepts.
        end: End of the range. The default ``'today'`` is replaced by the
            current local date formatted as ``YYYY-MM-DD``.
        time_detail: When true, each string is formatted as
            ``YYYY-MM-DD HH:MM:SS``; otherwise only ``YYYY-MM-DD`` is kept.

    Returns:
        A list of ``(market_open, market_close)`` string tuples, expressed in
        the ``America/New_York`` timezone.

    Raises:
        AssertionError: If ``start`` is not strictly earlier than ``end``.
    """
    if end == 'today':
        end = pd.Timestamp.now().strftime('%Y-%m-%d')
    assert pd.Timestamp(start) < pd.Timestamp(end), "start date must be before end date"

    schedule = get_nyse_calendar(start, end)

    if time_detail:
        fmt = "%Y-%m-%d %H:%M:%S"
    else:
        fmt = "%Y-%m-%d"

    def _to_new_york(stamp):
        # Interpret the value as UTC, then shift to exchange-local time before formatting.
        localized = pd.to_datetime(stamp, utc=True).tz_convert('America/New_York')
        return localized.strftime(fmt)

    pairs = []
    for opened, closed in zip(schedule['market_open'], schedule['market_close']):
        pairs.append((_to_new_york(opened), _to_new_york(closed)))
    return pairs

def make_urls(
    tickers: Union[List[str], str],
    tups: List[Tuple[str, str]],
    *,
    limit: int = 1000,
    adjusted: bool = True,
    api_key: Optional[str] = None,
):
    """Build Polygon 1-minute aggregate URLs for every ticker / date-range pair.

    Args:
        tickers: A single ticker symbol or a list of ticker symbols.
        tups: ``(start, end)`` string pairs; each pair becomes one request URL
            per ticker. The values are inserted into the URL path verbatim,
            so they must already be URL-safe (e.g. ``YYYY-MM-DD``).
        limit: Value of the ``limit`` query parameter.
        adjusted: Whether to send ``adjusted=true`` or ``adjusted=false``.
        api_key: Polygon API key. When omitted, ``get_polygon_key()`` is
            called to obtain it.

    Returns:
        For a list of several tickers, a nested list ``[[url, ...], ...]`` with
        one inner list per ticker. For a single ticker (string or one-element
        list) the flat inner list is returned instead.

    Raises:
        AssertionError: If ``tickers`` is empty or no API key is available.

    Note:
        The API key is embedded in each URL's query string, so printing or
        displaying the returned URLs exposes the key.
    """
    # Accept a bare string as shorthand for a one-element list.
    if isinstance(tickers, str):
        tickers = [tickers]
    assert len(tickers) > 0, "list of tickers must be non-empty"

    # Resolve the key lazily so explicit callers never touch the key helper.
    if api_key is None:
        api_key = get_polygon_key()
    assert api_key is not None, "api_key must be provided"

    base_url = "https://api.polygon.io/v2/aggs/ticker/"
    adj = 'true' if adjusted else 'false'
    # The query string is identical for every URL, so build it once.
    query = f"adjusted={adj}&sort=asc&limit={limit}&apiKey={api_key}"

    urls = [
        [f"{base_url}{ticker}/range/1/minute/{start}/{end}?{query}" for start, end in tups]
        for ticker in tickers
    ]

    # Single ticker: unwrap [[...]] -> [...] (kept for backward compatibility).
    if len(tickers) == 1:
        urls = urls[0]

    return urls

def estimate_time(urls, batch_size=1000, req_time=1):
    """Print a rough estimate, in hours, of how long fetching ``urls`` will take.

    The estimate models the work as ``n_urls / batch_size`` batches, each
    taking ``req_time`` seconds.

    Args:
        urls: Either a nested list of URL lists (one per ticker), a flat list
            of URL strings, or a single URL string. Both list shapes are
            produced by ``make_urls`` depending on the number of tickers.
        batch_size: Number of requests assumed to complete per batch.
        req_time: Assumed duration of one batch / API call, in seconds.

    Returns:
        None. The estimate is printed.
    """
    if isinstance(urls, str):
        urls = [urls]
    # A bare string counts as one URL; calling len() on it would count characters.
    n_urls = sum(1 if isinstance(entry, str) else len(entry) for entry in urls)
    # Seconds -> hours is a division by 3600 (dividing by 60 only gives minutes).
    total_time_hrs = n_urls / batch_size * req_time / 3600
    print(f"Estimated time for {n_urls} requests @ {req_time}s per API call: {total_time_hrs:0.2f} hrs")

In [41]:
# Request window: from one calendar month ago up to today, as YYYY-MM-DD strings.
end = pd.Timestamp.now().strftime("%Y-%m-%d")
start = (pd.Timestamp.now() - pd.DateOffset(months=1)).strftime("%Y-%m-%d")
# Restrict this run to the first two tickers.
tickers_ = tickers[:2]
print(f"Getting data for {tickers_} from {start} to {end}")
# Date-only (open, close) pairs, one per calendar row returned for the window.
tups = get_nyse_date_tups(start, end, time_detail=False)
print(len(tups))
# With two tickers, make_urls returns a nested list: one list of URLs per ticker.
urls = make_urls(tickers_, tups)
estimate_time(urls)

Getting data for ['AAPL', 'ACN'] from 2023-09-09 to 2023-10-09
21
Estimated time for 42 requests @ 1s per API call: 0.00 hrs


In [42]:
urls

[['https://api.polygon.io/v2/aggs/ticker/AAPL/range/1/minute/2023-09-11/2023-09-11?adjusted=true&sort=asc&limit=1000&apiKey=nBHpDZaNGDNb5EuRL3HNUGNU_Z2NnD9h',
  'https://api.polygon.io/v2/aggs/ticker/AAPL/range/1/minute/2023-09-12/2023-09-12?adjusted=true&sort=asc&limit=1000&apiKey=nBHpDZaNGDNb5EuRL3HNUGNU_Z2NnD9h',
  'https://api.polygon.io/v2/aggs/ticker/AAPL/range/1/minute/2023-09-13/2023-09-13?adjusted=true&sort=asc&limit=1000&apiKey=nBHpDZaNGDNb5EuRL3HNUGNU_Z2NnD9h',
  'https://api.polygon.io/v2/aggs/ticker/AAPL/range/1/minute/2023-09-14/2023-09-14?adjusted=true&sort=asc&limit=1000&apiKey=nBHpDZaNGDNb5EuRL3HNUGNU_Z2NnD9h',
  'https://api.polygon.io/v2/aggs/ticker/AAPL/range/1/minute/2023-09-15/2023-09-15?adjusted=true&sort=asc&limit=1000&apiKey=nBHpDZaNGDNb5EuRL3HNUGNU_Z2NnD9h',
  'https://api.polygon.io/v2/aggs/ticker/AAPL/range/1/minute/2023-09-18/2023-09-18?adjusted=true&sort=asc&limit=1000&apiKey=nBHpDZaNGDNb5EuRL3HNUGNU_Z2NnD9h',
  'https://api.polygon.io/v2/aggs/ticker/AAPL/

In [43]:
# Project-local HTTP helpers from lib.fetcher.
# NOTE(review): rps presumably means requests per second — the log below shows ~0.1s spacing; confirm in lib.fetcher.
fetcher = HttpRequestFetcher(rps=10, detailed_logs=True)
executor = BatchRequestExecutor()

In [45]:
# make_urls returns a nested list for several tickers but a flat list for a
# single ticker; wrap bare strings so a flat list is not split into characters.
flattened_urls = [
    url
    for entry in urls
    for url in ([entry] if isinstance(entry, str) else entry)
]

In [46]:
# Run every URL through the fetcher; each element of `results` is later unpacked as Snapshot(**result).
results = executor.execute(flattened_urls, fetcher)

Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.10s
Request!  0.20s
Request!  0.31s
Request!  0.41s
Request!  0.51s
Request!  0.62s
Request!  0.72s
Request!  0.82s
Request!  0.99s
Request!  1.09s
Request!  1.20s
Request!  1.30s
Request!  1.30s
Request!  1.40s
Request!  1.50s
Request!  1.60s
Request!  1.71s
Request!  1.81s
Request!  1.91s
Request!  2.01s
Request!  2.12s
Request!  2.22s
Request!  2.32s
Request!  2.42s
Request!  2.53s
Request!  2.63s
Request!  2.73s
Request!  2.84s
Request!  2.94s
Request!  3.04s
Request!  3.14s
Request!  3.24s


## Parsing and Response Validation

In [36]:
from pydantic import BaseModel
from typing import List, Optional

class Result(BaseModel):
    """One aggregate bar from the ``results`` array of an aggregates response.

    Field meanings follow the Polygon aggregates documentation — confirm
    against the API reference. ``n`` and ``vw`` are documented there as
    optional, so they default to ``None`` instead of failing validation when a
    bar omits them.
    """
    c: float  # close price
    h: float  # high price
    l: float  # low price
    n: Optional[int] = None  # number of transactions; may be absent from a bar
    o: float  # open price
    t: int  # bar start as a Unix timestamp in milliseconds
    v: int  # volume; NOTE(review): a fractional volume would fail int validation — confirm the feed never sends one
    vw: Optional[float] = None  # volume-weighted average price; may be absent from a bar

class Snapshot(BaseModel):
    """Top-level payload of one aggregates response for a single ticker.

    ``results`` defaults to an empty list because, per the Polygon aggregates
    documentation, the key is optional and is left out when the requested
    window has no bars; such a response should validate as "no data" rather
    than raise.
    """
    adjusted: bool
    next_url: Optional[str] = None  # present only when more pages are available
    queryCount: int
    request_id: str
    results: List[Result] = []  # pydantic copies field defaults per instance, so this list is not shared
    resultsCount: int
    status: str
    ticker: str

In [48]:
# Validate each raw response by unpacking it into the Snapshot model.
validated = [Snapshot(**payload) for payload in results]

In [51]:
# One row per bar, tagged with the ticker of the snapshot it came from.
# (A ticker-less frame used to be built first and immediately overwritten;
# that redundant pass over every bar has been dropped.)
df = pd.DataFrame([
    {**result.model_dump(), 'ticker': snapshot.ticker}
    for snapshot in validated
    for result in snapshot.results
])

In [52]:
df

Unnamed: 0,c,h,l,n,o,t,v,vw,ticker
0,179.59,179.95,179.35,146,179.35,1694419200000,6238,179.7899,AAPL
1,179.50,179.67,179.50,108,179.67,1694419260000,3353,179.5753,AAPL
2,179.41,179.41,179.41,91,179.41,1694419380000,2214,179.4657,AAPL
3,179.41,179.42,179.41,54,179.41,1694419440000,1141,179.4251,AAPL
4,179.42,179.42,179.42,44,179.42,1694419500000,1073,179.4440,AAPL
...,...,...,...,...,...,...,...,...,...
24277,312.11,312.18,311.93,501,311.93,1696881420000,23694,312.0726,ACN
24278,312.14,312.18,312.07,562,312.10,1696881480000,21002,312.1144,ACN
24279,312.06,312.17,311.93,825,312.11,1696881540000,52732,312.0513,ACN
24280,312.01,312.01,312.01,41,312.01,1696881600000,33114,312.0100,ACN
