In [2]:
import asyncio
import nest_asyncio
nest_asyncio.apply()

import pandas as pd
from typing import List, Dict, Any, Optional, Tuple, Union

from lib.utils import get_polygon_root, get_93, get_polygon_key, get_nyse_date_tups, estimate_time
from lib.fetcher import HttpRequestFetcher, BatchRequestExecutor
from lib.polygon import make_urls
from lib.models import Snapshot

In [3]:
# Notebook-wide configuration, resolved through the project helpers imported from lib.utils.
base_path = get_polygon_root()
# The key is passed to make_urls and ends up inside every request URL; avoid displaying it.
api_key = get_polygon_key()
# presumably the project's 93-ticker universe — TODO confirm against lib.utils.get_93
tickers = get_93()

In [23]:
# Restrict the run to the first three tickers.
tickers_ = tickers[:3]
# Alternative, much earlier start date left here for reference:
# start_date = '2018-10-11'
start_date = '2023-09-09'
end_date = '2023-10-09'
# presumably one tuple of Unix timestamps per NYSE trading session in the range — TODO confirm
tups = get_nyse_date_tups(start_date, end_date, unix=True)

# Build the request URLs from the selected tickers, the date tuples and the API key.
urls = make_urls(tickers_, tups, api_key)

In [10]:
# Peek at the first generated URL with the API key masked, so the credential
# is never written into the saved notebook output.
urls[0].replace(api_key, '<REDACTED>')

'https://api.polygon.io/v2/aggs/ticker/FISV/range/1/minute/1694439000000/1694462400000?adjusted=true&sort=asc&limit=1000&apiKey=<REDACTED>'

In [5]:
# Rough run-time estimate for the whole URL batch.
# NOTE(review): rps=1000 here differs from the rps=100 passed to HttpRequestFetcher
# further down — confirm which rate is intended.
estimate_time(len(urls), rps=1000, req_time=0.1)

Estimated time for 21 requests @ 0.1s per API call: 00:00:00


In [24]:
# Fetcher configured with rps=100; detailed_logs=True presumably produces the
# per-request "Request!" timing lines seen when the batch runs — TODO confirm.
fetcher = HttpRequestFetcher(rps=100, detailed_logs=True)
# Executor whose execute() is given the URL list and the fetcher.
executor = BatchRequestExecutor()

In [25]:
# Run the whole URL batch through the fetcher; the returned payloads are
# validated against the Snapshot model afterwards.
results = executor.execute(urls, fetcher)

Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.00s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.01s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!  0.02s
Request!

In [26]:
def validate_results(results: List[Dict], model: Optional[type] = None) -> Tuple[List[Any], List[Dict]]:
    """Partition raw payloads into validated models and rejected payloads.

    Args:
        results: Raw payloads. Each one is expanded as keyword arguments
            to ``model``.
        model: Class used to validate a single payload. When omitted, the
            ``Snapshot`` model imported by this notebook is used.

    Returns:
        A ``(validated_results, invalidated_results)`` tuple. The first list
        holds the constructed model instances; the second holds the original
        payloads for which construction raised, in input order.

    Side effects:
        Prints the size of both lists.
    """
    if model is None:
        # Looked up at call time so the default keeps following the notebook's import.
        model = Snapshot

    validated_results = []
    invalidated_results = []
    for result in results:
        try:
            validated_results.append(model(**result))
        except Exception:
            # Deliberate best-effort split: any failure to build the model
            # (validation error, non-mapping payload, ...) marks the payload
            # as invalid instead of aborting the whole batch.
            invalidated_results.append(result)

    print(f'Validated: {len(validated_results)}')
    print(f'Invalidated: {len(invalidated_results)}')
    return validated_results, invalidated_results

In [27]:
# Split the fetched payloads into Snapshot instances and the raw payloads that failed validation.
validated, invalidated = validate_results(results)

Validated: 63
Invalidated: 0


In [28]:
# Flatten the validated snapshots: one row per entry in snapshot.results,
# each row carrying the ticker of the snapshot it came from.
df_val = pd.DataFrame(
    [
        dict(entry.model_dump(), ticker=snap.ticker)
        for snap in validated
        for entry in snap.results
    ]
)

In [30]:
# Work on a copy so df_val itself is not modified by later cells.
df = df_val.copy()

In [37]:
# One (frame, ticker) pair per ticker group, each frame re-indexed from zero.
df_ticker_tups = [
    (ticker_frame.reset_index(drop=True), symbol)
    for symbol, ticker_frame in df.groupby('ticker')
]

In [40]:
# Report progress for each (frame, ticker) pair.
# The frame is bound to `ticker_df` rather than `df`: a loop target named `df`
# would overwrite the notebook-level `df`, and re-running the groupby cell
# afterwards would then group only the last ticker's frame.
# NOTE(review): the loop only prints; nothing is written to disk here yet.
total = len(df_ticker_tups)
for position, (ticker_df, ticker) in enumerate(df_ticker_tups, start=1):
    print(f"Saving {ticker}... {position} / {total}")

Saving AAPL... 1 / 3
Saving ACN... 2 / 3
Saving ADBE... 3 / 3


In [36]:
# NOTE(review): the saved output below is a (DataFrame, ...) pair, but `tups` is
# assigned from get_nyse_date_tups earlier in the notebook — this cell presumably
# ran while `tups` held the per-ticker pairs (now df_ticker_tups); confirm and rename.
tups[0]

(             c         h        l      n        o              t        v  \
 0     180.0677  180.3000  179.940  15819  180.070  1694439000000  1180839   
 1     179.8400  180.2000  179.840   4444  180.090  1694439060000   441838   
 2     180.0790  180.0900  179.780   3307  179.870  1694439120000   273724   
 3     180.0979  180.1000  179.880   3030  180.080  1694439180000   237649   
 4     179.8100  180.1100  179.780   3653  180.099  1694439240000   300991   
 ...        ...       ...      ...    ...      ...            ...      ...   
 8206  178.9700  179.0200  178.925   2111  179.010  1696881360000   208246   
 8207  178.9900  179.0200  178.950   2221  178.970  1696881420000   183928   
 8208  178.9500  179.0012  178.940   3247  178.980  1696881480000   347245   
 8209  179.0000  179.0200  178.920   5098  178.945  1696881540000   564537   
 8210  178.9000  178.9900  178.850    147  178.990  1696881600000   900969   
 
             vw ticker  
 0     180.0808   AAPL  
 1     179.9

In [None]:
# Iterating a DataFrame.groupby yields (group key, sub-frame) pairs, so the
# ticker key must be unpacked first; the reverse order would call
# .reset_index on the ticker key instead of on the frame.
# NOTE(review): `concat_df` is not defined in the cells visible here — confirm
# it exists before this cell runs.
df_ticker_tups = [
    (group.reset_index(drop=True), ticker)
    for ticker, group in concat_df.groupby(by='ticker')
]

In [22]:
# Display the raw payloads that failed Snapshot validation.
invalidated

[{'ticker': 'FISV',
  'queryCount': 0,
  'resultsCount': 0,
  'adjusted': True,
  'status': 'OK',
  'request_id': '542297af948347e8e05131a24755767b'},
 {'ticker': 'FISV',
  'queryCount': 0,
  'resultsCount': 0,
  'adjusted': True,
  'status': 'OK',
  'request_id': 'bfc3bc9df046f2ffbb52f9ddd68ade9f'},
 {'ticker': 'FISV',
  'queryCount': 0,
  'resultsCount': 0,
  'adjusted': True,
  'status': 'OK',
  'request_id': '5ac44fcf586272984f4cb5c58ff7ad9f'},
 {'ticker': 'FISV',
  'queryCount': 0,
  'resultsCount': 0,
  'adjusted': True,
  'status': 'OK',
  'request_id': 'ac59c4ca2ae6e0febd24654e76dbc341'},
 {'ticker': 'FISV',
  'queryCount': 0,
  'resultsCount': 0,
  'adjusted': True,
  'status': 'OK',
  'request_id': 'c6f3e80e8f4754568d7acc1d1259c3c9'},
 {'ticker': 'FISV',
  'queryCount': 0,
  'resultsCount': 0,
  'adjusted': True,
  'status': 'OK',
  'request_id': '50e4a63ba89a5ccdc603e09e916545d3'},
 {'ticker': 'FISV',
  'queryCount': 0,
  'resultsCount': 0,
  'adjusted': True,
  'status': 'O