## Getting data from Polygon

In [None]:
# needed to run tasks in parallel
from gevent import monkey
monkey.patch_all()
import gevent
from gevent.queue import Queue
import itertools
from typing import Callable
from polygon import RESTClient
from dotenv import load_dotenv
import os

load_dotenv()

# represents a nil value for a float
nil_float = -9999.0

def polygon_factory():
    """Create a new Polygon ``RESTClient`` keyed by the ``POLYGON_KEY`` environment variable."""
    api_key = os.getenv("POLYGON_KEY")
    return RESTClient(api_key=api_key)

# makes one client for one-off calls
client = polygon_factory()

# Function to process a queue with a client
def process_queue(client: "RESTClient", queue: "Queue", fn: "Callable[[dict, RESTClient], None]"):
    """Drain ``queue``, calling ``fn(item, client)`` once per item.

    Args:
        client: Client handed to ``fn`` on every call.
        queue: Queue of pending items; must offer ``empty()`` and ``get()``.
        fn: Worker called as ``fn(item, client)``; its return value is ignored.

    An exception raised by ``fn`` for one item is reported and that item is
    skipped, so a single failing item no longer stops the rest of the queue
    from being processed.
    """
    while not queue.empty():
        item = queue.get()
        try:
            fn(item, client)
        except Exception as exc:
            # Keep going: the remaining items are independent of this one.
            print(f"process_queue: {getattr(fn, '__name__', fn)} failed for {item!r}: {exc!r}")

# Pass the dict with all items, fn operates on each item and add fields where needed
def run_on_all_items(items:list[dict], fn:Callable[[dict, RESTClient], None], cli_gen:Callable[[], RESTClient],  N = 8):
    """Run ``fn(item, client)`` over every item using up to ``N`` concurrent greenlets.

    Args:
        items: Dicts to process. Any iterable works (callers pass
            ``dict.values()``); it is copied into a list once.
        fn: Worker called as ``fn(item, client)``; expected to mutate ``item``.
        cli_gen: Zero-argument factory; called once per worker so that each
            greenlet gets its own client.
        N: Maximum number of workers. Must be at least 1.

    Raises:
        ValueError: If ``N`` is smaller than 1 (previously all items were
            silently dropped in that case).

    Items are dealt round-robin into one queue per worker, and the call
    returns once every worker has finished. ``gevent.joinall`` is called
    without ``raise_error``, so an exception that escapes a worker is not
    re-raised here.
    """
    if N < 1:
        raise ValueError(f"N must be at least 1, got {N}")

    pending = list(items)
    if not pending:
        return

    # Never create more workers (and API clients) than there are items.
    worker_count = min(N, len(pending))
    queues = [Queue() for _ in range(worker_count)]

    # Distribute items into the queues, round-robin
    for item, queue in zip(pending, itertools.cycle(queues)):
        queue.put_nowait(item)

    # One greenlet per queue, each with its own freshly created client
    greenlets = [
        gevent.spawn(process_queue, cli_gen(), queue, fn)
        for queue in queues
    ]

    # Wait for all greenlets to complete
    gevent.joinall(greenlets)

In [None]:
# get all exchanges
exchange_list = client.get_exchanges(asset_class='stocks')

for t in exchange_list:
    print(t)

In [None]:
from polygon.rest.models import Ticker
from typing import Any, Dict

# every ticker we hold here will be stored as TICKER: {<props...>}
ticker_dict: Dict[str, Any] = {}

exchanges_we_care = [
    "XASE", # 234 https://www.tradinghours.com/mic/s/xase ?
    "XNAS", # NASDAQ 3370
    "XNYS", # NYSE 1797
]

def list_all_tickers_from_exchange(mic):
    """Return what ``client.list_tickers`` yields for type "CS" stocks on exchange ``mic``.

    The module-level ``client`` is used; the caller iterates over the result.
    """
    return client.list_tickers(market="stocks", limit=1000, exchange=mic, type="CS")

# Fill ticker_dict with one entry per ticker listed on the exchanges we care about.
for ex in exchanges_we_care:
    ticker_count = 0
    for t in list_all_tickers_from_exchange(ex):
        ticker_count += 1
        if t.ticker in ticker_dict:
            # Symbol already collected from an earlier exchange: report it;
            # the entry built below replaces the earlier one.
            print(f"DUPLICATE: {ex}:{t.ticker} <> {ticker_dict[t.ticker]['mic']}:{t.ticker}")
        entry = {
            "t": t.ticker,
            "mic": ex,
            "name": t.name,
        }
        # Everything collected so far is quoted in USD; record the currency
        # only when it differs.
        if t.currency_name != 'usd':
            print(f"-- {t.ticker} currency: {t.currency_name}")
            entry["curr"] = t.currency_name
        ticker_dict[t.ticker] = entry
    print(f"exchange: {ex} tickers: {ticker_count}")

## ⚠️ Sample Down?

Optionally sample down to a handful of tickers here to make the rest of the notebook run hundreds of times faster.

In [None]:
sample = 'META MSFT AMZN GOOGL WMT INTC NET SQ COIN MTCH QS'

# Keep only the sampled symbols that were actually collected into ticker_dict;
# a symbol that is missing is reported and skipped instead of raising KeyError
# and aborting the cell.
ticker_sample = {}
for t in sample.split(' '):
    if t in ticker_dict:
        ticker_sample[t] = ticker_dict[t]
    else:
        print(f"sample ticker not found, skipping: {t}")

# RUN ALL STOCKS (comment this line out to work on the small sample above)
ticker_sample = ticker_dict

At this point we have collected all tickers in `ticker_dict`  

Let's collect some **financial metrics**

In [None]:
# time: 11m  -> N=8 1.24m

from polygon.rest.models import TickerDetails

def add_details(t_obj:dict, client:RESTClient):
    """Look up the ticker named by ``t_obj['t']`` and copy selected detail fields onto ``t_obj``.

    Sets the keys ``active``, ``market_cap``, ``share_count``, ``employees``
    and ``sic_description``.
    """
    # https://polygon.io/docs/stocks/get_v3_reference_tickers__ticker
    details: TickerDetails = client.get_ticker_details(t_obj['t'])
    t_obj.update({
        # Whether or not the asset is actively traded. False means the asset has been delisted.
        'active': details.active,
        # The most recent close price of the ticker multiplied by weighted outstanding shares.
        'market_cap': details.market_cap,
        # The shares outstanding calculated assuming all shares of other share classes are converted to this share class.
        'share_count': details.weighted_shares_outstanding,
        'employees': details.total_employees,
        'sic_description': details.sic_description,
    })

run_on_all_items(ticker_sample.values(), add_details, polygon_factory)

In [None]:
from polygon.rest.models import DailyOpenCloseAgg
from datetime import datetime, timedelta

def recursive_dict(obj):
    """Recursively turn ``obj`` into plain dicts and lists.

    Dicts and lists are rebuilt with every value/element converted; any other
    object that has a ``__dict__`` is replaced by the conversion of that
    ``__dict__``; everything else is returned unchanged.
    """
    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            converted[key] = recursive_dict(value)
        return converted
    if isinstance(obj, list):
        return list(map(recursive_dict, obj))
    if hasattr(obj, '__dict__'):
        return recursive_dict(obj.__dict__)
    return obj

# get the last open/close, avoid weekends and empty dates.
# ps: would be nice to have a cache for dates, so we don't end up repeating failures
def get_last_open_day_data(date_s: str, ticker: str, back: int = 0, halt: int = 5,
                           rest_client: "RESTClient | None" = None) -> "DailyOpenCloseAgg | None":
    """Return the daily open/close data for the most recent day that has any.

    Starts at ``date_s`` minus ``back`` days, skips Saturdays and Sundays, and
    asks the API for that day. When the request fails (for example on a market
    holiday) it steps back exactly one calendar day and tries again, at most
    ``halt`` times in total.

    Args:
        date_s: Start date formatted as ``%Y-%m-%d``.
        ticker: Ticker symbol to query.
        back: Number of days to subtract from ``date_s`` before the first try.
        halt: Maximum number of API attempts.
        rest_client: Client to query; defaults to the module-level ``client``.

    Returns:
        Whatever ``get_daily_open_close_agg`` returns for the first day that
        succeeds, or ``None`` when every attempt failed.
    """
    from datetime import datetime, timedelta

    api = client if rest_client is None else rest_client
    day = datetime.strptime(date_s, '%Y-%m-%d') - timedelta(days=back)

    for _ in range(halt):
        # get last open day that is not a weekend
        while day.weekday() > 4:
            day -= timedelta(days=1)
        date_s = day.strftime('%Y-%m-%d')
        try:
            return api.get_daily_open_close_agg(ticker=ticker, adjusted=True, date=date_s)
        except Exception:
            # Not a weekend but possibly a holiday, so no data was found.
            # Step back ONE day; re-applying a growing offset to the already
            # shifted date used to skip valid trading days.
            day -= timedelta(days=1)

    print("halting, too many tries", ticker, date_s)
    return None

## P/E ratio

The P/E ratio is calculated by dividing the market price per share by the company's earnings per share (EPS). A high P/E ratio can mean that a stock's price is high relative to earnings and possibly overvalued. A low P/E ratio might indicate that the current stock price is low relative to earnings.

In [None]:
# time: 24m -> N=8 4.4m

# set: last_price, eps, pe
def get_pe(t_obj:dict, client:RESTClient):
    """Set ``last_price``, ``eps`` and ``pe`` on ``t_obj``.

    All three keys are first set to ``nil_float``. ``last_price`` is replaced
    when a price is found. ``eps`` and ``pe`` are replaced only when a price
    was found and a non-zero EPS could also be computed.

    Args:
        t_obj: Ticker dict; ``t_obj['t']`` is the ticker symbol.
        client: Client used for the financials request. The price lookup goes
            through ``get_last_open_day_data`` called without a client.
    """
    from itertools import islice

    ticker = t_obj['t']
    t_obj["last_price"] = nil_float
    t_obj["eps"] = nil_float
    t_obj["pe"] = nil_float

    # (IMO) annual figures are too outdated; use the first four quarterly
    # filings returned instead. Only those four are pulled from the result
    # rather than materializing every filing.
    # NOTE(review): presumably filings arrive newest first — confirm API ordering.
    filings = client.vx.list_stock_financials(ticker=ticker, limit=100, timeframe="quarterly")
    eps_list = []
    for filing in islice(filings, 4):
        # Any missing level (financials, income statement, EPS, value) means
        # this filing contributes nothing.
        income = getattr(filing.financials, "income_statement", None)
        basic_eps = getattr(income, "basic_earnings_per_share", None)
        value = getattr(basic_eps, "value", None)
        if value is not None:
            eps_list.append(value)

    # note: with today's date the result gets close to google/yahoo (but not
    # identical): https://ca.finance.yahoo.com/quote/META?p=META -- not
    # immediately important since precise fundamentals are not the goal, but
    # it is a point to fix later.
    date = datetime.today().strftime('%Y-%m-%d')
    day_data = get_last_open_day_data(date, ticker)
    if day_data is None:
        return

    # Prefer the close; fall back to the open when there is no close.
    last_price = day_data.close if day_data.close is not None else day_data.open
    if last_price is None or last_price == 0:
        return
    t_obj["last_price"] = last_price

    if not eps_list:
        return

    # pro-rate to a full year when fewer than four quarters carried an EPS
    eps = sum(eps_list) * 4 / len(eps_list)
    if eps == 0:
        return

    t_obj["eps"] = eps
    t_obj["pe"] = last_price / eps

run_on_all_items(ticker_sample.values(), get_pe, polygon_factory)


## Dividends

Project forward - Trailing Annual Dividend Rate

Get the most recent dividends that were paid as `"dividend_type": "CD"` in the last year.
Frequencies observed: 1, 2, 4 (most common), 12

for many reasons, may return none.

[Dividends are kinda complicated](https://www.investopedia.com/ask/answers/102714/how-and-when-are-stock-dividends-paid-out.asp)


In [None]:
# time: 8m  -> N=8  1m
from polygon.rest.models import Dividend

# eg: https://ca.finance.yahoo.com/quote/MO/key-statistics?p=MO#:~:text=17.56M-,Dividends%20%26%20Splits,-Forward%20Annual%20Dividend
# eg code (other API): https://medium.com/swlh/finding-high-dividend-stocks-with-python-c28e02c14e14

# set: div_year, div_pc
def get_dividends(t_obj:dict, client:RESTClient):
    """Set ``div_year``, ``div_pc`` and ``div_freq`` on ``t_obj``.

    All three keys are first set to ``nil_float``. They are replaced only as
    far as the data allows; every early exit prints the reason except "no
    dividends found".

    Args:
        t_obj: Ticker dict; reads ``t_obj['t']`` and, for the yield,
            ``t_obj['last_price']``.
        client: Client used for the dividends request.
    """
    ticker = t_obj['t']
    t_obj["div_year"] = nil_float
    t_obj["div_pc"] = nil_float
    t_obj["div_freq"] = nil_float

    # give a little extra in case now is near that time
    start_date = datetime.today() - timedelta(days=366 * 1.3)

    dividends = client.list_dividends(ticker=ticker, limit=1000, dividend_type="CD", ex_dividend_date_gt=start_date.strftime('%Y-%m-%d'))
    dividends_list = list(dividends)
    if not dividends_list:
        return

    # avoid handling different frequencies - keep calculations simple for now
    freq = dividends_list[0].frequency
    if any(d.frequency != freq for d in dividends_list):
        print(f"div freq changed: {ticker}")
        return

    if freq is not None:
        t_obj["div_freq"] = freq

    # Checked before any arithmetic on freq so that None or 0 never reaches
    # the comparisons and the division below.
    if freq not in (1, 2, 4, 12):
        print(f"div freq not supported: {freq} - {ticker}")
        return

    if len(dividends_list) < freq:
        print(f"not enough dividends: {len(dividends_list)} - {ticker}")
        return

    # make sure the last div is not too old (proportional to frequency)
    # NOTE(review): assumes the first entry is the most recent dividend — confirm API ordering.
    last_div_date = datetime.strptime(dividends_list[0].ex_dividend_date, '%Y-%m-%d')
    if last_div_date < datetime.today() - timedelta(days=((366/freq)+31)):
        print(f"last div too old: {last_div_date} - {ticker} (freq:{freq})")
        print(dividends_list)
        return

    # One year's worth of payments: the first `freq` entries.
    div_year = sum(d.cash_amount for d in dividends_list[:freq])
    t_obj["div_year"] = div_year

    # The yield needs a real price; the nil_float placeholder (or a missing
    # key) would produce a meaningless percentage or an exception.
    last_price = t_obj.get('last_price', nil_float)
    if last_price is None or last_price <= 0:
        print(f"no usable last_price, div_pc not computed: {ticker}")
        return
    t_obj["div_pc"] = round(div_year / last_price * 100, 3)

# run_on_all_items(ticker_sample.values(), get_dividends, polygon_factory)

# debug
# get_dividends(ticker_sample['HBI'], client) # -- too old last div: 2022-11-21 00:00:00 - HBI
# ticker_sample['HBI']
# get_dividends(ticker_sample['BDL'], client) # freq:1
# ticker_sample['BDL']

# get_dividends(ticker_sample['EVBN'], client) # freq:2
# ticker_sample['EVBN']

# get_dividends(ticker_sample['AGNC'], client) # freq:12
# ticker_sample['AGNC']

# get_dividends(ticker_sample['BCPC'], client) # freq:1
# ticker_sample['BCPC']

# get_dividends(ticker_sample['ADTN'], client) -- missing last quarter (skip)
# ticker_sample['ADTN']


## Consolidate Dataframe

In [None]:
# save to pickle / from pickle
import pickle

# save
# with open('jars/tickers-2023-12-28.pickle', 'wb') as handle:
#     pickle.dump(ticker_sample, handle, protocol=pickle.HIGHEST_PROTOCOL)

# # load
# with open('jars/tickers-2023-12-28.pickle', 'rb') as handle:
#     ticker_sample = pickle.load(handle)


In [None]:
# put in a pandas dataframe
import pandas as pd
# configure number of lines to display in pandas dataframe
pd.set_option("display.max_rows", 100)
pd.set_option("display.min_rows", 30)
pd.set_option("display.max_columns", 100)

columns = 'name market_cap share_count employees sic_description last_price eps pe div_pc'.split(' ')
df = pd.DataFrame.from_dict(ticker_sample, orient='index', columns=columns)
df.index.name = 'ticker'

# Drop tickers with missing values. A column that holds no data at all (its
# collection step was not run, e.g. the dividends one) is ignored here,
# otherwise dropna() would discard every single row. The columns used by the
# filters below are always checked.
required = ('market_cap', 'eps', 'pe')
checked = [c for c in columns if c in required or df[c].notna().any()]
df = df.dropna(subset=checked)
# filter out the nil_float (-9999.0) placeholders; note this also drops real values <= -100
df = df[df['pe'] > -100]
df = df[df['eps'] > -100]
# keep only companies with a market cap above 1e9
df = df[df['market_cap'] > 1e9]
# df = df[(df['div_pc'] < 50) & (df['div_pc'] > 0)]
df = df.sort_values(by=['market_cap'], ascending=False)

print("len: ", df.shape[0])
df