In [1]:
import requests
from bs4 import BeautifulSoup
from td_api import Account
from datetime import datetime, timedelta
from price_probability_distobution import get_pdfs_from_deltas, get_pdfs_from_marks
from scipy import stats
from scipy.integrate import quad
import time
import numpy as np
import pandas as pd
from tqdm import tqdm



In [9]:
page = requests.get("https://research.investors.com/options-center/reports/option-volume", headers={"User-Agent": "Chrome"})
soup = BeautifulSoup(page.text, features="lxml")

items = soup.findAll("a", {"class": "stockRoll"})
tickers = [item.text for item in items]

In [3]:
acc = Account("keys.json")

profiles = []
from_date = datetime.now()
to_date = from_date + timedelta(days=8)

last_query_time = 0

for ticker in tqdm(tickers[40:]):
    # print(ticker + ("\t%d" % i))
    time.sleep(max(0, 0.6 - (time.time() - last_query_time)))
    data = acc.get_options_chain(ticker, from_date, to_date, strike_count=50)
    if data is None:
        continue
    mark = acc.get_quotes([ticker])["mark"].iloc[0]
    last_query_time = time.time()
    ticker_pdfs = get_pdfs_from_marks(data, distribution=stats.logistic)
    profile = {}
    (u, s, errs) = list(ticker_pdfs.values())[0]
    profile["u"] = u
    profile["s"] = s
    profile["errs"] = errs
    profile["mark"] = mark
    profile["ticker"] = ticker
    profiles.append(profile)
    

30%|███       | 33/110 [19:09<33:40, 26.24s/it]No options data for MDR
 37%|███▋      | 41/110 [39:05<1:05:47, 57.21s/it]


ConnectionError: ('Connection aborted.', TimeoutError(110, 'Connection timed out'))

In [8]:
profiles.sort(key=lambda x: x["u"]/x["mark"]-x["errs"][0]/x["u"], reverse=True)
# PDD	mean: 92.07±0.08%	std: 9.09±1.14%	E[return]: 1.75%	profit: 54.10%-54.35%-54.61%	err: 1.15
df = pd.DataFrame(columns=["mean", "std", "return", "profit_odds_low", "profit_odds_avg", "profit_odds_high", "err"])
tickers = []
for profile in tqdm(profiles):
    row = {}
    u = profile["u"]
    s = profile["s"]
    errs = profile["errs"]
    mark = profile["mark"]
    ticker = profile["ticker"]
    tickers.append(ticker)
    err = 100 * np.linalg.norm(errs / (u, s))
    row["mean"] = u
    row["std"] = s
    row["err"] = err
    # label = ticker + "\tmean: %.2f±%.2f%%\tstd: %.2f±%.2f%%\t" % (u, 100*errs[0]/u, s, 100*errs[1]/s)

    distribution = stats.logistic.pdf

    x = np.linspace(u-5*s, u+5*s, 100)
    y = distribution(x, u, s)

    loss_odds = quad(lambda x: distribution(x, u, s), 0, mark)[0]
    s_sign = 1 if u > mark else -1
    loss_odds_min = quad(lambda x: distribution(x, u+errs[0], s - errs[1]*s_sign), 0, mark)[0]
    loss_odds_max = quad(lambda x: distribution(x, u-errs[0], s + errs[1]*s_sign), 0, mark)[0]

    # label += "E[return]: %.2f%%\t" % (100 * (u/mark-1))

    row["return"] = 100 * (u/mark-1)
    row["profit_odds_low"] = 100*(1-loss_odds_max)
    row["profit_odds_avg"] = 100*(1-loss_odds)
    row["profit_odds_high"] = 100*(1-loss_odds_min)

    # label += "profit: %.2f%%-%.2f%%-%.2f%%\t" % (100*(1-loss_odds_max), 100*(1-loss_odds), 100*(1-loss_odds_min))
    # label += "err: %.2f\t" % err
    # print(label)
    df = df.append(row, ignore_index=True)
    # err = 100 * np.linalg.norm(errs / (u, s))
    # print(ticker + "\t%.2f" * 3 % (100 * (u/mark-1), u, 100 * (errs[0]/u)))
df.index = tickers
print()
print(len(df))

100%|██████████| 40/40 [00:03<00:00, 10.81it/s]
40



In [5]:
df.to_csv("temp.csv")