## Yahoo options: fetch option chains using unpublished Yahoo REST API calls
### Usage:  _change the value of sym below, and run all cells_


In [None]:
import urllib.request
import json
import datetime
import pandas as pd
import numpy as np
import pathlib
from IPython import display
from tqdm import tqdm,tqdm_notebook




In [None]:
def _to_dt(ts):
    d = datetime.datetime.fromtimestamp(ts)
    return datetime.datetime(d.year,d.month,d.day)

_DAY_INDEX = ['MON','TUE','WED','THU','FRI','SAT','SUN']
def get_nth_weekday(year,month,target_weekday,nth_occurrence):
    '''
    Return the date of the n-th occurrence of a weekday within a month.

    target_weekday uses the usual 0..6 numbering, where 0 = monday and
    6 = sunday.  year and month may be ints or numeric strings.
    nth_occurrence is 1-based.  The result is the pandas Timestamp taken
    from the end of a weekly date range anchored on that weekday.
    '''
    month_start = datetime.datetime(int(year), int(month), 1)
    weekly_rule = 'W-' + _DAY_INDEX[target_weekday]
    # The range starts at the first matching weekday on or after the 1st,
    # so its last element is the requested occurrence.
    occurrences = pd.date_range(
        month_start,
        periods=nth_occurrence,
        freq=weekly_rule,
    )
    return occurrences[-1]



In [None]:
def get_yahoo_options_expirations(sym):
    """Fetch the list of option expirations Yahoo reports for ``sym``.

    Returns a tuple ``(dict_ts, dict_dt, dict_monthlies)``:

    * ``dict_ts``        maps each raw expiration timestamp to its local-midnight
                         ``datetime`` (see ``_to_dt``).
    * ``dict_dt``        is the inverse mapping, ``datetime`` -> raw timestamp.
    * ``dict_monthlies`` is the subset of ``dict_dt`` whose date equals
                         ``get_nth_weekday(year, month, 3, 3)``.

    Raises whatever ``urllib``/``json`` raise on network or decoding errors,
    and ``KeyError``/``IndexError`` if the payload lacks the expected keys.
    """
    base_url = f'https://query1.finance.yahoo.com/v7/finance/options/{sym}'
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(base_url) as response:
        payload = json.loads(response.read().decode("utf-8"))
    expirations = payload['optionChain']['result'][0]['expirationDates']

    dict_ts = {ts: _to_dt(ts) for ts in expirations}
    dict_dt = {day: ts for ts, day in dict_ts.items()}
    # NOTE(review): weekday 3 is Thursday, so this keeps expirations whose
    # *local* date is the third Thursday of the month.  Presumably this relies
    # on the local-time conversion in _to_dt shifting the API timestamp back a
    # day; the outcome therefore looks timezone dependent -- confirm.
    dict_monthlies = {
        day: ts
        for day, ts in dict_dt.items()
        if get_nth_weekday(day.year, day.month, 3, 3) == day
    }
    return dict_ts, dict_dt, dict_monthlies


def get_yahoo_options(sym,expirations=None):
    """Download option chains for ``sym`` and return them as one DataFrame.

    Parameters
    ----------
    sym : ticker used to build the request URL.
    expirations : optional iterable of raw expiration timestamps to fetch.
        When ``None`` every expiration listed in the initial response is
        downloaded.

    Returns
    -------
    A DataFrame holding the call and put records of every requested
    expiration (each per-expiration frame keeps its own 0..n index), plus:

    * ``dte`` -- whole days from "now" to the date encoded in the contract
      symbol, or ``None`` when the symbol cannot be parsed;
    * ``pc``  -- the 9th character from the end of the contract symbol.

    Raises
    ------
    ValueError
        If there is no expiration to download.
    Network, JSON and key errors from the requests propagate unchanged.
    """
    base_url = f'https://query1.finance.yahoo.com/v7/finance/options/{sym}'

    def _fetch_result(url):
        # ``with`` closes the HTTP response once the body has been read.
        with urllib.request.urlopen(url) as response:
            payload = json.loads(response.read().decode("utf-8"))
        return payload['optionChain']['result'][0]

    # get main list of contracts
    c = _fetch_result(base_url)
    print(c.keys())

    # now process chains
    expiries_to_get = c['expirationDates'] if expirations is None else expirations
    frames = []
    for e in expiries_to_get:
        u = base_url + f'?date={e}'
        print(u,end=' : ')
        options = _fetch_result(u)['options'][0]
        calls = options['calls']
        puts = options['puts']
        chain = calls + puts
        print(f'calls: {len(calls)} puts:{len(puts)} all:{len(chain)}')
        frames.append(pd.DataFrame(chain))

    if not frames:
        # Fail with a clear message instead of an AttributeError on None below.
        raise ValueError(f'no option expirations to download for {sym}')
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    df_chain = pd.concat(frames)

    # Take "now" once so every row is measured against the same instant.
    now = datetime.datetime.now()

    def get_dte(v):
        # The put/call flag is read from position -9 below, so the six
        # characters before it are taken as the YYMMDD expiry.  Reading from
        # the tail avoids depending on the symbol prefix matching ``sym``.
        try:
            yymmdd = v[-15:-9]
            expiry = datetime.datetime(
                int(yymmdd[0:2]) + 2000,
                int(yymmdd[2:4]),
                int(yymmdd[4:6]),
            )
        except (TypeError, ValueError):
            # Non-string value (e.g. NaN) or a symbol without a valid date.
            return None
        return (expiry - now).days

    df_chain['dte'] = df_chain.contractSymbol.apply(get_dte)
    df_chain['pc'] = df_chain.contractSymbol.str[-9]
    return df_chain

def get_first_n_expiries(sym,first_n_expiries=10):
    """Download the chains of the first ``first_n_expiries`` expirations of ``sym``.

    "First" means the order in which ``get_yahoo_options_expirations``
    returns the expiration timestamps (presumably ascending -- confirm
    against the API).  Returns the DataFrame built by ``get_yahoo_options``.
    """
    dict_exp, _dict_dt, _dict_monthlies = get_yahoo_options_expirations(sym)
    # Keys of dict_exp are the raw timestamps the options endpoint expects.
    expiries = list(dict_exp)[:first_n_expiries]
    # Pass the selection on; previously it was computed but never used, so
    # every expiration was downloaded regardless of first_n_expiries.
    return get_yahoo_options(sym, expirations=expiries)

def get_yahoo_monthly_options(sym,n_monthlies=2):
    """Download the chains of the first ``n_monthlies`` monthly expirations.

    The monthly expirations are the third mapping returned by
    ``get_yahoo_options_expirations``; they are taken in the order that
    function yields them.  ``n_monthlies`` defaults to 2, the value that
    used to be hard-coded.  Returns the DataFrame built by
    ``get_yahoo_options``.
    """
    _dict_exp, _dict_dt, dict_monthlies = get_yahoo_options_expirations(sym)
    monthlies = list(dict_monthlies.values())[:n_monthlies]
    return get_yahoo_options(sym, expirations=monthlies)

def get_ctm_implied_vols(df_chain,pc='c'):
    """Return, per expiration, the out-of-the-money row closest to the money.

    Parameters
    ----------
    df_chain : DataFrame with at least the columns ``strike``,
        ``expiration``, ``inTheMoney`` and ``pc``.
    pc : ``'c'`` for calls or ``'p'`` for puts (case-insensitive).

    Returns
    -------
    The rows of ``df_chain`` of the requested type whose strike is the
    nearest out-of-the-money strike of their expiration, with
    ``expiration`` and ``strike`` as the leading columns.
    """
    pc2 = pc.upper()
    same_type = df_chain[df_chain.pc == pc2]
    # .eq(False) rather than ~ so missing flags are simply not selected.
    otm = same_type[same_type.inTheMoney.eq(False)][['strike', 'expiration']]
    by_expiry = otm.groupby('expiration', as_index=False)
    # Out-of-the-money calls sit above the spot price, so the lowest strike
    # is the nearest one; out-of-the-money puts sit below it, so the
    # highest strike is.  (Taking the minimum for puts picked the deepest
    # out-of-the-money contract instead.)
    nearest = by_expiry.max() if pc2 == 'P' else by_expiry.min()
    return nearest.merge(same_type, on=['expiration', 'strike'], how='inner')

def get_avg_implied_vol_from_chain(df_chain,pc='c'):
    """Pick up to two closest-to-the-money rows from monthly expirations.

    Starting from ``get_ctm_implied_vols(df_chain, pc)``, keep the rows
    whose expiration (converted with ``_to_dt``) equals
    ``get_nth_weekday(year, month, 3, 3)``, add that date as ``exp_dt``,
    drop rows with ``dte`` below 25 and return the two rows with the
    smallest remaining ``dte``.
    """
    ctm_rows = get_ctm_implied_vols(df_chain, pc=pc)
    exp_dates = {ts: _to_dt(ts) for ts in ctm_rows.expiration.values}

    def _is_third_thursday(day):
        # Weekday index 3 is Thursday.  NOTE(review): presumably the local
        # conversion in _to_dt is what lines expirations up with a Thursday
        # -- confirm.
        midnight = datetime.datetime(day.year, day.month, day.day)
        return midnight == get_nth_weekday(day.year, day.month, 3, 3)

    monthly_ts = [ts for ts, day in exp_dates.items() if _is_third_thursday(day)]
    monthly_rows = ctm_rows[ctm_rows.expiration.isin(monthly_ts)].copy()
    monthly_rows['exp_dt'] = [exp_dates[ts] for ts in monthly_rows.expiration.values]

    far_enough = monthly_rows[monthly_rows.dte >= 25]
    return far_enough.sort_values('dte').head(2)

    

In [None]:
def get_implied_vols(sym_list,get_monthly_only=False,first_n_expiries=10):
    """Download an option-chain DataFrame for every symbol in ``sym_list``.

    With ``get_monthly_only`` the chains come from
    ``get_yahoo_monthly_options``; otherwise from ``get_first_n_expiries``
    with the given ``first_n_expiries``.

    Returns a dict keyed by symbol.  A value is a copy of the downloaded
    DataFrame, or -- when anything went wrong for that symbol -- the text
    of the exception, which is also printed.
    """
    results = {}
    for ticker in tqdm_notebook(sym_list):
        try:
            chain = (
                get_yahoo_monthly_options(ticker)
                if get_monthly_only
                else get_first_n_expiries(ticker, first_n_expiries=first_n_expiries)
            )
            results[ticker] = chain.copy()
        except Exception as err:
            # Best effort: remember why this symbol failed and carry on.
            message = str(err)
            results[ticker] = message
            print(message)
    return results


In [None]:
def get_avg_implied_vols(sym_list):
    """Collect the rows chosen by ``get_avg_implied_vol_from_chain`` per symbol.

    For each symbol the monthly chains are downloaded with
    ``get_yahoo_monthly_options`` and reduced with
    ``get_avg_implied_vol_from_chain``.

    Returns a dict keyed by symbol.  A value is a copy of the reduced
    DataFrame, or -- when anything went wrong for that symbol -- the text
    of the exception, which is also printed.
    """
    results = {}
    for ticker in tqdm_notebook(sym_list):
        try:
            monthly_chain = get_yahoo_monthly_options(ticker)
            selected = get_avg_implied_vol_from_chain(monthly_chain)
            results[ticker] = selected.copy()
        except Exception as err:
            # Best effort: remember why this symbol failed and carry on.
            message = str(err)
            results[ticker] = message
            print(message)
    return results
        


In [None]:
# Read the S&P 500 constituents CSV from the user's "downloads" folder and
# run get_avg_implied_vols over its Symbol column.  Each value in the
# resulting dict is either a DataFrame or an error string.
h = pathlib.Path.home()
df_sp500 = pd.read_csv(f"{h}/downloads/sp500_constituents.csv")
dict_avg_implied_vols = get_avg_implied_vols(df_sp500.Symbol.values)

In [None]:
# Spot check: is the mean implied volatility of the rows stored for 'NWS' NaN?
# If that symbol failed to download its entry is an error string, and this
# attribute access raises instead.
np.isnan(dict_avg_implied_vols['NWS'].impliedVolatility.mean())

In [None]:
# Build one (sym, avg_vol) row per symbol whose mean implied volatility is a
# real number, then rank the symbols from highest to lowest average.
avg_vol_rows = []
for k, df_avg_imp in dict_avg_implied_vols.items():
    try:
        avg_vol = df_avg_imp.impliedVolatility.mean()
        if np.isnan(avg_vol):
            continue
    except Exception:
        # Entries that failed to download hold an error string, not a frame;
        # report them and move on.  (Not a bare except, so Ctrl-C still works.)
        print(f"{k}: {df_avg_imp}")
        continue
    avg_vol_rows.append({'sym': k, 'avg_vol': avg_vol})

# Build the frame once instead of growing it with DataFrame.append (removed
# in pandas 2.0).  Explicit columns keep the sort valid when no row survived.
df_avg_implied = (
    pd.DataFrame(avg_vol_rows, columns=['sym', 'avg_vol'])
    .sort_values('avg_vol', ascending=False)
    .reset_index(drop=True)
)
df_avg_implied
                                

In [None]:
# Write the ranked averages to df_avg_implied.csv (relative path), without
# the index column.
df_avg_implied.to_csv('df_avg_implied.csv',index=False)

In [None]:
# Read the S&P 500 constituents CSV from the user's "downloads" folder and
# run get_implied_vols (default arguments) over its Symbol column.  Each
# value in the resulting dict is either a DataFrame or an error string.
h = pathlib.Path.home()
df_sp500 = pd.read_csv(f"{h}/downloads/sp500_constituents.csv")
dict_all_implied_vols = get_implied_vols(df_sp500.Symbol.values)

In [None]:
# Stack every usable chain into a single frame, tagging each row with the
# symbol it belongs to.  Chains whose mean implied volatility is NaN are
# skipped.
all_chain_frames = []
for k, df_all_imp in tqdm_notebook(dict_all_implied_vols.items()):
    try:
        avg_vol = df_all_imp.impliedVolatility.mean()
        if np.isnan(avg_vol):
            continue
        df_all_imp['symbol'] = k
    except Exception:
        # Entries that failed to download hold an error string, not a frame;
        # report them and move on.  (Not a bare except, so Ctrl-C still works.)
        print(f"{k}: {df_all_imp}")
        continue
    all_chain_frames.append(df_all_imp)

# One concat instead of repeated DataFrame.append (removed in pandas 2.0);
# ignore_index gives the same 0..n-1 row labels as before.
df_all_implied = (
    pd.concat(all_chain_frames, ignore_index=True)
    if all_chain_frames
    else pd.DataFrame()
)
        


In [None]:
#!jupyter nbconvert --to script yahoo_options.ipynb

In [None]:
# Write the combined chains to df_all_implied.csv (relative path), without
# the index column.
df_all_implied.to_csv('df_all_implied.csv',index=False)