In [1]:
# -*- coding: utf-8 -*-
"""
Created on Tue Jun  2 05:15:28 2020

@author: ywu37
"""
import numpy as np
import pandas as pd
import yfinance as yf
import datetime as dt
from fredapi import Fred
import time
from finsymbols import symbols
Fred_API_key = 'a40841f52075d4615cb0d3c895819d1c'

In [2]:
import requests
import bs4 as bs
 
# Running the code below need to turn on the VPN
def save_sp500_tickers():
    """
    This is a function to save current stock ticker in S&P500
    
    Input: None
    
    Output:
    tickers: A list of ticker name of S&P500 stocks
    """
#     print("Starting to request...")
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
#     print("Got the requested website")
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    ### Collect the Ticker Name from Wikipedia tables
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
#         print("Getting Ticker: ", ticker)
        tickers.append(ticker)
    ### Remove the newline character from list
    sample_list = tickers
    converted_list = []
    for element in sample_list:
        converted_list.append(element.strip())
    ### In Yahoo Finance, the website uses '-' in ticker name instead of '.'
    ### Replace "." With "-"
    new_strings = []
    for string in converted_list:
        new_string = string.replace(".", "-")
        new_strings.append(new_string)
    tickers = new_strings
    
    """
    ### Save the Ticker into file
    with open("sp500tickers.pickle","wb") as f:
        pickle.dump(tickers,f)    
    """
    return tickers

In [19]:
sp500_ = save_sp500_tickers()

In [20]:
# Slice the tickers for testing purpose:
sp500 = sp500_[:10]

In [21]:
sp500

['MMM', 'ABT', 'ABBV', 'ABMD', 'ACN', 'ATVI', 'ADBE', 'AMD', 'AAP', 'AES']

In [26]:
# Then turn off the VPN

#Min Max Scaler
def CalculateIndex(series):
    MAX = max(series)
    MIN = min(series)
    index = 100*((series[-1]-MIN)/(MAX-MIN))
    return index

def get_stock_info(ticker, p = '1y'):
    ticker = yf.Ticker(ticker)
    hist = ticker.history(period = p)
    return hist

def stock_price_momentum():
    hist = get_stock_info('^GSPC', '2y')
    hist_close = hist['Close']
    hist_momentum = hist_close/hist_close.rolling(125).mean()-1
    hist_momentum.dropna(inplace=True)
    index = CalculateIndex(hist_momentum)
    return index

def market_volatility():
    hist = get_stock_info('^VIX')
    hist_close = hist['Close']
    index = CalculateIndex(hist_close)
    return index

def stock_price_strength(tickers):
    noh = 0 #number of hitting high
    nol = 0 #number of hitting low
    for ticker in tickers:
        try:
            hist = get_stock_info(ticker)
            close = hist['Close']
            if close[-1] == min(close):
                nol += 1
            if close[-1] == max(close):
                noh += 1
        except:
            continue
        
    index = (abs(noh - nol)/max([noh, nol]))*100
    return index
        
def stock_price_breadth(tickers):
    length = len(get_stock_info('MSFT','1mo'))
    Rvolume = [0 for i in range(length)] #volume of rise during 1 mo
    Dvolume = [0 for i in range(length)] #volume of decline during 1 mo
    
    for ticker in tickers:
        try:
            hist = get_stock_info(ticker, '1mo')
#             print("Getting stock_price_breadth for: ", ticker)
            for j in range(length):
                if hist['Open'][j] > hist['Close'][j]:
                    Rvolume[j] += hist['Volume'][j]
                else:
                    Dvolume[j] += hist['Volume'][j]
        except:
            continue
    
    breadth = np.array(Rvolume) - np.array(Dvolume) 
    breadth = breadth[~np.isnan(breadth)]
    index = CalculateIndex(breadth)
    return index
    
def junk_bond_demand():
    date_start = dt.date.today()-dt.timedelta(days=30) #date of one month ago
    fred = Fred(api_key=Fred_API_key)
    junkbond = fred.get_series('BAMLH0A0HYM2EY',date_start)/100 #junkbond data
    investbond = fred.get_series('BAMLC0A2CAAEY',date_start)/100 #investment-grade bond
    spread = junkbond-investbond
    spread.dropna(inplace=True) #remove nan value
    index = CalculateIndex(spread)
    return index

def safe_heaven_demand():
    hist = get_stock_info('^GSPC')
    hist_close = hist['Close']
    date = dt.date.today()-dt.timedelta(days=30) #keep one year of 30day returns
    date_start = dt.date.today()-dt.timedelta(days=90) #date of two year ago
    fred = Fred(api_key=Fred_API_key)
    treasury = fred.get_series('DGS10',date_start)/100 # yield of treasuty
    treasury = treasury.pct_change(30)
    treasury = treasury.loc[date:]
    returns = hist_close.pct_change(30) # the 30day return of SP 500
    returns = returns[treasury.index]
    spread = returns - treasury
    spread.dropna(inplace=True)#remove nan value
    index = CalculateIndex(spread)
    return index

def get_date_list(begin_date,end_date):
    date_list = [str(date).split(' ')[0] for date in pd.date_range(start=begin_date, end=end_date,freq='D')]
    return date_list

#print(get_date_list('2018-06-01','2018-06-08'))

def Put_and_Call_Options(tickers):
    date_end = dt.date.today()
    date_start = dt.date.today()-dt.timedelta(days=30) #date of one month ago
    date_list = get_date_list(date_start, date_end)
    Put_volume = [0 for i in range(len(date_list))]
    Call_volume = [0 for i in range(len(date_list))]
    for ticker in tickers:
        try:
            stock = yf.Ticker(ticker)
#             print("Getting Options Data for: ", ticker)
            opt = stock.option_chain(stock.options[0])
            callvolume = opt.calls['volume']
            calldate = [x.strftime('%Y-%m-%d') for x in list(opt.calls['lastTradeDate'])]
            putvolume = opt.puts['volume']
            putdate = [x.strftime('%Y-%m-%d') for x in list(opt.puts['lastTradeDate'])]
            for j in range(len(date_list)):
                for k in range(len(calldate)):
                    if date_list[j] == calldate[k]:
                        Call_volume[j] += 0 if int(callvolume[k]) is np.nan else int(callvolume[k]) 
                for k in range(len(putdate)):
                    if date_list[j] == putdate[k]:
                        Put_volume[j] += 0 if int(putvolume[k]) is np.nan else int(putvolume[k]) 
        except:
            continue
        
    volumespread = np.array(Put_volume)/ np.array(Call_volume) 
    volumespread = volumespread[~np.isnan(volumespread)] #remove nan values in np array
    index = CalculateIndex(volumespread)
    return index

def main(sp500):
    
#     sp500_list = save_sp500_tickers()
    sp500 = set(sp500)
    
#     print('---------Put_and_Call Options: ', end='')
#     a = Put_and_Call_Options(sp500)
#     print(a)
    
    print('---------Safe_heaven_demand: ', end='')
    b = safe_heaven_demand()
    print(b)
    
    print('---------Stock_price_breadth: ',end='')
    c = stock_price_breadth(sp500)
    print(c)
    
    print('---------Stock_price_momentum: ',end='')
    d = stock_price_momentum()
    print(d)
    
    print('---------Market_volatility: ',end='')
    e = market_volatility()
    print(e)
    
    print('---------Stock_price_strength: ',end='')
    f = stock_price_strength(sp500)
    print(f)
    
    print('---------Junk_bond_demand: ',end='')
    g = junk_bond_demand()
    print(g)
    
    print('Equally weighted average of 6 factors: ',end='')
    index = (b+c+d+e+f+g)/6
    print(index)     

In [27]:
start_time = time.time()

if __name__ == "__main__":
    main(sp500)
    print("--- %s seconds ---" % (time.time() - start_time))

---------Safe_heaven_demand: 4.424231610165707
---------Stock_price_breadth: 81.92247092860177
---------Stock_price_momentum: 98.76084568707827
---------Market_volatility: 15.460295151089246
---------Stock_price_strength: 100.0
---------Junk_bond_demand: 62.06896551724148
Equally weighted average of 6 factors: 60.439468149029416
--- 72.3563449382782 seconds ---


In [38]:
from datetime import datetime

In [45]:
retrieval_dt = str(datetime.now().date())
sp500_df = pd.DataFrame(sp500_, columns=['Tickers'])
sp500_df['retrieval_dt'] = retrieval_dt

In [47]:
sp500_df.to_csv("/Users/miaoyuesun/Code_Workspace/brad_public_workspace_mac/quant_research/sp500_tickers_updates/sp500.csv", index = False)

In [None]:
#CalculateIndex 这个function实际上是在做一个Min Max Scale （让数值scale到0-100之间）。这个概念本身没问题，可是
# Min max scaler 的公式打错了

#本来该算Put call ratio 算成了call put difference

#Safe haven demand里面忘记换算单位，直接用小数减百分比。导致数值小了100倍。

In [None]:
# #筛选掉delisted symbols and not found symbols
# NYSE_list = pd.read_csv("NYSE.csv")
# NYSE = set(NYSE_list['Symbol'])
# NYSE_copy = NYSE.copy()
# for ticker in NYSE_copy: 
#     hist = yf.Ticker(ticker)
#     hist = hist.history(period = '1d')
#     if hist.empty:
#         print(ticker)
#         NYSE.remove(ticker)
# NYSE_pd = pd.DataFrame(data={'Symbol':list(NYSE)})
# NYSE_pd.to_csv('NYSE_filtered.csv')