In [44]:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import date
from datetime import timedelta
import concurrent.futures as cf

import random

In [27]:
monthly_constituents = pd.read_csv('./SP500_monthly_hist.csv')
monthly_constituents.set_index('Date', inplace=True)

In [43]:
ticker_list = set(monthly_constituents.to_numpy().flatten())
ticker_list.remove(np.nan)
ticker_list = [elem for elem in ticker_list if not elem[0].isnumeric()]

In [42]:
start_date = datetime.strptime(monthly_constituents.index[0], '%Y-%m-%d')
start_date = date(start_date.year - 10, start_date.month, start_date.day)
end_date = datetime.strptime(monthly_constituents.index[-1] , '%Y-%m-%d')

In [60]:
#Grabs market data for S&P500
market_index = yf.Ticker('^GSPC')
market_hist = market_index.history(start=start_date, end=end_date, interval='1d')
market_hist = pd.DataFrame(market_hist['Close'])
daily_market_returns = market_hist.pct_change()

# Retrieves relevant yahoo data for a sigle ticker
def retreive_yahoo_data_thread(ticker):
    
    #Gets data for filtering
    stock = yf.Ticker(ticker)
    stock_hist = stock.history(start=start_date, end=end_date, interval='1d')

    #creates a dataframe for just the daily closing price 
    #  and another one for just daily returns
    prices = pd.DataFrame(stock_hist['Close'])
    returns = prices.pct_change()

    ######## Beta #############
    returns['Market'] = daily_market_returns['Close']
    beta = returns.cov() / daily_market_returns['Close'].var()

    ######### STD #############
    std = prices.pct_change().std()

    #returns a dataframe with the tickers price, beta, std and a dataframe for it's returns
    return pd.DataFrame({'Ticker': [ticker],
                        'Price History': [stock_hist['Close']],
                        'Beta': [beta.iat[1,0]],
                        'STD': [std.Close],
                        'Returns': [stock_hist['Close'].pct_change()]})

# Produces a dataframe with tickers and corresonding finance data to go with them.
def retreive_yahoo_data(tickers):
    
    tickers_data =  pd.DataFrame({'Ticker': [],
                                  'Price History': [],
                                  'Beta': [],
                                  'STD': [],
                                  'Returns': []})
    
    # Threading
    with cf.ThreadPoolExecutor() as executor:
        
        # Creates a thread for each ticker
        datarow = [executor.submit(retreive_yahoo_data_thread, ticker) for ticker in tickers]
        
        for row in cf.as_completed(datarow):
            tickers_data = pd.concat([tickers_data, row.result()], ignore_index=True)

    tickers_data.reset_index(inplace=True)
    tickers_data = tickers_data[['Ticker', 'Price History', 'Beta', 'STD', 'Returns']]
    tickers_data.dropna(inplace=True)

    return tickers_data

In [62]:
all_ticker_data = retreive_yahoo_data(random.sample(ticker_list, 5))
all_ticker_data

MNKKQ: No timezone found, symbol may be delisted


Unnamed: 0,Ticker,Price History,Beta,STD,Returns
1,QRVO,Date 2015-01-02 00:00:00-05:00 70.400002 20...,1.354878,0.027083,Date 2015-01-02 00:00:00-05:00 NaN 201...
2,DE,Date 2003-09-02 00:00:00-04:00 18.823093 2...,1.139947,0.020471,Date 2003-09-02 00:00:00-04:00 NaN 200...
3,PARA,Date 2005-12-05 00:00:00-05:00 17.629139 20...,1.464976,0.028097,Date 2005-12-05 00:00:00-05:00 NaN 200...
4,MRO,Date 2003-09-02 00:00:00-04:00 5.613751 20...,1.379741,0.029118,Date 2003-09-02 00:00:00-04:00 NaN 200...
