In [1]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
#Function to get the tickers of the Russell 1000 index
def get_russell_1000():
    """Scrape ticker symbols from the Russell 1000 Wikipedia page.

    The first table with class 'wikitable sortable' is read; the header row
    is skipped and the text of the second cell of every remaining row is
    taken as the ticker symbol.

    Returns:
        list[str]: ticker symbols in page order, stripped of whitespace.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        ValueError: if no matching table is found on the page.
    """
    #get ticker from wikipedia page
    url = 'https://en.wikipedia.org/wiki/Russell_1000_Index'
    #a timeout keeps the notebook from hanging forever on a stalled connection
    response = requests.get(url, timeout=30)
    #fail loudly on 4xx/5xx instead of parsing an error page
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    #get table
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise ValueError(
            "No table with class 'wikitable sortable' found at " + url
        )

    #loop over rows (skipping the header) and extract the ticker symbol
    tickers = []
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        #rows without a second data cell carry no ticker, skip them
        if len(cells) < 2:
            continue
        tickers.append(cells[1].text.strip())

    return tickers


In [5]:
#Output label -> key in the yfinance info dict, in output column order
_INFO_FIELDS = (
    ('Industry', 'industry'),
    ('Sector', 'sector'),
    ('Full Time Employees', 'fullTimeEmployees'),
    ('Dividend Rate', 'dividendRate'),
    ('Dividend Yield', 'dividendYield'),
    ('Payout Ratio', 'payoutRatio'),
    ('5-Year Avg Dividend Yield', 'fiveYearAvgDividendYield'),
    ('Beta', 'beta'),
    ('Trailing P/E', 'trailingPE'),
    ('Forward P/E', 'forwardPE'),
    ('Volume', 'volume'),
    ('Regular Market Volume', 'regularMarketVolume'),
    ('Average Volume', 'averageVolume'),
    ('Average Volume (10 days)', 'averageVolume10days'),
    ('Market Cap', 'marketCap'),
    ('52 Week Low', 'fiftyTwoWeekLow'),
    ('52 Week High', 'fiftyTwoWeekHigh'),
    ('Price/Sales (TTM)', 'priceToSalesTrailing12Months'),
    ('Trailing Annual Dividend Rate', 'trailingAnnualDividendRate'),
    ('Trailing Annual Dividend Yield', 'trailingAnnualDividendYield'),
    ('Profit Margins', 'profitMargins'),
    ('Enterprise to Revenue', 'enterpriseToRevenue'),
    ('Enterprise to EBITDA', 'enterpriseToEbitda'),
    ('Recommendation Mean', 'recommendationMean'),
    ('Recommendation Key', 'recommendationKey'),
    ('Total Cash Per Share', 'totalCashPerShare'),
    ('EBITDA', 'ebitda'),
    ('Total Debt', 'totalDebt'),
    ('Quick Ratio', 'quickRatio'),
    ('Current Ratio', 'currentRatio'),
    ('Total Revenue', 'totalRevenue'),
    ('Debt to Equity', 'debtToEquity'),
    ('Revenue Per Share', 'revenuePerShare'),
    ('Return on Assets', 'returnOnAssets'),
    ('Return on Equity', 'returnOnEquity'),
    ('Revenue Growth', 'revenueGrowth'),
    ('Gross Margins', 'grossMargins'),
    ('EBITDA Margins', 'ebitdaMargins'),
    ('Earnings Per Share', 'trailingEps'),
)


#Function to get stock price and financial data for a company
def get_stock_financial_data(ticker):
    """Collect the latest close and a fixed set of info fields for one ticker.

    Args:
        ticker: symbol passed to yf.Ticker.

    Returns:
        dict: 'Ticker', 'longName', 'Stock Price', followed by one entry per
        label in _INFO_FIELDS. Any info field that is missing is reported as
        'N/A'; 'Stock Price' is also 'N/A' when the 1-day history is empty.
    """
    #download stock information using yfinance
    stock = yf.Ticker(ticker)

    #get current stock price: last close of the 1-day history
    closes = stock.history(period="1d")['Close']
    #an empty history would make .iloc[-1] raise IndexError and abort the
    #caller's loop, so fall back to the same placeholder as the other fields
    stock_price = closes.iloc[-1] if not closes.empty else 'N/A'

    #get financial information (guard against a missing/empty info payload)
    financial_info = stock.info or {}

    #get desired metrics, keeping the original column order
    metrics = {
        'Ticker': ticker,
        'longName': financial_info.get('longName', 'N/A'),
        'Stock Price': stock_price,
    }
    for label, info_key in _INFO_FIELDS:
        metrics[label] = financial_info.get(info_key, 'N/A')

    #return the collected data as a dictionary
    return metrics


In [6]:
#function to get historical stock price
def get_historical_prices(tickers):
    """Fetch the 'Close' column of the 2-year history for each ticker.

    Args:
        tickers: iterable of symbols, each passed to yf.Ticker.

    Returns:
        dict: maps each ticker to the 'Close' column of its 2-year history.
    """
    #one yfinance lookup per symbol, keeping only the closing price
    return {
        symbol: yf.Ticker(symbol).history(period="2y")['Close']
        for symbol in tickers
    }

In [3]:
#Create ticker list
russell1000_tickers = get_russell_1000()
#tickers dropped on purpose: no historical price data
EXCLUDED_TICKERS = {'LSXMA', 'LSXMK'}
#remove tickers with . as that does not work in yfinance, plus the excluded
#ones; filtering (instead of list.remove) does not raise ValueError when an
#excluded ticker is no longer on the scraped page
russell1000_tickers = [
    ticker for ticker in russell1000_tickers
    if '.' not in ticker and ticker not in EXCLUDED_TICKERS
]

In [103]:
#Create financial info df: one metrics dict per ticker, one row each
financial_data_list = [
    get_stock_financial_data(symbol) for symbol in russell1000_tickers
]

russell1000_financial_data = pd.DataFrame(financial_data_list)

In [7]:
#Fetch historical prices: dictionary keyed by ticker (combined into a df in a later cell)
historical_prices = get_historical_prices(tickers=russell1000_tickers)


In [13]:
#Combine all historical prices into df (one column per ticker)
if historical_prices:
    #a single column-wise concat aligns every series on the union of their
    #indexes (outer join) and names each column after its dictionary key;
    #this replaces one outer merge per ticker, which re-copied the growing
    #frame each time and dropped leading tickers whose history was empty
    all_historical_prices = pd.concat(historical_prices, axis=1, sort=True)
else:
    #pd.concat raises on an empty mapping, so keep an empty frame instead
    all_historical_prices = pd.DataFrame()

#create column for date
all_historical_prices = all_historical_prices.reset_index()

In [122]:
#save the data into csv files (the target names carry no file extension)
csv_filename_1 = 'russell_1000_financial_information'
csv_filename_2 = 'russell_1000_historical_price'

#index=False: the row index is not written to the file
russell1000_financial_data.to_csv(path_or_buf=csv_filename_1, index=False)
all_historical_prices.to_csv(path_or_buf=csv_filename_2, index=False)