Import Packages

In [23]:
import pandas as pd
import yfinance as yf
import logging 

pd.set_option('display.float_format', '{:.2f}'.format)

# Configure the 'yfinance' logger to suppress output
logger = logging.getLogger('yfinance')
logger.disabled = True
# Optional: prevent propagation to the root logger as well
logger.propagate = False 

In [24]:
## OUTLINE:
## 1. Value vs. Growth - x=P/E Ratio, y=TTM, r=MarketCap
## 2. Risk-Return - x=beta, y=1yr total return, r=MarketCap
## 3. Profitability vs. Efficiency - x=ROE, y=Net Margin, r=MarketCap
## 4. Quality vs. Valuation - x=P/B Ratio, y=ROA, r=MarketCap
## 5. Leverage vs coverage - x=Debt/EBITDA, y=interest coverage ratio, r=MarketCap
## 6. Momentum vs.Value - 6-Month Price Return (%)

yfinance API - s&p 500

In [25]:
def process_tickers(url):
    df = pd.read_csv(url)
    tickers = df['Symbol'].tolist()
    return tickers

In [26]:
def top_20_market_cap(tickers):
    # Batch download 1-day price data for all tickers - MUCH more efficient
    print("Downloading price data for all tickers...")
    price_data = yf.download(tickers, period='1d', progress=False, group_by='ticker')
    
    market_cap_list = []
    ticker_list = []
    
    for i in tickers:
        try:
            # Get the latest close price from the batch download
            if len(tickers) == 1:
                close_price = price_data['Close'].iloc[-1]
            else:
                close_price = price_data[i]['Close'].iloc[-1]
            
            # Only make individual API call to get shares outstanding
            ticker = yf.Ticker(i)
            shares = ticker.info.get('sharesOutstanding')
            
            if shares and close_price:
                market_cap = close_price * shares
                market_cap_list.append(market_cap)
                ticker_list.append(i)
        except:
            pass
        
    df = pd.DataFrame(data={'ticker': ticker_list, 'marketCap':market_cap_list}).sort_values(by='marketCap', ascending=False).reset_index(drop=True)

    top_20_market_cap = df[df['ticker'] != 'GOOG'][:20].reset_index(drop=True)

    return top_20_market_cap



In [27]:
def json_data(df):
    # Batch download 1-year price history for top 20 tickers only
    print("Downloading 1-year history for top 20 tickers...")
    top_tickers = df['ticker'].tolist()
    history_data = yf.download(top_tickers, period='1y', progress=False, group_by='ticker')
    
    json_output = []

    for index, row in df.iterrows():
        ticker = yf.Ticker(row.ticker)

        # Get history from the batch download instead of individual API call
        if len(top_tickers) == 1:
            history = history_data
        else:
            history = history_data[row.ticker]

        history = ticker.history(period='1y')

        # Calculate returns - 1 year and 6 months
        # Use more lenient thresholds - yfinance doesn't always return exactly 252 days
        if len(history) >= 200:  # At least ~8 months of data for "1 year" return
            one_year_return = float(((history['Close'].iloc[-1] / history['Close'].iloc[0]) - 1) * 100)
        else:
            one_year_return = None
                
        if len(history) >= 100:  # At least ~4 months of data for "6 month" return
            # Use the midpoint or as far back as we can go
            lookback_index = min(126, len(history) - 1)
            six_month_return = float(((history['Close'].iloc[-1] / history['Close'].iloc[-lookback_index]) - 1) * 100)
        else:
            six_month_return = None

        # Calculate Interest Coverage Ratio
        income_statement = ticker.income_stmt
        if 'EBIT' in income_statement.index and 'Interest Expense' in income_statement.index:
            ebit = income_statement.loc['EBIT'].iloc[0]
            interest_expense = abs(income_statement.loc['Interest Expense'].iloc[0])  # Usually negative
            
            if interest_expense > 0:
                    interest_coverage = float(ebit / interest_expense)
            else:
                interest_coverage = None
        else:
                interest_coverage = None

        structure =  {
            #core information
            'ticker': row.ticker,
            'sector': ticker.info.get('sector'),
            'company_name': ticker.info.get('longName', ticker),
            'industry': ticker.info.get('industry'),

            #market cap
            'market_cap': row.marketCap,

            # Valuation
            'pe_ratio': ticker.info.get('trailingPE'),
            'forward_pe_ratio':ticker.info.get('forwardPE'),
            'pb_ratio':ticker.info.get('priceToBook'),
            'ps_ratio': ticker.info.get('priceToSalesTrailing12Months'),
            'ev_to_revenue': ticker.info.get('enterpriseToRevenue'),
            'ev_to_ebitda': ticker.info.get('enterpriseToEbitda'),

            #Growth
            'rev_growth': ticker.info.get('revenueGrowth', 0) * 100 if ticker.info.get('revenueGrowth') else None,  # Convert to %
            'earnings_growth': ticker.info.get('earningsGrowth', 0) * 100 if ticker.info.get('earningsGrowth') else None,

            # Profitability
            'roe': ticker.info.get('returnOnEquity', 0) * 100 if ticker.info.get('returnOnEquity') else None,
            'roa': ticker.info.get('returnOnAssets', 0) * 100 if ticker.info.get('returnOnAssets') else None,
            'operating_margin': ticker.info.get('operatingMargins', 0) * 100 if ticker.info.get('operatingMargins') else None,
            'net_margin': ticker. info.get('profitMargins', 0) * 100 if ticker.info.get('profitMargins') else None,
            'profit_margin': ticker.info.get('profitMargins', 0) * 100 if ticker.info.get('profitMargins') else None,
                    
            # Risk
            'beta':ticker.info.get('beta'),
                    
            # Returns (calculated)
            'one_year_return': one_year_return,
            'six_month_return': six_month_return,
                    
            # Leverage
            'debt_to_equity': ticker.info.get('debtToEquity'),
            'total_debt': ticker.info.get('totalDebt'),
            'total_cash': ticker.info.get('totalCash'),

            # Coverage
            'interest_coverage': interest_coverage,
                    
            # Other
            'avg_vol': ticker.info.get('averageVolume')
        }
        json_output.append(structure)

    return json_output



In [28]:
def json_constructor(url):
    #scrape s&P 500 ticker list
    tickers = process_tickers(url)
    #baseline df for additional columns
    top_20_market_cap_df = top_20_market_cap(tickers)
    #construct json format
    json_data_output = json_data(top_20_market_cap_df)

    return json_data_output

    
    

In [29]:
# DataSets repo - updated regularly
url = "https://raw.githubusercontent.com/datasets/s-and-p-500-companies/main/data/constituents.csv"

In [30]:
tickers = process_tickers(url)

In [31]:
top_20_market_cap_df = top_20_market_cap(tickers)

Downloading price data for all tickers...


In [32]:
json_data_output = json_data(top_20_market_cap_df)

Downloading 1-year history for top 20 tickers...


YFRateLimitError: Too Many Requests. Rate limited. Try after a while.

In [None]:
json_data_output

[{'ticker': 'NVDA',
  'sector': 'Technology',
  'company_name': 'NVIDIA Corporation',
  'industry': 'Semiconductors',
  'market_cap': 4554106601472.0,
  'pe_ratio': 46.185184,
  'forward_pe_ratio': 24.61926,
  'pb_ratio': 38.235893,
  'ps_ratio': 24.335033,
  'ev_to_revenue': 23.514,
  'ev_to_ebitda': 39.048,
  'rev_growth': 62.5,
  'earnings_growth': 66.7,
  'roe': 107.35900000000001,
  'roa': 53.528,
  'operating_margin': 63.168997000000005,
  'net_margin': 53.007000000000005,
  'profit_margin': 53.007000000000005,
  'beta': 2.314,
  'one_year_return': np.float64(40.0772346807599),
  'six_month_return': np.float64(8.503543523002177),
  'debt_to_equity': 9.102,
  'total_debt': 10821999616,
  'total_cash': 60608000000,
  'interest_coverage': np.float64(341.1862348178138),
  'avg_vol': 182945991},
 {'ticker': 'GOOGL',
  'sector': 'Communication Services',
  'company_name': 'Alphabet Inc.',
  'industry': 'Internet Content & Information',
  'market_cap': 4030671880192.0,
  'pe_ratio': 32.

In [None]:
stock_data = json_constructor(url)

YFRateLimitError: Too Many Requests. Rate limited. Try after a while.

In [None]:
stock_data