Import Packages

In [1]:
import pandas as pd
import yfinance as yf
import logging 



# Render floats with two decimal places wherever pandas displays them.
pd.set_option('display.float_format', '{:.2f}'.format)

# Silence the 'yfinance' logger: keep its records away from the root
# logger and disable the logger itself.
logger = logging.getLogger('yfinance')
logger.propagate = False
logger.disabled = True

In [2]:
## OUTLINE:
## 1. Value vs. Growth - x=P/E Ratio, y=TTM, r=MarketCap
## 2. Risk-Return - x=beta, y=1yr total return, r=MarketCap
## 3. Profitability vs. Efficiency - x=ROE, y=Net Margin, r=MarketCap
## 4. Quality vs. Valuation - x=P/B Ratio, y=ROA, r=MarketCap
## 5. Leverage vs coverage - x=Debt/EBITDA, y=interest coverage ratio, r=MarketCap
## 6. Momentum vs. Value - 6-Month Price Return (%)

yfinance API - S&P 500

In [3]:
def process_tickers(url):
    """Load ticker symbols from a constituents CSV, normalised for Yahoo lookups.

    Args:
        url: Path, URL, or file-like object accepted by ``pandas.read_csv``.
            The CSV must contain a ``Symbol`` column.

    Returns:
        list[str]: Symbols in file order, with missing rows dropped,
        surrounding whitespace stripped, and ``.`` replaced by ``-``.

    Raises:
        KeyError: If the CSV has no ``Symbol`` column.
    """
    symbols = pd.read_csv(url)['Symbol'].dropna().astype(str).str.strip()
    # Constituent lists write share classes with a dot (BRK.B) while Yahoo
    # Finance uses a hyphen (BRK-B); without this the lookup for those
    # tickers comes back empty.
    return symbols.str.replace('.', '-', regex=False).tolist()

In [4]:
def _resolve_market_cap(ticker):
    """Return a positive market cap for a ``yf.Ticker``, or ``None`` if unavailable."""
    info = ticker.info
    market_cap = info.get('marketCap')
    if market_cap and market_cap > 0:
        return market_cap

    # Reported figure is missing or non-positive: estimate it as
    # last close * shares outstanding.
    shares = info.get('sharesOutstanding')
    if not shares:
        return None
    history = ticker.history(period='1d')
    if history.empty:
        return None
    price = history['Close'].iloc[-1]
    if pd.isna(price):
        return None
    return price * shares


def top_20_market_cap(tickers, n=20, exclude=('GOOG',)):
    """Rank tickers by market capitalisation and keep the largest ``n``.

    Each ticker contributes at most one row. Tickers whose market cap cannot
    be resolved (lookup error, or no reported value and no usable fallback)
    are skipped rather than added with a missing value.

    Args:
        tickers: Iterable of ticker symbols to look up through ``yfinance``.
        n: Number of rows to keep after sorting. Defaults to 20.
        exclude: Collection of symbols to drop before taking the top ``n``.
            Defaults to ``('GOOG',)``.

    Returns:
        pandas.DataFrame: Columns ``ticker`` and ``marketCap``, sorted by
        ``marketCap`` descending, with a fresh 0-based index.
    """
    log = logging.getLogger(__name__)
    rows = []
    for symbol in tickers:
        try:
            market_cap = _resolve_market_cap(yf.Ticker(symbol))
        except Exception as exc:
            # Best-effort scan: one failing lookup must not abort the whole
            # run, but record it instead of hiding it completely.
            log.debug("Skipping %s: market cap lookup failed (%s)", symbol, exc)
            continue
        if market_cap is None:
            log.debug("Skipping %s: no market cap available", symbol)
            continue
        rows.append((symbol, market_cap))

    ranked = pd.DataFrame(rows, columns=['ticker', 'marketCap'])
    ranked = ranked[~ranked['ticker'].isin(list(exclude))]
    ranked = ranked.sort_values(by='marketCap', ascending=False)
    return ranked.head(n).reset_index(drop=True)



In [5]:
def _as_percent(fraction):
    """Scale a fractional value (0.25) to percent (25.0); ``None`` stays ``None``."""
    return fraction * 100 if fraction is not None else None


def _trailing_return(history, min_rows, start_pos):
    """Return the % change in ``Close`` from row ``start_pos`` to the last row.

    Gives ``None`` when ``history`` has fewer than ``min_rows`` rows.
    """
    if len(history) < min_rows:
        return None
    closes = history['Close']
    return float((closes.iloc[-1] / closes.iloc[start_pos] - 1) * 100)


def json_data(df):
    """Build one dictionary of fundamentals and returns per ticker in ``df``.

    Args:
        df: DataFrame with a ``ticker`` column (symbols) and a ``marketCap``
            column; one output record is produced per row, in row order.

    Returns:
        list[dict]: One record per ticker. Fields read from ``Ticker.info``
        are ``None`` when absent. Ratio fields (growth, margins, ROE, ROA)
        are multiplied by 100; a reported value of ``0`` is kept as ``0``.
        ``one_year_return`` and ``six_month_return`` are plain floats, or
        ``None`` when the price history is too short.
    """
    json_output = []

    for symbol, market_cap in zip(df['ticker'], df['marketCap']):
        ticker = yf.Ticker(symbol)
        info = ticker.info  # read once and reuse for every field below
        history = ticker.history(period='1y')

        # Whole window: first available close to the latest close.
        one_year_return = _trailing_return(history, min_rows=251, start_pos=0)
        # Last 126 rows (~6 months); min_rows equals the look-back so that
        # iloc[-126] always exists.
        six_month_return = _trailing_return(history, min_rows=126, start_pos=-126)

        # 'net_margin' and 'profit_margin' both come from 'profitMargins'.
        profit_margin = _as_percent(info.get('profitMargins'))

        json_output.append({
            # Core information
            'ticker': symbol,
            'sector': info.get('sector'),
            # Fall back to the symbol string when no long name is reported.
            'company_name': info.get('longName', symbol),
            'industry': info.get('industry'),

            # Market cap
            'market_cap': market_cap,

            # Valuation
            'pe_ratio': info.get('trailingPE'),
            'forward_pe_ratio': info.get('forwardPE'),
            'pb_ratio': info.get('priceToBook'),
            'ps_ratio': info.get('priceToSalesTrailing12Months'),
            'ev_to_revenue': info.get('enterpriseToRevenue'),
            'ev_to_ebitda': info.get('enterpriseToEbitda'),

            # Growth (converted to %)
            'rev_growth': _as_percent(info.get('revenueGrowth')),
            'earnings_growth': _as_percent(info.get('earningsGrowth')),

            # Profitability (converted to %)
            'roe': _as_percent(info.get('returnOnEquity')),
            'roa': _as_percent(info.get('returnOnAssets')),
            'operating_margin': _as_percent(info.get('operatingMargins')),
            'net_margin': profit_margin,
            'profit_margin': profit_margin,

            # Risk
            'beta': info.get('beta'),

            # Returns (calculated)
            'one_year_return': one_year_return,
            'six_month_return': six_month_return,

            # Leverage
            'debt_to_equity': info.get('debtToEquity'),
            'total_debt': info.get('totalDebt'),
            'total_Cash': info.get('totalCash'),

            # Other
            'avg_vol': info.get('averageVolume'),
        })

    return json_output



In [6]:
def json_constructor(url):
    """Run the full pipeline for the constituents list at ``url``.

    Reads the ticker list with ``process_tickers``, ranks it with
    ``top_20_market_cap``, and returns the records produced by ``json_data``
    for the resulting DataFrame.
    """
    return json_data(top_20_market_cap(process_tickers(url)))

    
    

In [7]:
# S&P 500 constituents CSV from the `datasets/s-and-p-500-companies` GitHub
# repository (main branch) - updated regularly. Read for its 'Symbol' column.
url = "https://raw.githubusercontent.com/datasets/s-and-p-500-companies/main/data/constituents.csv"

In [8]:
# Build the per-ticker records for the constituents list at `url`.
stock_data = json_constructor(url)

In [25]:
# Gather the sector of every record, then show the distinct sectors present.
sector = [record['sector'] for record in stock_data]

df = pd.DataFrame({'sector': sector})
df['sector'].unique()

array(['Technology', 'Communication Services', 'Consumer Cyclical',
       'Healthcare', 'Consumer Defensive', 'Financial Services', 'Energy'],
      dtype=object)