# Stock Valuation

This project aims to determine whether a stock is fairly valued, undervalued, or overvalued by using a variety of valuation metrics. It is still a work in progress.

In [1]:
import os
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import yfinance as yf

## Company Information

This code is a template for use later on when more company information is required to assess the value of a stock.

In [2]:
# Ticker inspected by this template cell
stock_symbol = 'AAPL'

# yfinance handle for the symbol and its dictionary of summary fields
ticker = yf.Ticker(stock_symbol)
info = ticker.info

# Valuation ratios; a field missing from `info` comes back as None
pe_ratio = info.get('trailingPE')  # P/E ratio
forward_pe = info.get('forwardPE')  # Forward P/E ratio
peg_ratio = info.get('pegRatio')  # PEG ratio
price_to_book = info.get('priceToBook')  # Price/Book ratio
enterprise_to_revenue = info.get('enterpriseToRevenue')  # Enterprise Value/Revenue
enterprise_to_ebitda = info.get('enterpriseToEbitda')  # Enterprise Value/EBITDA

# The cash flow statement is not part of `info`; it is read from its own attribute
cash_flow_statement = ticker.cashflow

# Report the symbol, then each ratio on its own line, then the statement
report = (
    ("P/E Ratio", pe_ratio),
    ("Forward P/E Ratio", forward_pe),
    ("PEG Ratio", peg_ratio),
    ("Price to Book Ratio", price_to_book),
    ("Enterprise to Revenue", enterprise_to_revenue),
    ("Enterprise to EBITDA", enterprise_to_ebitda),
)
print(f"Stock: {stock_symbol}")
print("\n".join(f"{label}: {value}" for label, value in report))
print("\nCash Flow Statement (Annual):")
print(cash_flow_statement)

# NOTE(review): `cashflow` is read as an attribute here, so it cannot take a
# `quarterly=True` argument. yfinance appears to expose quarterly figures as
# `ticker.quarterly_cashflow` - confirm against the installed version.

Stock: AAPL
P/E Ratio: 26.898754
Forward P/E Ratio: 24.186275
PEG Ratio: 2.45
Price to Book Ratio: 36.029625
Enterprise to Revenue: 7.004
Enterprise to EBITDA: 20.764

Cash Flow Statement (Annual):
                                                    2023-09-30  \
Free Cash Flow                                   99584000000.0   
Repurchase Of Capital Stock                     -77550000000.0   
Repayment Of Debt                               -11151000000.0   
Issuance Of Debt                                  5228000000.0   
Issuance Of Capital Stock                                  NaN   
Capital Expenditure                             -10959000000.0   
Interest Paid Supplemental Data                   3803000000.0   
Income Tax Paid Supplemental Data                18679000000.0   
End Cash Position                                30737000000.0   
Beginning Cash Position                          24977000000.0   
Changes In Cash                                   5760000000.0   
Financing 

***

# Value Analysis

### Information Technology Sector

In [3]:
# Defining some preliminary functions

def save_dataframe_to_csv(df, filename):
    """Write `df` to `filename` as CSV (index column omitted) and report the outcome.

    Used to keep a local copy of downloaded stock data so the API does not
    have to be queried again. Any exception raised while saving is caught and
    printed instead of being propagated; the function always returns None.
    """
    try:
        df.to_csv(filename, index=False)
        outcome = f"DataFrame is successfully saved to {filename}"
    except Exception as err:
        outcome = f"Error saving DataFrame to CSV: {err}"
    print(outcome)


def get_stock_sector(symbol):
    """Return the sector that yfinance reports for `symbol`.

    When the ticker's info dictionary has no 'sector' entry, the string
    'Sector information not available' is returned instead.
    """
    company_info = yf.Ticker(symbol).info
    return company_info.get('sector', 'Sector information not available')


In [4]:
# Accessing and saving US stock data.
# The listing is downloaded from Alpha Vantage only when the local CSV cache is
# missing; on later runs the notebook reads the cached file and makes no request.

# The key is read from the environment so that no credential lives in the notebook.
# NOTE(review): 'demo' is only a placeholder fallback - confirm it is accepted for
# LISTING_STATUS, otherwise export ALPHAVANTAGE_API_KEY before the first run.
API_KEY = os.environ.get('ALPHAVANTAGE_API_KEY', 'demo')
URL = "https://www.alphavantage.co/query?function=LISTING_STATUS&apikey=" + API_KEY
US_EQUITIES_CSV = 'us_equities.csv'

if not os.path.exists(US_EQUITIES_CSV):
    response = requests.get(URL, timeout=30)

    if response.status_code == 200:
        # The response body is treated as CSV text and parsed into a DataFrame
        listing_df = pd.read_csv(StringIO(response.text))

        # Keep active listings on the three US exchanges only
        on_us_exchange = listing_df['exchange'].isin(['NYSE', 'NASDAQ', 'AMEX'])
        is_active = listing_df['status'] == 'Active'
        save_dataframe_to_csv(listing_df[on_us_exchange & is_active], US_EQUITIES_CSV)
    else:
        print("Failed to fetch data: ", response.status_code)

# Load the cached listing (raises FileNotFoundError if the download above failed)
df = pd.read_csv(US_EQUITIES_CSV)
df

Unnamed: 0,symbol,name,exchange,assetType,ipoDate,delistingDate,status
0,A,Agilent Technologies Inc,NYSE,Stock,1999-11-18,,Active
1,AA,Alcoa Corp,NYSE,Stock,2016-10-18,,Active
2,AACG,ATA Creativity Global,NASDAQ,Stock,2008-01-29,,Active
3,AACI,Armada Acquisition Corp I,NASDAQ,Stock,2021-11-10,,Active
4,AACIU,Armada Acquisition Corp I - Units (1 Ord & 1/2...,NASDAQ,Stock,2021-08-13,,Active
...,...,...,...,...,...,...,...
8374,ZXZZT,NASDAQ TEST STOCK,NASDAQ,Stock,2006-07-10,,Active
8375,ZYME,Zymeworks BC Inc,NASDAQ,Stock,2017-04-28,,Active
8376,ZYNE,Zynerba Pharmaceuticals Inc,NASDAQ,Stock,2015-08-05,,Active
8377,ZYRX,Global Earnings Capital Ltd,NASDAQ,Stock,2007-07-13,,Active


In [5]:
# Keep only the rows whose asset type is 'Stock' (ETFs are dropped) and renumber from 0
is_stock = df['assetType'] == 'Stock'
us_stocks_df = df.loc[is_stock].reset_index(drop=True)
us_stocks_df

Unnamed: 0,symbol,name,exchange,assetType,ipoDate,delistingDate,status
0,A,Agilent Technologies Inc,NYSE,Stock,1999-11-18,,Active
1,AA,Alcoa Corp,NYSE,Stock,2016-10-18,,Active
2,AACG,ATA Creativity Global,NASDAQ,Stock,2008-01-29,,Active
3,AACI,Armada Acquisition Corp I,NASDAQ,Stock,2021-11-10,,Active
4,AACIU,Armada Acquisition Corp I - Units (1 Ord & 1/2...,NASDAQ,Stock,2021-08-13,,Active
...,...,...,...,...,...,...,...
7349,ZXZZT,NASDAQ TEST STOCK,NASDAQ,Stock,2006-07-10,,Active
7350,ZYME,Zymeworks BC Inc,NASDAQ,Stock,2017-04-28,,Active
7351,ZYNE,Zynerba Pharmaceuticals Inc,NASDAQ,Stock,2015-08-05,,Active
7352,ZYRX,Global Earnings Capital Ltd,NASDAQ,Stock,2007-07-13,,Active


In [6]:
# Discard rows that have no company name
df_cleaned = us_stocks_df[us_stocks_df['name'].notna()]

# For each company name keep only its first row
df_unique = df_cleaned[~df_cleaned.duplicated(subset='name', keep='first')]

df_unique

Unnamed: 0,symbol,name,exchange,assetType,ipoDate,delistingDate,status
0,A,Agilent Technologies Inc,NYSE,Stock,1999-11-18,,Active
1,AA,Alcoa Corp,NYSE,Stock,2016-10-18,,Active
2,AACG,ATA Creativity Global,NASDAQ,Stock,2008-01-29,,Active
3,AACI,Armada Acquisition Corp I,NASDAQ,Stock,2021-11-10,,Active
4,AACIU,Armada Acquisition Corp I - Units (1 Ord & 1/2...,NASDAQ,Stock,2021-08-13,,Active
...,...,...,...,...,...,...,...
7348,ZXYZ-A,NASDAQ SYMBOLOGY TEST,NASDAQ,Stock,2016-01-19,,Active
7350,ZYME,Zymeworks BC Inc,NASDAQ,Stock,2017-04-28,,Active
7351,ZYNE,Zynerba Pharmaceuticals Inc,NASDAQ,Stock,2015-08-05,,Active
7352,ZYRX,Global Earnings Capital Ltd,NASDAQ,Stock,2007-07-13,,Active


In [7]:
# Disabled code kept as a bare string literal: nothing inside it executes, and the
# cell simply echoes the string as its output. The snippet sketches a
# symbol -> sector dictionary built by calling get_stock_sector for every row of
# us_stocks_df (one yfinance lookup per symbol).
'''
us_stocks_dict = {}
for number in range(len(us_stocks_df)):
    ticker = us_stocks_df['symbol'][number]
    us_stocks_dict[ticker] = get_stock_sector(ticker)

us_stocks_dict
'''

"\nus_stocks_dict = {}\nfor number in range(len(us_stocks_df)):\n    ticker = us_stocks_df['symbol'][number]\n    us_stocks_dict[ticker] = get_stock_sector(ticker)\n\nus_stocks_dict\n"

In [None]:
def _info_value(info, key):
    """Return info[key], or NaN when the key is absent or its value is None."""
    value = info.get(key)
    return np.nan if value is None else value


def fetch_metrics(tickers): # takes a list of tickers
    """Fetch a fixed set of valuation metrics from yfinance for each ticker.

    Parameters
    ----------
    tickers : iterable of str
        Symbols to look up. They are processed in the order given and are not
        de-duplicated, so row i of the result belongs to the i-th ticker.

    Returns
    -------
    pandas.DataFrame
        One row per ticker with the columns 'Market Cap', 'Trailing P/E',
        'P/B Ratio', 'Trailing EPS', 'Forward P/E', 'Forward EPS',
        'Dividend Yield %' and 'Beta'. A metric that yfinance does not provide
        (key missing or value None) is stored as NaN.
    """
    # Result column -> key in the yfinance `info` dictionary.
    # Not included yet: payoutRatio, operatingMargins, revenueGrowth,
    # profitMargins, returnOnAssets, returnOnEquity.
    field_by_column = {
        'Market Cap': 'marketCap',
        'Trailing P/E': 'trailingPE',
        'P/B Ratio': 'priceToBook',
        'Trailing EPS': 'trailingEps',
        'Forward P/E': 'forwardPE',
        'Forward EPS': 'forwardEps',
        'Dividend Yield %': 'dividendYield',
        'Beta': 'beta',
    }

    metrics = {column: [] for column in field_by_column}
    for ticker in tickers:
        info = yf.Ticker(ticker).info
        for column, field in field_by_column.items():
            value = _info_value(info, field)
            if column == 'Dividend Yield %':
                # NOTE(review): assumes yfinance reports the yield as a fraction;
                # verify for the installed version before trusting the scale.
                value = value * 100
            metrics[column].append(value)
    return pd.DataFrame(metrics)


def graham_valuation(stock_metrics):
    """Determines if each stock meets Benjamin Graham's criteria for investment.

    Parameters
    ----------
    stock_metrics : pandas.DataFrame
        Must contain the columns 'Trailing P/E' and 'P/B Ratio'.

    Returns
    -------
    dict
        Maps each row's index label to "Undervalued" when the trailing P/E is
        below 15 and P/E multiplied by P/B is below 22.5, otherwise to
        "Not Undervalued". Rows where either ratio is missing are reported as
        "Not Undervalued" because the criteria cannot be verified.
    """
    valuation_status = {}
    for label, row in stock_metrics.iterrows():
        pe_ratio = row['Trailing P/E']
        pb_ratio = row['P/B Ratio']

        if pd.isna(pe_ratio) or pd.isna(pb_ratio):
            # Missing data must not pass the screen by default
            meets_criteria = False
        else:
            meets_criteria = pe_ratio < 15 and pe_ratio * pb_ratio < 22.5
        # Current ratio, debt-to-equity, EPS growth and dividend yield are not
        # checked here; they can be added once those metrics are fetched.

        valuation_status[label] = "Undervalued" if meets_criteria else "Not Undervalued"

    return valuation_status

def calculate_benchmarks(df):
    """Calculates benchmarks (median) for a DataFrame of financial metrics.

    Only numeric columns are considered, so a frame that also carries text
    columns (for example tickers or status labels) does not raise; those
    columns are simply left out of the result.

    Returns a dict mapping column name to that column's median.
    """
    return df.median(numeric_only=True).to_dict()

def compare_stock_to_benchmarks(stock_metrics, sp500_benchmarks, sector_benchmarks):
    """Put one stock's metrics side by side with the S&P 500 and sector medians.

    For every label in `stock_metrics.index` the result holds a dict with the
    stock's own value plus the matching entry from each benchmark mapping;
    a benchmark that lacks the metric contributes NaN.
    """
    return {
        name: {
            'Stock': stock_metrics[name],
            'S&P 500 Median': sp500_benchmarks.get(name, np.nan),
            'Sector Median': sector_benchmarks.get(name, np.nan),
        }
        for name in stock_metrics.index
    }

In [None]:
TICKERS = ['AAPL', 'NVDA', 'HASI']

# graham_valuation iterates over DataFrame rows, so it must receive the metrics
# frame returned by fetch_metrics rather than the raw list of symbols.
ticker_metrics = fetch_metrics(TICKERS)
# Label the rows with their symbols so the resulting dict is keyed by ticker
ticker_metrics.index = TICKERS
graham_valuation(ticker_metrics)

In [None]:
def sector_valuation_status(value, metric, sector_benchmarks):
    """Determines valuation status of a stock based on sector medians.

    Parameters
    ----------
    value : number
        The stock's value for `metric`.
    metric : str
        Name of the metric being compared.
    sector_benchmarks : dict
        Maps metric name to the sector median.

    Returns
    -------
    str
        'Undervalued' when `value` is below 80% of the sector median,
        'Overvalued' when it is above 120%, 'Fair Value' in between, and
        'N/A' when the metric has no benchmark or when the value or the
        median is missing (None/NaN).
    """
    if metric not in sector_benchmarks:
        return 'N/A'

    median = sector_benchmarks[metric]
    # NaN compares False against everything and would otherwise fall through
    # to 'Fair Value'; report missing data explicitly instead.
    if pd.isna(value) or pd.isna(median):
        return 'N/A'

    if value < median * 0.8:
        return 'Undervalued'
    if value > median * 1.2:
        return 'Overvalued'
    return 'Fair Value'

In [None]:
def _graham_band(value, undervalued_below, fair_up_to):
    """Classify `value` against one Graham threshold pair; missing values give 'N/A'."""
    if pd.isna(value):
        return 'N/A'
    if value < undervalued_below:
        return 'Undervalued'
    if value <= fair_up_to:
        return 'Fair Value'
    return 'Overvalued'


def graham_valuation_status(stock_metrics):
    """Evaluates if a stock meets Benjamin Graham's investment criteria with transition ranges.

    Parameters
    ----------
    stock_metrics : dict
        Expected to hold 'Trailing P/E' and 'P/B Ratio'. A key that is absent
        defaults to infinity and is therefore rated 'Overvalued'; a key that is
        present but None/NaN is rated 'N/A'.

    Returns
    -------
    dict
        Status strings under the keys 'Trailing P/E' (below 15 undervalued,
        15-20 fair), 'P/B Ratio' (below 1.5 undervalued, 1.5-2 fair) and
        'Combined P/E x P/B' (below 22.5 undervalued, 22.5-27 fair). Anything
        above the fair range is 'Overvalued'. The combined status is 'N/A'
        whenever either input ratio is missing.
    """
    pe_ratio = stock_metrics.get('Trailing P/E', float('inf'))  # Use a high default value for missing
    pb_ratio = stock_metrics.get('P/B Ratio', float('inf'))

    status = {
        'Trailing P/E': _graham_band(pe_ratio, 15, 20),
        'P/B Ratio': _graham_band(pb_ratio, 1.5, 2),
    }

    # Only multiply when both ratios are known; None would raise and NaN would
    # silently compare False and end up labelled 'Overvalued'.
    if pd.isna(pe_ratio) or pd.isna(pb_ratio):
        combined_value = float('nan')
    else:
        combined_value = pe_ratio * pb_ratio
    status['Combined P/E x P/B'] = _graham_band(combined_value, 22.5, 27)

    return status

In [None]:
def plot_table_colored(df):
    """Draw `df` as a matplotlib table with valuation cells colour-coded.

    Columns whose name contains "Graham Status" use green / red / orange for
    Undervalued / Overvalued / Fair Value. Columns whose name contains
    "Sector Status" use dark green / dark red / grey with white text. Cells in
    every other column get a light neutral background. Status cells whose text
    matches none of the keywords keep the default look.
    """
    # (keyword, face colour) pairs, tried in order; the first keyword found in
    # the cell text decides the colour.
    graham_palette = (
        ("Undervalued", 'green'),
        ("Overvalued", 'red'),
        ("Fair Value", 'orange'),
    )
    sector_palette = (
        ("Undervalued", '#006400'),  # dark green
        ("Overvalued", '#8b0000'),   # dark red
        ("Fair Value", '#808080'),   # grey
    )

    # Figure height scales with the number of rows (half a unit per row)
    fig, ax = plt.subplots(figsize=(20, len(df) * 0.5))
    ax.axis('tight')
    ax.axis('off')

    # Font size applied before the table is created
    plt.rcParams.update({'font.size': 10})

    table = ax.table(
        cellText=df.values,
        colLabels=df.columns,
        cellLoc='center',
        loc='center',
        colWidths=[0.1] * len(df.columns),
    )

    for col_pos, col in enumerate(df.columns):
        is_graham = "Graham Status" in col
        is_sector = not is_graham and "Sector Status" in col

        # Table row 0 is the header, so data rows start at 1
        for row_pos in range(1, len(df.index) + 1):
            cell = table[(row_pos, col_pos)]
            cell_text = cell.get_text().get_text()

            if is_graham:
                face = next((colour for word, colour in graham_palette if word in cell_text), None)
                if face is not None:
                    cell.set_facecolor(face)
            elif is_sector:
                face = next((colour for word, colour in sector_palette if word in cell_text), None)
                if face is not None:
                    cell.set_facecolor(face)
                    cell.set_text_props(color='w')
            else:
                # Neutral background for cells that carry no valuation status
                cell.set_facecolor('#f5f5f5')

    plt.tight_layout()
    plt.show()

In [None]:
# Extract unique sectors from the 'GICS Sector' column
unique_sectors = sp500_df['GICS Sector'].unique()

# Convert the array of unique sectors to a list
sector_list = unique_sectors.tolist()

# Print the list of sectors
print(sector_list)

In [None]:
def process_sector(sector_name, sp500_df):
    """Build the valuation table for every company of one GICS sector.

    Parameters
    ----------
    sector_name : str
        Value to match in the 'GICS Sector' column.
    sp500_df : pandas.DataFrame
        Must contain the columns 'GICS Sector' and 'Symbol'.

    Returns
    -------
    pandas.DataFrame
        One row per company with 'Ticker', the fetched metrics, a
        '<metric> Graham Status' column for each entry returned by
        graham_valuation_status and a '<metric> Sector Status' column for
        each fetched metric.
    """
    print(f"Processing sector: {sector_name}")

    # Filter companies by sector
    in_sector = sp500_df['GICS Sector'] == sector_name
    sector_companies = sp500_df.loc[in_sector, 'Symbol'].tolist()

    # Fetch metrics once for the whole sector. The rows are reused below instead
    # of requesting every symbol a second time; this relies on fetch_metrics
    # returning one row per ticker in input order.
    sector_data = fetch_metrics(sector_companies)

    # Sector medians used as the benchmark for each metric
    sector_benchmarks = calculate_benchmarks(sector_data)

    results = []
    for position, symbol in enumerate(sector_companies):
        metrics = sector_data.iloc[position]
        metrics_dict = metrics.to_dict()

        # Graham Valuation Status
        graham_status = graham_valuation_status(metrics_dict)

        # Sector-Specific Valuation Status
        sector_status = {
            metric: sector_valuation_status(metrics[metric], metric, sector_benchmarks)
            for metric in metrics.index
        }

        results.append({
            'Ticker': symbol,
            **metrics_dict,
            **{f'{metric} Graham Status': status for metric, status in graham_status.items()},
            **{f'{metric} Sector Status': status for metric, status in sector_status.items()},
        })

    return pd.DataFrame(results)

In [None]:
def plot_sector_valuation_table(df, sector_name):
    """Print a heading for `sector_name` and plot the status columns of `df`.

    Only 'Ticker' and the columns whose name contains 'Status' are handed to
    plot_table_colored.
    """
    print(f"Valuation Table for {sector_name} Sector")
    status_columns = list(filter(lambda name: 'Status' in name, df.columns))
    plot_table_colored(df[['Ticker', *status_columns]])

In [None]:
# Valuation table for every sector, keyed by sector name. Each call to
# process_sector fetches metrics for all companies of that sector.
sector_results_dict = {}

for sector in unique_sectors:
    sector_results_dict[sector] = process_sector(sector, sp500_df)

In [None]:
# Show the valuation table for one sector. Leaving the frame as the cell's last
# expression lets the notebook render it as a rich table instead of plain text.
sector_results_dict["Consumer Discretionary"]

In [None]:
# Sector name -> tickers whose combined P/E x P/B Graham status is 'Undervalued'
undervalued_stocks_by_sector = {}

for sector in unique_sectors:
    # Sectors without a stored result are treated as empty and skipped
    df_sector_results = sector_results_dict.get(sector, pd.DataFrame())
    if df_sector_results.empty:
        continue

    # Optionally, plot the sector valuation table
    # plot_sector_valuation_table(df_sector_results, sector)

    is_undervalued = df_sector_results['Combined P/E x P/B Graham Status'] == 'Undervalued'
    undervalued_stocks = df_sector_results.loc[is_undervalued, 'Ticker'].tolist()
    undervalued_stocks_by_sector[sector] = undervalued_stocks

print(undervalued_stocks_by_sector)

In [None]:
def plot_pe_ratios(df, sector_name):
    """Scatter-plot the trailing P/E of every company in a sector.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'Ticker', 'Trailing P/E' and
        'Combined P/E x P/B Graham Status'.
    sector_name : str
        Used in the plot title.

    Points are coloured by the combined Graham status (green / orange / red,
    gray for 'N/A' or any unrecognised status) and a dashed line marks the
    median P/E of the frame. An empty frame produces a short message and no
    figure.
    """
    if df.empty:
        # Callers pass an empty frame for sectors without results; there is
        # nothing to draw and the status column would not exist.
        print(f"No data to plot for {sector_name} Sector")
        return

    # Determine colors based on valuation status
    status_colors = {
        'Undervalued': 'green',
        'Fair Value': 'orange',
        'Overvalued': 'red',
        'N/A': 'gray',
    }
    # Statuses outside the mapping would become NaN, which is not a valid colour
    colors = df['Combined P/E x P/B Graham Status'].map(status_colors).fillna('gray')

    plt.figure(figsize=(14, 7))
    plt.title(f'P/E Ratios for {sector_name} Sector')
    plt.xlabel('Company Ticker')
    plt.ylabel('Trailing P/E Ratio')

    # Plot each company's P/E ratio
    plt.scatter(df['Ticker'], df['Trailing P/E'], color=colors.tolist(), alpha=0.7, s=100)

    # Sector median P/E ratio line
    sector_median_pe = df['Trailing P/E'].median()
    plt.axhline(y=sector_median_pe, color='blue', linestyle='--', label=f'Sector Median P/E: {sector_median_pe:.2f}')

    plt.xticks(rotation=90)  # Rotate company tickers for better readability
    plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# One P/E scatter plot per sector; a sector with no stored result is passed on
# as an empty DataFrame.
for sector in unique_sectors:
    plot_pe_ratios(sector_results_dict.get(sector, pd.DataFrame()), sector)

In [None]:
import numpy as np

def plot_pe_pb_with_marketcap(df):
    """Bubble chart of P/B ratio (x) against trailing P/E (y), sized by market cap.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'Ticker', 'Market Cap', 'P/B Ratio' and
        'Trailing P/E'. The frame is not modified.

    Both axes use a log scale. Each bubble with finite P/B and P/E values is
    labelled with its ticker.
    """
    # Bubble size: market cap in billions, scaled up by 10 for visibility.
    # Computed as a local Series so the caller's frame gains no extra column.
    sizes = (df['Market Cap'] / 1e9) * 10

    # Create the scatter plot (no colormap: no per-point colour values are supplied)
    plt.figure(figsize=(14, 8))
    plt.scatter(df['P/B Ratio'], df['Trailing P/E'], s=sizes, alpha=0.5)

    # Add ticker symbols as text labels for each bubble that has a position
    for _, row in df.iterrows():
        if np.isfinite(row['P/B Ratio']) and np.isfinite(row['Trailing P/E']):
            plt.text(row['P/B Ratio'], row['Trailing P/E'], row['Ticker'],
                     ha='center', va='center', fontsize=8, color='black', alpha=0.7)

    plt.title('P/B Ratio vs. Trailing P/E with Market Cap Size Indicator')
    plt.xlabel('P/B Ratio')
    plt.ylabel('Trailing P/E Ratio')
    plt.xscale('log')  # Using log scale for P/B Ratio if wide range values are expected
    plt.yscale('log')  # Using log scale for P/E Ratio to handle wide range of values and outliers

    plt.grid(True, which="both", ls="--", linewidth=0.5)
    plt.tight_layout()
    plt.show()

In [None]:
# Disabled code kept as a bare string literal: nothing inside it executes. The
# snippet would tag each frame in sector_results_dict with a 'Sector' column
# and concatenate them all into one `all_sectors_df`.
'''
******
Use this if you want to plot all the data in one figure, but it gets quite messy
******

# Initialize an empty DataFrame
all_sectors_df = pd.DataFrame()

for sector, df in sector_results_dict.items():
    # Optionally, add a sector column to each sector's DataFrame before concatenation
    df['Sector'] = sector
    # Concatenate the current sector's DataFrame to the comprehensive DataFrame
    all_sectors_df = pd.concat([all_sectors_df, df], ignore_index=True)
'''

In [None]:
# One P/B vs. P/E bubble chart per sector. The plotting function defined in this
# notebook is `plot_pe_pb_with_marketcap`.
for sector in unique_sectors:
    plot_pe_pb_with_marketcap(sector_results_dict[sector])

In [None]:
# Display the stock-only listing that was filtered from the cached US equities file
us_stocks_df