# Personal FDA Project

In [1]:
# Data Handling
import pandas as pd
import numpy as np

# Data Collection
import yfinance as yf
import wrds

# Visualization
import matplotlib.pyplot as plt
import plotly.express as px

# Valuation Models
from scipy.optimize import minimize

# User Interface (Optional)
import streamlit as st

# Additional Tools
import os
import warnings
warnings.filterwarnings('ignore')
from fuzzywuzzy import fuzz, process

In [2]:
# List of 10 stock tickers
tickers = ["AAPL", "MSFT", "KO", "AMZN", "TSLA", "NVDA", "META", "JNJ", "PG", "DIS"]

### 1. WRDS Data Collection

In [3]:
# Connect to WRDS
db = wrds.Connection()

query = f"""
    SELECT a.gvkey, a.tic, a.datadate, a.at, a.lt, a.ebit, a.revt, a.ni, 
           a.oibdp, a.capx, a.dvpsp_f, a.csho, a.prcc_f, a.che, a.naicsh
    FROM comp.funda AS a
    WHERE a.indfmt = 'INDL'
      AND a.datafmt = 'STD'
      AND a.popsrc = 'D'
      AND a.consol = 'C'
      AND a.datadate >= '2018-01-01'
      AND a.tic IN ({', '.join([f"'{ticker}'" for ticker in tickers])})
"""
compustat_data = db.raw_sql(query)

# Preprocess WRDS data
compustat_data['tic'] = compustat_data['tic'].str.strip()
compustat_data.to_csv("financial_data_10_companies.csv", index=False)

# Print a sample to confirm the data is correct
print("Sample Data from WRDS:")
print(compustat_data.head())


WRDS recommends setting up a .pgpass file.
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done
Sample Data from WRDS:
    gvkey   tic    datadate        at        lt      ebit      revt       ni  \
0  001690  AAPL  2018-09-30  365725.0  258578.0   70662.0  265359.0  59531.0   
1  001690  AAPL  2019-09-30  338516.0  248028.0   63930.0  260174.0  55256.0   
2  001690  AAPL  2020-09-30  323888.0  258549.0   66288.0  274515.0  57411.0   
3  001690  AAPL  2021-09-30  351002.0  287912.0  108949.0  365817.0  94680.0   
4  001690  AAPL  2022-09-30  352755.0  302083.0  119437.0  394328.0  99803.0   

      oibdp     capx  dvpsp_f       csho  prcc_f       che  naicsh  
0   79962.0  13313.0    2.720   4754.986  225.74   66301.0  334220  
1   75230.0  10495.0    3.000   4443.236  223.97  100580.0  334220  
2   75988.0   7309.0    0.795  16976.763  115.81   90979.0  334220  
3  118449.0  11085.0    0.8

In [4]:
# Initialize dictionaries to store data for each ticker
historical_prices_data = {}
dividends_data = {}
financials_data = {}
balance_sheet_data = {}
cashflow_data = {}

# Iterate through each ticker in the list
for ticker in tickers:  # tickers should be a list of ticker symbols
    stock = yf.Ticker(ticker)
    
    try:
        # Fetch historical prices
        historical_prices = stock.history(period="5y")  # Last 5 years of stock prices
        if not historical_prices.empty:
            historical_prices_data[ticker] = historical_prices
            historical_prices.to_csv(f"{ticker}_historical_prices.csv", index=True)
        else:
            print(f"No historical prices found for {ticker}.")
    except Exception as e:
        print(f"Error fetching historical prices for {ticker}: {e}")

    try:
        # Fetch dividend history
        dividends = stock.dividends
        if not dividends.empty:
            dividends_data[ticker] = dividends
            dividends.to_csv(f"{ticker}_dividends.csv", index=True)
        else:
            print(f"No dividends data found for {ticker}.")
    except Exception as e:
        print(f"Error fetching dividends for {ticker}: {e}")

    try:
        # Fetch financials
        financials = stock.financials
        if not financials.empty:
            financials_data[ticker] = financials
            financials.to_csv(f"{ticker}_financials.csv", index=True)
        else:
            print(f"No financials data found for {ticker}.")
    except Exception as e:
        print(f"Error fetching financials for {ticker}: {e}")

    try:
        # Fetch balance sheet
        balance_sheet = stock.balance_sheet
        if not balance_sheet.empty:
            balance_sheet_data[ticker] = balance_sheet
            balance_sheet.to_csv(f"{ticker}_balance_sheet.csv", index=True)
        else:
            print(f"No balance sheet data found for {ticker}.")
    except Exception as e:
        print(f"Error fetching balance sheet for {ticker}: {e}")

    try:
        # Fetch cash flow statement
        cashflow = stock.cashflow
        if not cashflow.empty:
            cashflow_data[ticker] = cashflow
            cashflow.to_csv(f"{ticker}_cashflow.csv", index=True)
        else:
            print(f"No cash flow data found for {ticker}.")
    except Exception as e:
        print(f"Error fetching cash flow data for {ticker}: {e}")


No dividends data found for AMZN.
No dividends data found for TSLA.


### Validate and Match Tickers with Fuzzy Matching

In [5]:
# Match user-provided tickers to WRDS tickers
wrds_tickers = compustat_data['tic'].unique()
validated_tickers = [process.extractOne(ticker, wrds_tickers)[0] for ticker in tickers]

print("Validated Tickers (Fuzzy Matched):")
print(validated_tickers)

# Update WRDS data with matched tickers
compustat_data['best_match'] = compustat_data['tic'].apply(
    lambda x: process.extractOne(x, validated_tickers)[0] if pd.notnull(x) else None
)

Validated Tickers (Fuzzy Matched):
['AAPL', 'MSFT', 'KO', 'AMZN', 'TSLA', 'NVDA', 'META', 'JNJ', 'PG', 'DIS']


### Fetch Data from Yahoo Finance

In [6]:
# Fetch stock data from Yahoo Finance
historical_data = {}
dividends_data = {}

# Initialize dictionaries to store data
historical_data = {}
dividends_data = {}

# Fetch data for each validated ticker
for ticker in validated_tickers:
    print(f"Fetching data for {ticker}...")
    stock = yf.Ticker(ticker)
    
    try:
        # Fetch historical prices
        historical_prices = stock.history(period="5y")
        if not historical_prices.empty:
            historical_data[ticker] = historical_prices
            historical_prices.to_csv(f"{ticker}_historical_prices.csv", index=True)
            print(f"Historical prices saved for {ticker}.")
        else:
            print(f"No historical prices found for {ticker}.")
        
        # Fetch dividend data
        dividends = stock.dividends
        if not dividends.empty:
            dividends_data[ticker] = dividends
            dividends.to_csv(f"{ticker}_dividends.csv", index=True)
            print(f"Dividends data saved for {ticker}.")
        else:
            print(f"No dividend data found for {ticker}.")
    
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")

Fetching data for AAPL...
Historical prices saved for AAPL.
Dividends data saved for AAPL.
Fetching data for MSFT...
Historical prices saved for MSFT.
Dividends data saved for MSFT.
Fetching data for KO...
Historical prices saved for KO.
Dividends data saved for KO.
Fetching data for AMZN...
Historical prices saved for AMZN.
No dividend data found for AMZN.
Fetching data for TSLA...
Historical prices saved for TSLA.
No dividend data found for TSLA.
Fetching data for NVDA...
Historical prices saved for NVDA.
Dividends data saved for NVDA.
Fetching data for META...
Historical prices saved for META.
Dividends data saved for META.
Fetching data for JNJ...
Historical prices saved for JNJ.
Dividends data saved for JNJ.
Fetching data for PG...
Historical prices saved for PG.
Dividends data saved for PG.
Fetching data for DIS...
Historical prices saved for DIS.
Dividends data saved for DIS.


### Process Data for Valuation Models

In [7]:
# Initialize DataFrames for combined results
fcf_data = pd.DataFrame()
valuation_data = pd.DataFrame()

# Process each company
for ticker in validated_tickers:
    # Filter data for this ticker
    company_data = compustat_data[compustat_data['tic'] == ticker]
    
    # Ensure required columns exist
    required_columns = ['oibdp', 'capx', 'csho', 'dvpsp_f', 'ni', 'at', 'lt']
    if not all(col in company_data.columns for col in required_columns):
        print(f"Missing required columns for {ticker}, skipping...")
        continue
    
    # Calculate Free Cash Flow (FCF)
    company_data['FCF'] = company_data['oibdp'] - company_data['capx']
    
    # Handle zero or missing values in `ni` and `oibdp` for valuation multiples
    company_data['P/E'] = (company_data['csho'] * company_data['dvpsp_f']) / company_data['ni'].replace(0, np.nan)
    company_data['EV/EBITDA'] = (company_data['at'] - company_data['lt']) / company_data['oibdp'].replace(0, np.nan)
    
    # Append to combined DataFrames
    fcf_data = pd.concat([fcf_data, company_data[['tic', 'datadate', 'FCF']].reset_index(drop=True)])
    valuation_data = pd.concat([valuation_data, company_data[['tic', 'datadate', 'P/E', 'EV/EBITDA']].reset_index(drop=True)])

# Save processed data
fcf_data.to_csv("fcf_data.csv", index=False)
valuation_data.to_csv("valuation_data.csv", index=False)

# Print confirmation
print("Data processing complete. FCF and valuation data saved.")


Data processing complete. FCF and valuation data saved.


### Process Dividend Growth Data

In [8]:
# Initialize DataFrame for dividend growth rates
dividend_growth_data = pd.DataFrame()

for ticker, dividends in dividends_data.items():
    if not dividends.empty:
        print(f"Processing dividends for {ticker}...")
        # Convert to DataFrame and calculate growth rate
        dividends_df = dividends.to_frame().reset_index()
        dividends_df.columns = ['Date', 'Dividends']  # Standardize column names
        dividends_df['Date'] = pd.to_datetime(dividends_df['Date'])  # Ensure datetime format
        if len(dividends_df) > 1:  # Only calculate growth rate if there's more than one record
            dividends_df['Growth Rate'] = dividends_df['Dividends'].pct_change()
        else:
            dividends_df['Growth Rate'] = None  # No growth rate for single record
        
        # Add ticker identifier
        dividends_df['Ticker'] = ticker
        
        # Append to combined DataFrame
        dividend_growth_data = pd.concat([dividend_growth_data, dividends_df.reset_index(drop=True)])
    else:
        print(f"No dividends data for {ticker}.")

# Save dividend growth data
dividend_growth_data.to_csv("dividend_growth_data.csv", index=False)

# Print confirmation
print("Dividend growth data processing complete. Saved to dividend_growth_data.csv.")


Processing dividends for AAPL...
Processing dividends for MSFT...
Processing dividends for KO...
Processing dividends for NVDA...
Processing dividends for META...
Processing dividends for JNJ...
Processing dividends for PG...
Processing dividends for DIS...
Dividend growth data processing complete. Saved to dividend_growth_data.csv.


### Generate Summary

In [9]:
# Summarize Free Cash Flow (FCF) Data
print("Summary of Free Cash Flow Data:")
print(fcf_data.describe())

# Summarize Valuation Multiples Data
print("\nSummary of Valuation Multiples Data:")
print(valuation_data.describe())

# Summarize Dividend Growth Data
if not dividend_growth_data.empty:
    print("\nSummary of Dividend Growth Data:")
    print(dividend_growth_data.describe())

# Key Insights
print("\nKey Insights from Processed Data:")

# Highest and Lowest FCF
highest_fcf = fcf_data.loc[fcf_data['FCF'].idxmax()]
lowest_fcf = fcf_data.loc[fcf_data['FCF'].idxmin()]
print(f"Highest FCF:\n{highest_fcf}\n")
print(f"Lowest FCF:\n{lowest_fcf}\n")

# Companies with Highest P/E Ratio
highest_pe = valuation_data.loc[valuation_data['P/E'].idxmax()]
print(f"Company with the Highest P/E Ratio:\n{highest_pe}\n")

# Companies with the Highest Dividend Growth Rate
if not dividend_growth_data.empty:
    highest_div_growth = dividend_growth_data.loc[dividend_growth_data['Growth Rate'].idxmax()]
    print(f"Company with the Highest Dividend Growth Rate:\n{highest_div_growth}\n")


Summary of Free Cash Flow Data:
                 FCF
count      65.000000
mean    26232.298923
std     31311.260693
min    -25210.000000
25%      9392.000000
50%     14043.000000
75%     33326.000000
max    121969.000000

Summary of Valuation Multiples Data:
             P/E  EV/EBITDA
count  65.000000  65.000000
mean    0.284872   2.893145
std     0.367336   2.113563
min    -0.556145   0.395452
25%     0.000000   1.875584
50%     0.158044   2.282145
75%     0.572033   3.002157
max     1.862334  11.656100

Summary of Dividend Growth Data:
        Dividends  Growth Rate
count  125.000000   117.000000
mean     0.555660     0.024592
std      0.374797     0.160186
min      0.004000    -0.659091
25%      0.230000     0.000000
50%      0.500000     0.000000
75%      0.880000     0.000000
max      1.240000     1.500000

Key Insights from Processed Data:
Highest FCF:
    tic    datadate       FCF
6  AAPL  2024-09-30  121969.0
6  MSFT  2024-06-30   84956.0
6  NVDA  2024-01-31   33411.0
6    PG 

## Step 1: Process Data

### Prepare Free Cash Flow Data for DCF

In [10]:
# Calculate Free Cash Flow (FCF) for each company
compustat_data['FCF'] = compustat_data['oibdp'] - compustat_data['capx']

# Ensure data is sorted chronologically
compustat_data['datadate'] = pd.to_datetime(compustat_data['datadate'])  # Convert to datetime
compustat_data = compustat_data.sort_values(by=['tic', 'datadate'])

# Group data by company and retain year-by-year FCF
fcf_data = compustat_data[['tic', 'datadate', 'FCF']]

# Save to CSV
fcf_data.to_csv("fcf_data_detailed.csv", index=False)

# Print a sample of the detailed FCF data
print("Detailed Free Cash Flow Data (Year-by-Year):")
print(fcf_data.head())


Detailed Free Cash Flow Data (Year-by-Year):
    tic   datadate       FCF
0  AAPL 2018-09-30   66649.0
1  AAPL 2019-09-30   64735.0
2  AAPL 2020-09-30   68679.0
3  AAPL 2021-09-30  107364.0
4  AAPL 2022-09-30  117429.0


### Prepare Valuation Multiples for CCA

In [11]:
# Ensure necessary columns exist
required_columns = ['csho', 'prcc_f', 'ni', 'at', 'lt', 'oibdp']
if not all(col in compustat_data.columns for col in required_columns):
    raise ValueError("One or more required columns are missing from the data.")

# Calculate Valuation Multiples
compustat_data['P/E'] = (compustat_data['csho'] * compustat_data['prcc_f']) / compustat_data['ni'].replace(0, np.nan)
compustat_data['EV/EBITDA'] = (compustat_data['at'] - compustat_data['lt']) / compustat_data['oibdp'].replace(0, np.nan)

# Filter relevant columns
valuation_data = compustat_data[['tic', 'datadate', 'P/E', 'EV/EBITDA']]

# Keep the most recent valuation multiples for each company
valuation_data = valuation_data.sort_values(by=['tic', 'datadate']).groupby('tic').last().reset_index()

# Save to CSV
valuation_data.to_csv("valuation_data.csv", index=False)

# Print sample data
print("Valuation Multiples Data:")
print(valuation_data.head())


Valuation Multiples Data:
    tic   datadate        P/E  EV/EBITDA
0  AAPL 2024-09-30  37.575863   0.433357
1  AMZN 2023-12-31  51.851866   2.975576
2   DIS 2024-09-30  35.055567   6.242428
3   JNJ 2023-12-31  10.732666   2.261261
4    KO 2023-12-31  23.695206   1.906348


### Dividend Growth Rate Processing

In [12]:
# Initialize DataFrame for dividend growth rates
dividend_growth_data = pd.DataFrame()

# Process dividends for each company
for ticker, dividends in dividends_data.items():
    if not dividends.empty:
        print(f"Processing dividend data for {ticker}...")
        # Convert dividend data to DataFrame
        dividends_df = dividends.to_frame().reset_index()
        dividends_df.columns = ['Date', 'Dividends']  # Rename columns for consistency
        dividends_df['Date'] = pd.to_datetime(dividends_df['Date'])  # Ensure proper datetime format
        dividends_df['Dividends'] = pd.to_numeric(dividends_df['Dividends'], errors='coerce')  # Ensure numeric
        
        if len(dividends_df) > 1:  # Ensure sufficient data for growth rate calculation
            # Calculate Dividend Growth Rate
            dividends_df['Growth Rate'] = dividends_df['Dividends'].pct_change()
        else:
            print(f"Not enough dividend data for {ticker} to calculate growth rate.")
            dividends_df['Growth Rate'] = None
        
        # Add ticker identifier
        dividends_df['Ticker'] = ticker
        
        # Append to combined DataFrame
        dividend_growth_data = pd.concat([dividend_growth_data, dividends_df.reset_index(drop=True)])
    else:
        print(f"No dividend data available for {ticker}.")

# Save Dividend Growth Data
dividend_growth_data.to_csv("dividend_growth_data.csv", index=False)

# Print sample data
print("Dividend Growth Data:")
print(dividend_growth_data.head())


Processing dividend data for AAPL...
Processing dividend data for MSFT...
Processing dividend data for KO...
Processing dividend data for NVDA...
Processing dividend data for META...
Processing dividend data for JNJ...
Processing dividend data for PG...
Processing dividend data for DIS...
Dividend Growth Data:
                       Date  Dividends  Growth Rate Ticker
0 2020-02-07 00:00:00-05:00     0.1925          NaN   AAPL
1 2020-05-08 00:00:00-04:00     0.2050     0.064935   AAPL
2 2020-08-07 00:00:00-04:00     0.2050     0.000000   AAPL
3 2020-11-06 00:00:00-05:00     0.2050     0.000000   AAPL
4 2021-02-05 00:00:00-05:00     0.2050     0.000000   AAPL


### DCF Code Implentation

In [13]:
# Load the detailed FCF data
fcf_data = pd.read_csv("fcf_data_detailed.csv")

# Initialize FCF Projections
fcf_projections = {}

# Projection parameters
projection_years = 5
default_growth_rate = 0.05  # Default growth rate if historical growth is unavailable

# Process each company
for ticker in fcf_data['tic'].unique():
    print(f"Processing FCF data for {ticker}...")
    
    # Extract historical FCF for the company
    company_fcf = fcf_data[fcf_data['tic'] == ticker].sort_values(by='datadate')
    
    if len(company_fcf) < 2 or company_fcf['FCF'].iloc[-1] <= 0:
        print(f"Not enough or invalid data for {ticker}. Using default growth rate.")
        historical_growth_rate = default_growth_rate
        last_fcf = max(company_fcf['FCF'].iloc[-1], 1)  # Ensure FCF is positive
    else:
        # Calculate historical growth rate
        company_fcf['Growth Rate'] = company_fcf['FCF'].pct_change()
        historical_growth_rate = company_fcf['Growth Rate'].mean()
        
        # Handle invalid growth rates
        if pd.isnull(historical_growth_rate) or historical_growth_rate <= 0:
            print(f"Invalid or negative growth rate for {ticker}. Using default.")
            historical_growth_rate = default_growth_rate
        
        last_fcf = company_fcf['FCF'].iloc[-1]
    
    # Project future FCF
    projected_fcf = [last_fcf * ((1 + historical_growth_rate) ** i) for i in range(1, projection_years + 1)]
    
    # Save projections
    fcf_projections[ticker] = {
        'Historical FCF': company_fcf['FCF'].values,
        'Historical Growth Rate': historical_growth_rate,
        'Projected FCF': projected_fcf
    }

print("FCF projections prepared.")

# Assumptions for DCF
WACC = 0.08  # Weighted Average Cost of Capital (8%)
terminal_growth_rate = 0.02  # Perpetual growth rate (2%)

# Initialize results
dcf_results = {}

# Process each company
for ticker, data in fcf_projections.items():
    print(f"Calculating DCF for {ticker}...")
    
    # Projected FCFs and growth rate
    projected_fcf = data['Projected FCF']
    
    # Discount projected FCFs
    discounted_fcf = [
        fcf / ((1 + WACC) ** i) for i, fcf in enumerate(projected_fcf, start=1)
    ]
    
    # Terminal Value
    last_fcf = projected_fcf[-1]
    terminal_value = (last_fcf * (1 + terminal_growth_rate)) / (WACC - terminal_growth_rate)
    discounted_tv = terminal_value / ((1 + WACC) ** len(projected_fcf))
    
    # Enterprise Value
    enterprise_value = sum(discounted_fcf) + discounted_tv
    
    # Adjust for net debt
    company_data = compustat_data[compustat_data['tic'] == ticker].iloc[-1]
    if pd.isnull(company_data['lt']) or pd.isnull(company_data['che']) or pd.isnull(company_data['csho']):
        print(f"Missing data for {ticker}. Skipping DCF.")
        continue
    
    net_debt = company_data['lt'] - company_data['che']  # Total Liabilities - Cash & Equivalents
    equity_value = enterprise_value - net_debt
    
    # Calculate intrinsic value per share
    shares_outstanding = company_data['csho']
    intrinsic_value_per_share = equity_value / shares_outstanding
    
    # Handle invalid intrinsic values
    if intrinsic_value_per_share < 0 or pd.isnull(intrinsic_value_per_share):
        intrinsic_value_per_share = 0
    
    # Store results
    dcf_results[ticker] = {
        'Enterprise Value': enterprise_value,
        'Net Debt': net_debt,
        'Equity Value': equity_value,
        'Intrinsic Value per Share': intrinsic_value_per_share
    }

# Convert results to DataFrame
dcf_results_df = pd.DataFrame.from_dict(dcf_results, orient='index')

# Reload the existing DCF results
dcf_results = pd.read_csv("dcf_results.csv", index_col=0)

# Reset the index to include Ticker as a column
dcf_results.reset_index(inplace=True)

# Rename the index column (if it exists) to Ticker
if 'index' in dcf_results.columns:
    dcf_results.rename(columns={'index': 'Ticker'}, inplace=True)

# Save the corrected file without including the index
dcf_results.to_csv("dcf_results_corrected.csv", index=False)

# Verify the corrected file
print(dcf_results.head())


Processing FCF data for AAPL...
Processing FCF data for AMZN...
Processing FCF data for DIS...
Processing FCF data for JNJ...
Processing FCF data for KO...
Processing FCF data for META...
Processing FCF data for MSFT...
Processing FCF data for NVDA...
Processing FCF data for PG...
Processing FCF data for TSLA...
FCF projections prepared.
Calculating DCF for AAPL...
Calculating DCF for AMZN...
Calculating DCF for DIS...
Calculating DCF for JNJ...
Calculating DCF for KO...
Calculating DCF for META...
Calculating DCF for MSFT...
Calculating DCF for NVDA...
Calculating DCF for PG...
Calculating DCF for TSLA...


  Ticker  Enterprise Value  Net Debt  Equity Value  Intrinsic Value per Share
0   AAPL      3.194470e+06  242859.0  2.951611e+06                 195.253844
1   AMZN      4.093169e+07  238702.0  4.069299e+07                3919.194092
2    DIS      2.147888e+05   84695.0  1.300938e+05                  71.795718
3    JNJ      4.278144e+05   75857.0  3.519574e+05                 146.217712
4   META      4.025355e+06   10953.0  4.014402e+06                1567.513426


### Code for CCA

In [14]:
# Ensure required columns exist
required_columns = ['tic', 'datadate', 'csho', 'prcc_f', 'ni', 'at', 'lt', 'oibdp', 'naicsh']
if not all(col in compustat_data.columns for col in required_columns):
    raise ValueError("One or more required columns are missing from the data.")

# Calculate Valuation Multiples
compustat_data['Market Cap'] = compustat_data['csho'] * compustat_data['prcc_f']
compustat_data['P/E'] = compustat_data['Market Cap'] / compustat_data['ni'].replace(0, np.nan)
compustat_data['EV'] = compustat_data['at'] - compustat_data['lt']  # Enterprise Value
compustat_data['EV/EBITDA'] = compustat_data['EV'] / compustat_data['oibdp'].replace(0, np.nan)

# Filter relevant columns for peers
peer_data = compustat_data[['tic', 'naicsh', 'P/E', 'EV/EBITDA']]

# Group by NAICS to find peers
peer_multiples = peer_data.groupby('naicsh').agg({
    'P/E': ['mean', 'median'],
    'EV/EBITDA': ['mean', 'median']
}).reset_index()
peer_multiples.columns = ['NAICSH', 'P/E Mean', 'P/E Median', 'EV/EBITDA Mean', 'EV/EBITDA Median']

# Save peer multiples to CSV
peer_multiples.to_csv("peer_multiples.csv", index=False)

# Initialize results dictionary
cca_results = {}

# Iterate through all target tickers
for target_ticker in ["AAPL", "MSFT", "GOOG", "AMZN", "TSLA", "NVDA", "META", "JNJ", "PG", "DIS"]:
    try:
        # Get target company's data
        target_data = compustat_data[compustat_data['tic'] == target_ticker].iloc[-1]

        # Fetch target company's metrics
        target_ebitda = target_data['oibdp']
        target_net_income = target_data['ni']

        # Find peer group multiples (based on NAICS)
        naics_code = target_data['naicsh']
        target_peer_multiples = peer_multiples[peer_multiples['NAICSH'] == naics_code]

        if not target_peer_multiples.empty:
            pe_median = target_peer_multiples['P/E Median'].values[0]
            ev_ebitda_median = target_peer_multiples['EV/EBITDA Median'].values[0]

            # Valuation estimates
            pe_valuation = pe_median * target_net_income
            ev_valuation = ev_ebitda_median * target_ebitda

            # Store results
            cca_results[target_ticker] = {
                'P/E Median': pe_median,
                'EV/EBITDA Median': ev_ebitda_median,
                'P/E Valuation': pe_valuation,
                'EV/EBITDA Valuation': ev_valuation
            }

            # Print results for each company
            print(f"CCA Valuation for {target_ticker}:")
            print(f"P/E-based Valuation: ${pe_valuation:,.2f}")
            print(f"EV/EBITDA-based Valuation: ${ev_valuation:,.2f}")
        else:
            print(f"No peer multiples found for NAICS code: {naics_code} (Ticker: {target_ticker})")
    except Exception as e:
        print(f"Error processing {target_ticker}: {e}")

# Convert results to DataFrame
cca_results_df = pd.DataFrame.from_dict(cca_results, orient='index').reset_index()

# Rename the index column to 'Ticker'
cca_results_df.rename(columns={'index': 'Ticker'}, inplace=True)

# Save results to CSV
cca_results_df.to_csv("cca_results_corrected.csv", index=False)

# Print sample results
print("Sample CCA Results:")
print(cca_results_df.head())


CCA Valuation for AAPL:
P/E-based Valuation: $2,301,215.06
EV/EBITDA-based Valuation: $69,996.67
CCA Valuation for MSFT:
P/E-based Valuation: $3,082,638.58
EV/EBITDA-based Valuation: $258,659.05
Error processing GOOG: single positional indexer is out-of-bounds
CCA Valuation for AMZN:
P/E-based Valuation: $1,577,593.02
EV/EBITDA-based Valuation: $201,875.00
CCA Valuation for TSLA:
P/E-based Valuation: $628,459.80
EV/EBITDA-based Valuation: $51,316.59
CCA Valuation for NVDA:
P/E-based Valuation: $1,540,102.99
EV/EBITDA-based Valuation: $81,817.37
CCA Valuation for META:
P/E-based Valuation: $718,310.64
EV/EBITDA-based Valuation: $170,704.32
CCA Valuation for JNJ:
P/E-based Valuation: $841,192.06
EV/EBITDA-based Valuation: $69,379.31
CCA Valuation for PG:
P/E-based Valuation: $347,414.43
EV/EBITDA-based Valuation: $52,053.76
CCA Valuation for DIS:
P/E-based Valuation: $272,010.17
EV/EBITDA-based Valuation: $133,023.80
Sample CCA Results:
  Ticker  P/E Median  EV/EBITDA Median  P/E Valuati

### CODE for DDM

In [15]:
# Required rate of return
required_rate_of_return = 0.08  # 8% return

# Filter relevant columns
ddm_data = compustat_data[['tic', 'datadate', 'dvpsp_f']].copy()

# Ensure data is sorted by ticker and date
ddm_data['datadate'] = pd.to_datetime(ddm_data['datadate'])  # Ensure datadate is a datetime object
ddm_data = ddm_data.sort_values(by=['tic', 'datadate'])

# Initialize results list
ddm_results_list = []

# Process each ticker
for ticker in ddm_data['tic'].unique():
    company_data = ddm_data[ddm_data['tic'] == ticker].copy()
    
    # Remove rows with missing or zero dividends
    company_data = company_data[company_data['dvpsp_f'] > 0]
    if company_data.empty:
        print(f"No dividend data available for {ticker}. Skipping DDM.")
        continue
    
    # Calculate most recent dividend
    most_recent_dividend = company_data['dvpsp_f'].iloc[-1]
    
    # Calculate average dividend growth rate
    if len(company_data) > 1:
        company_data['Growth Rate'] = company_data['dvpsp_f'].pct_change()
        dividend_growth_rate = company_data['Growth Rate'].mean()
    else:
        dividend_growth_rate = 0.05  # Default growth rate for limited data
    
    # Handle invalid or negative growth rates
    if dividend_growth_rate <= 0 or pd.isnull(dividend_growth_rate):
        dividend_growth_rate = 0.05  # Default growth rate
    
    # Calculate Intrinsic Value using DDM
    try:
        next_year_dividend = most_recent_dividend * (1 + dividend_growth_rate)
        intrinsic_value = next_year_dividend / (required_rate_of_return - dividend_growth_rate)
    except ZeroDivisionError:
        intrinsic_value = 0  # If (required_rate_of_return - dividend_growth_rate) is zero
    
    # Append to results list
    ddm_results_list.append({
        'Ticker': ticker,
        'Most Recent Dividend': most_recent_dividend,
        'Dividend Growth Rate': dividend_growth_rate,
        'Intrinsic Value': intrinsic_value
    })

# Convert results list to DataFrame
ddm_results = pd.DataFrame(ddm_results_list)

# Validate Intrinsic Values
ddm_results['Intrinsic Value'] = ddm_results['Intrinsic Value'].apply(
    lambda x: max(x, 0) if pd.notnull(x) else 'Not Applicable'
)

# Add missing companies with "Not Applicable" results
missing_tickers = set(["AAPL", "MSFT", "GOOG", "AMZN", "TSLA", "NVDA", "META", "JNJ", "PG", "DIS"]) - set(ddm_results['Ticker'])
for ticker in missing_tickers:
    ddm_results = pd.concat([ddm_results, pd.DataFrame({
        'Ticker': [ticker],
        'Most Recent Dividend': [0],
        'Dividend Growth Rate': ['N/A'],
        'Intrinsic Value': ['Not Applicable']
    })], ignore_index=True)

# Save Finalized DDM Results to CSV
ddm_results.to_csv("final_ddm_results.csv", index=False)

# Print Finalized Results
print("Finalized DDM Results Saved to 'final_ddm_results.csv'")
print(ddm_results.head())


No dividend data available for AMZN. Skipping DDM.
No dividend data available for META. Skipping DDM.
No dividend data available for TSLA. Skipping DDM.
Finalized DDM Results Saved to 'final_ddm_results.csv'
  Ticker  Most Recent Dividend Dividend Growth Rate Intrinsic Value
0   AAPL                  0.98                 0.05            34.3
1    DIS                  0.75                 0.05           26.25
2    JNJ                  4.70              0.05833      229.544059
3     KO                  1.84             0.033621       41.006953
4   MSFT                  2.93             0.100445             0.0


### Integration Code

In [16]:
import yfinance as yf
import pandas as pd
import numpy as np

# Load Corrected DataFrames
dcf_results = pd.read_csv("dcf_results_corrected.csv")
cca_results = pd.read_csv("cca_results_corrected.csv")
ddm_results = pd.read_csv("final_ddm_results.csv")

# Ensure consistent column names
dcf_results.rename(columns={'tic': 'Ticker', 'ticker': 'Ticker'}, inplace=True)
cca_results.rename(columns={'tic': 'Ticker', 'ticker': 'Ticker'}, inplace=True)
ddm_results.rename(columns={'tic': 'Ticker', 'ticker': 'Ticker'}, inplace=True)

# Merge DataFrames
valuation_results = pd.merge(dcf_results, cca_results, on='Ticker', how='outer')
valuation_results = pd.merge(valuation_results, ddm_results, on='Ticker', how='outer')

# Handle Missing Values
valuation_results.fillna({
    'Intrinsic Value per Share': 'N/A',
    'P/E Valuation': 'N/A',
    'Intrinsic Value': 'N/A'
}, inplace=True)

# Safely replace 'N/A' and non-numeric values with 0 and convert to float
valuation_results['Intrinsic Value per Share'] = pd.to_numeric(
    valuation_results['Intrinsic Value per Share'], errors='coerce').fillna(0)
valuation_results['P/E Valuation'] = pd.to_numeric(
    valuation_results['P/E Valuation'], errors='coerce').fillna(0)
valuation_results['Intrinsic Value'] = pd.to_numeric(
    valuation_results['Intrinsic Value'], errors='coerce').fillna(0)

# Normalize values to a common scale
valuation_results['Intrinsic Value per Share'] = (
    valuation_results['Intrinsic Value per Share'] / valuation_results['Intrinsic Value per Share'].max()
)
valuation_results['P/E Valuation'] = (
    valuation_results['P/E Valuation'] / valuation_results['P/E Valuation'].max()
)
valuation_results['Intrinsic Value'] = (
    valuation_results['Intrinsic Value'] / valuation_results['Intrinsic Value'].max()
)

# Define weights for valuation models
weights = {
    'DCF': 0.4,  # 40% weight for DCF
    'CCA': 0.4,  # 40% weight for CCA
    'DDM': 0.2   # 20% weight for DDM
}

# Calculate Weighted Valuation
valuation_results['Weighted Valuation'] = (
    valuation_results['Intrinsic Value per Share'] * weights['DCF'] +
    valuation_results['P/E Valuation'] * weights['CCA'] +
    valuation_results['Intrinsic Value'] * weights['DDM']
)

# Fetch current prices using yfinance
tickers = valuation_results['Ticker'].unique()
current_prices = {}

for ticker in tickers:
    try:
        stock = yf.Ticker(ticker)
        price = stock.history(period="1d")["Close"].iloc[-1]  # Fetch the last closing price
        current_prices[ticker] = price
    except Exception as e:
        print(f"Error fetching price for {ticker}: {e}")
        current_prices[ticker] = None

# Add current prices to the DataFrame
valuation_results['Current Price'] = valuation_results['Ticker'].map(current_prices)

# Handle missing prices
valuation_results['Current Price'] = valuation_results['Current Price'].fillna(0)

# Calculate Valuation Ratio
valuation_results['Valuation Ratio'] = valuation_results['Weighted Valuation'] / valuation_results['Current Price']

# Categorize as Undervalued or Overvalued
valuation_results['Valuation Category'] = valuation_results['Valuation Ratio'].apply(
    lambda x: 'Undervalued' if x > 1 else 'Overvalued'
)

# Save Final Consolidated Results
valuation_results.to_csv("final_valuation_results_with_prices.csv", index=False)

# Print Final Consolidated Results
print("Final Consolidated Valuation Results Saved to 'final_valuation_results_with_prices.csv'")
print(valuation_results.head())

Final Consolidated Valuation Results Saved to 'final_valuation_results_with_prices.csv'
  Ticker  Enterprise Value  Net Debt  Equity Value  Intrinsic Value per Share  \
0   AAPL      3.194470e+06  242859.0  2.951611e+06                   0.026451   
1   AMZN      4.093169e+07  238702.0  4.069299e+07                   0.530926   
2    DIS      2.147888e+05   84695.0  1.300938e+05                   0.009726   
3   GOOG               NaN       NaN           NaN                   0.000000   
4    JNJ      4.278144e+05   75857.0  3.519574e+05                   0.019808   

   P/E Median  EV/EBITDA Median  P/E Valuation  EV/EBITDA Valuation  \
0   24.549960          0.532634       0.746508         69996.668946   
1   51.851866          2.975576       0.511767        201875.000000   
2   54.708401          7.869368       0.088239        133023.803913   
3         NaN               NaN       0.000000                  NaN   
4   23.929453          2.281164       0.272881         69379.314371   

In [17]:
db.close()