#### Define list of stocks

In [142]:
# List of stock tickers you want to analyze.
# The same symbols feed both the yfinance lookup and the valueinvesting.io URL template.
# NOTE(review): in the saved output below, "BRK.A" comes back with no yfinance metrics;
# Yahoo Finance presumably expects a dash for share classes ("BRK-A") - confirm before
# changing, because the DCF scraper builds its URLs from this same list.
stock_list = ["AAPL", "BRK.A", "NVDA", "MSFT", "GOOGL", "AMZN", "V", "PLTR", "OKLO", "BABA", "BIDU", "QCOM", "JD"]

#### Ensure yfinance package is installed.

In [143]:
# Install yfinance package.
!pip install yfinance html5lib --q

#### Define function to import metrics from a given stock list.

In [144]:
import yfinance as yf
import pandas as pd

def _first_available(info, *keys):
    """Return the first value stored in ``info`` under ``keys`` that is not None.

    ``dict.get(key, default)`` only uses the default when the key is absent, so a
    key that is present with a ``None`` value would hide the fallback fields.
    Returns None when none of the keys holds a value.
    """
    for key in keys:
        value = info.get(key)
        if value is not None:
            return value
    return None


def get_stock_financial_metrics(ticker_symbol):
    """
    Retrieves key financial metrics for a given stock ticker using yfinance.

    Args:
        ticker_symbol (str): The stock ticker symbol (e.g., "AAPL", "MSFT").

    Returns:
        dict: A dictionary containing the financial metrics; individual metrics
              are None when not available. If an exception is raised while
              fetching, the dict has every metric set to None plus an
              'error_message' key holding the exception text.
        str:  An error message starting with "Could not retrieve" when the
              returned info has no usable 'symbol' or matches the
              invalid-ticker pattern checked below.
    """
    try:
        stock = yf.Ticker(ticker_symbol)
        # Treat a None/empty payload like an empty dict so the checks below can run.
        info = stock.info or {}

        symbol = info.get('symbol')
        if not symbol or str(symbol).lower() != ticker_symbol.lower():
            # Pattern used here to recognise invalid/delisted tickers: no market price and an empty logo URL.
            if info.get('regularMarketPrice') is None and info.get('logo_url') == '':
                return f"Could not retrieve valid data for ticker: {ticker_symbol}. It might be an invalid or delisted ticker."
            # A missing (or empty) symbol is definitely problematic.
            if not symbol:
                return f"Could not retrieve valid data for ticker: {ticker_symbol}. Essential 'symbol' info missing."
            # A symbol that is present but differs from the request is odd; the metrics are still returned.

        return {
            "ticker": ticker_symbol,  # Ensure ticker is always present
            "price": _first_available(info, 'currentPrice', 'regularMarketPrice', 'previousClose'),
            "pe_ratio": _first_available(info, 'trailingPE', 'forwardPE'),
            "eps": _first_available(info, 'trailingEps', 'forwardEps'),
            "roe": info.get('returnOnEquity'),
            "roa": info.get('returnOnAssets'),
            "profit_margin": info.get('profitMargins'),
            "book_value_per_share": info.get('bookValue'),
            "shares_outstanding": info.get('sharesOutstanding'),
            "price_to_book": info.get('priceToBook'),
            "shortName": info.get('shortName'),  # Company name for clarity
        }

    except Exception as e:
        # Anything raised while building the Ticker or reading .info ends up here;
        # the failure is reported in-band so one bad ticker does not stop a batch.
        return {
            "ticker": ticker_symbol,
            "price": None,
            "pe_ratio": None,
            "eps": None,
            "roe": None,
            "roa": None,
            "profit_margin": None,
            "book_value_per_share": None,
            "shares_outstanding": None,
            "price_to_book": None,
            "shortName": f"Error: {str(e)}",  # Store error in a field
            "error_message": str(e),  # Explicit error message field
        }


def get_financials_for_stock_list(ticker_list):
    """
    Collect the metrics of every ticker in ``ticker_list`` into one DataFrame.

    Each ticker is passed to ``get_stock_financial_metrics``. A dict result is
    used as the row (gaining an ``error_message`` of None if it has none); a
    "Could not retrieve ..." string, or any other result, becomes a row of
    None metrics with the problem recorded in ``error_message``.

    Args:
        ticker_list (list): Stock ticker symbols (e.g., ["AAPL", "MSFT", "GOOGL"]).

    Returns:
        pandas.DataFrame: One row per ticker, with 'ticker' and 'shortName'
                          first and 'error_message' last. An empty input gives
                          an empty DataFrame.
    """
    metric_fields = (
        "ticker", "price", "pe_ratio", "eps", "roe", "roa", "profit_margin",
        "book_value_per_share", "shares_outstanding", "price_to_book", "shortName",
    )

    def _placeholder_row(symbol, message):
        # All metrics None; only the ticker and the error text are filled in.
        row = dict.fromkeys(metric_fields)
        row["ticker"] = symbol
        row["error_message"] = message
        return row

    rows = []
    for symbol in ticker_list:
        print(f"Fetching data for {symbol}...")
        fetched = get_stock_financial_metrics(symbol)

        if isinstance(fetched, dict):
            # Make sure the error column exists even for clean results.
            fetched.setdefault("error_message", None)
            row = fetched
        elif isinstance(fetched, str) and "Could not retrieve" in fetched:
            # String results are the fetcher's own error messages.
            row = _placeholder_row(symbol, fetched)
        else:
            # Not expected from the current fetcher.
            row = _placeholder_row(symbol, "Unknown error structure from get_stock_financial_metrics")

        rows.append(row)

    frame = pd.DataFrame(rows)
    if frame.empty:
        return frame

    # Preferred column order; columns absent from the frame are skipped.
    preferred_order = ["ticker", "shortName", "price", "pe_ratio", "eps", "roe", "roa", "profit_margin",
                       "book_value_per_share", "shares_outstanding", "price_to_book", "error_message"]
    return frame[[name for name in preferred_order if name in frame.columns]]

#### Execute function with current stock list and store into df "successful_data_df"

In [145]:
# Fetch the metrics for every ticker in stock_list, then keep only the rows
# that came back without an error message.
if __name__ == "__main__":
    print("Starting financial data retrieval...")
    financials_df = get_financials_for_stock_list(stock_list)

    if not financials_df.empty:
        # Rows whose error_message is null are treated as successful fetches.
        # .copy() gives an independent frame, avoiding SettingWithCopyWarning below.
        fetched_ok = financials_df['error_message'].isnull()
        successful_data_df = financials_df[fetched_ok].copy()

        # Force the metric columns to numeric dtype; unparseable entries become NaN.
        numeric_cols = ["price", "pe_ratio", "eps", "roe", "roa", "profit_margin",
                        "book_value_per_share", "shares_outstanding", "price_to_book"]
        for col in [name for name in numeric_cols if name in successful_data_df.columns]:
            successful_data_df[col] = pd.to_numeric(successful_data_df[col], errors='coerce')

Starting financial data retrieval...
Fetching data for AAPL...
Fetching data for BRK.A...
Fetching data for NVDA...
Fetching data for MSFT...
Fetching data for GOOGL...
Fetching data for AMZN...
Fetching data for V...
Fetching data for PLTR...
Fetching data for OKLO...
Fetching data for BABA...
Fetching data for BIDU...
Fetching data for QCOM...
Fetching data for JD...


---

## Scrape ValueInvesting.io to get DCF.

In [152]:
import requests
from bs4 import BeautifulSoup
import pandas as pd # Import pandas for DataFrame

def extract_div_value_by_class(url, div_class_name, instance_number=1):
    """
    Fetch ``url`` and return the stripped text of the Nth ``<div>`` carrying
    the class ``div_class_name``.

    Args:
        url (str): The URL of the webpage to scrape.
        div_class_name (str): The class name of the div to search for.
        instance_number (int): 1-based position of the matching div to return.
                               Defaults to 1 (the first match).

    Returns:
        str: The text of the selected div, or None when ``instance_number`` is
             not a positive integer, the page has fewer matching divs than
             requested (a message is printed in that case), or the request or
             parsing raises.
    """
    is_positive_int = isinstance(instance_number, int) and instance_number >= 1
    if not is_positive_int:
        return None

    # Browser-like User-Agent sent with the request.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        page = requests.get(url, headers=browser_headers, timeout=10)
        page.raise_for_status()  # 4xx/5xx responses raise and are handled below

        matches = BeautifulSoup(page.content, 'html.parser').find_all('div', class_=div_class_name)

        if instance_number > len(matches):
            print(f"Could not find instance {instance_number} of div with class '{div_class_name}'. "
                  f"Found {len(matches)} instance(s) on the page: {url}")
            return None

        # instance_number is 1-based; list positions are 0-based.
        return matches[instance_number - 1].get_text(strip=True)

    except Exception:
        # Request failures and any other error are reported as "no value".
        return None

def parse_dollar_amount(raw_value):
    """
    Convert a scraped dollar string such as "$1,234.5" or "$2.5B" into a float.

    Args:
        raw_value (str | None): Text taken from the page.

    Returns:
        float: The amount, scaled by a trailing B (1e9), M (1e6) or K (1e3) suffix.
        str:   ``raw_value`` unchanged when it does not start with '$' or the
               remainder cannot be parsed as a number.
        None:  When ``raw_value`` is None or empty.
    """
    if not raw_value:
        return None
    if not raw_value.startswith('$'):
        return raw_value

    suffix_multipliers = {'B': 1_000_000_000, 'M': 1_000_000, 'K': 1_000}
    digits = raw_value.replace('$', '').replace(',', '')
    multiplier = 1
    if digits and digits[-1] in suffix_multipliers:
        multiplier = suffix_multipliers[digits[-1]]
        digits = digits[:-1]

    if not digits:
        # Nothing left after removing the currency sign/suffix.
        return raw_value
    try:
        return float(digits) * multiplier
    except ValueError:
        return raw_value


def clean_numeric_text(value):
    """
    Strip every character except digits, '.' and '-' from string values.

    Non-string values (already-parsed floats, None) pass through untouched, so
    a float is never round-tripped through its text form, where scientific
    notation such as '1e+16' would be mangled by the character filter.
    """
    import re

    if isinstance(value, str):
        return re.sub(r'[^0-9.-]', '', value)
    return value


if __name__ == "__main__":
    # --- Dynamic URL Generation ---
    # Base URL template with a placeholder for the ticker to be inserted
    base_url_template = "https://valueinvesting.io/{TICKER}/valuation/dcf-growth-exit-5y"

    # One URL per ticker, in the same order as stock_list
    urls_to_scrape = [base_url_template.format(TICKER=value) for value in stock_list]

    # --- Configuration for extraction ---
    # Class name and instance number extracted from EACH URL in the list
    class_to_extract = "norm"
    # Third matching div on the page.
    # NOTE(review): presumably the DCF fair value - confirm against the page layout.
    instance_to_target = 3

    results_data = []  # List to store dictionaries of results

    # The ticker is taken straight from stock_list rather than parsed back out of the URL.
    for ticker, current_url in zip(stock_list, urls_to_scrape):
        raw_extracted_value = extract_div_value_by_class(
            current_url, class_to_extract, instance_number=instance_to_target
        )

        # Dollar parsing only applies to the "norm" class; anything else keeps its raw text.
        if raw_extracted_value and class_to_extract == "norm":
            processed_value_for_df = parse_dollar_amount(raw_extracted_value)
        else:
            processed_value_for_df = raw_extracted_value or None

        results_data.append({
            "URL": current_url,
            "ticker": ticker,
            "target_class": class_to_extract,
            "target_instance": instance_to_target,
            "extracted_raw_value": raw_extracted_value,
            "processed_value": processed_value_for_df
        })

    # Explicit columns keep the frame well-formed even when stock_list is empty.
    results_df = pd.DataFrame(
        results_data,
        columns=["URL", "ticker", "target_class", "target_instance",
                 "extracted_raw_value", "processed_value"],
    )

    # Convert DCF value to numeric. Only leftover strings are character-filtered;
    # anything that still fails to parse becomes NaN.
    results_df['processed_value'] = pd.to_numeric(
        results_df['processed_value'].map(clean_numeric_text),
        errors='coerce'
    )

Could not find instance 3 of div with class 'norm'. Found 0 instance(s) on the page: https://valueinvesting.io/OKLO/valuation/dcf-growth-exit-5y
Could not find instance 3 of div with class 'norm'. Found 0 instance(s) on the page: https://valueinvesting.io/JD/valuation/dcf-growth-exit-5y


#### Combine DCF values into analysis table and calculate opportunity.
Opportunity = % difference between the 5-year growth DCF fair value and the current share price, i.e. (DCF fair value − price) / price. It is stored in the `opportuniy` column.

In [162]:
# Look up each ticker's scraped DCF value and attach it to the main dataframe.
dcf_by_ticker = results_df.set_index('ticker')['processed_value']
successful_data_df['dcf_5yg'] = successful_data_df['ticker'].map(dcf_by_ticker)

# Calculated column: relative gap between DCF fair value and share price,
# (dcf_5yg - price) / price.
successful_data_df['opportuniy'] = (
    successful_data_df['dcf_5yg']
    .sub(successful_data_df['price'])
    .div(successful_data_df['price'])
)

#### Analysis: Explore DF

In [163]:
# Display the combined table: the yfinance metrics plus the dcf_5yg and opportuniy columns.
successful_data_df

Unnamed: 0,ticker,shortName,price,pe_ratio,eps,roe,roa,profit_margin,book_value_per_share,shares_outstanding,price_to_book,error_message,dcf_5yg,opportuniy
0,AAPL,Apple Inc.,199.95,31.14486,6.42,1.38015,0.2381,0.24301,4.471,14935800000.0,44.721535,,176.25,-0.11853
1,BRK.A,,,,,,,,,,,,554.5,
2,NVDA,NVIDIA Corporation,139.19,44.9,3.1,1.15463,0.53245,0.51694,3.438,24387600000.0,40.48575,,109.86,-0.210719
3,MSFT,Microsoft Corporation,458.68,35.47409,12.93,0.3361,0.14582,0.35789,43.3,7432540000.0,10.593072,,282.92,-0.383187
4,GOOGL,Alphabet Inc.,171.86,19.202234,8.95,0.34789,0.16897,0.30857,28.405,5820000000.0,6.050343,,196.58,0.143838
5,AMZN,"Amazon.com, Inc.",205.7,33.55628,6.13,0.2524,0.07632,0.1014,28.82,10616400000.0,7.137404,,240.85,0.17088
6,V,Visa Inc.,362.4,36.349045,9.97,0.50655,0.16936,0.52859,19.801,1710990000.0,18.302105,,324.15,-0.105546
7,PLTR,Palantir Technologies Inc.,122.32,531.82605,0.23,0.12357,0.04392,0.18321,2.299,2262910000.0,53.20574,,5.09,-0.958388
8,OKLO,Oklo Inc.,52.93,-135.71796,-0.47,-0.56027,-0.22832,0.0,1.935,139205000.0,27.354006,,,
9,BABA,Alibaba Group Holding Limited,117.18,15.75,7.44,0.11438,0.05173,0.13059,437.002,2385510000.0,0.268145,,152.43,0.300819
