In [27]:
import bs4
import pandas as pd
import requests
from io import StringIO
import time
import os
import random




In [28]:
# Browser-like request headers sent with every page request made by this notebook.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}

# Ticker symbol -> company slug; both are interpolated into the Macrotrends URLs.
tickers = {
    "NVDA": "nvidia",
    "AAPL": "apple",
    "MSFT": "microsoft",
    "GOOGL": "alphabet",
    "AMZN": "amazon",
    "META": "meta-platforms",
    "TSLA": "tesla",
}

In [29]:
def fetch_table(url, timeout=10):
    """Download a page and return its first HTML table as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the page to scrape.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its timeout (default 10).

    Returns
    -------
    pandas.DataFrame or None
        The first ``<table>`` found on the page, with flat, whitespace-trimmed
        string column labels. ``None`` is returned if the request, the parsing
        or the column clean-up fails; the failure is printed rather than raised.
    """
    try:
        resp = requests.get(url, headers=headers, timeout=timeout)
        resp.raise_for_status()  # turn HTTP 4xx/5xx responses into an exception
        soup = bs4.BeautifulSoup(resp.text, "html.parser")
        tables = soup.find_all("table")
        if not tables:
            raise ValueError("No tables found on the page.")
        df = pd.read_html(StringIO(str(tables[0])))[0]
        if isinstance(df.columns, pd.MultiIndex):
            # Keep only the innermost header row. For a two-level header this is
            # the same as dropping level 0, and it also flattens deeper headers
            # whose labels would otherwise remain tuples.
            df.columns = df.columns.get_level_values(-1)
        # str() first: labels may be non-strings (e.g. integer positions when the
        # table has no header row), and calling .strip() on those would raise.
        df.columns = [str(col).strip() for col in df.columns]
        return df
    except Exception as e:
        # Best-effort by design: callers treat None as "no data for this page"
        # so that one bad page does not abort a whole scraping run.
        print(f"[ERROR] Failed to fetch data from {url}: {e}")
        return None

# Accumulators for the per-ticker PE and PB frames (two independent lists).
pe_data, pb_data = [], []

Here, the same logic is used to extract the PB ratio data from Macrotrends.

In [30]:
# Start from empty accumulators so that re-running this cell does not append a
# second copy of every ticker to lists left over from a previous execution.
pe_data = []
pb_data = []

# Macrotrends page slug -> (source column -> output column, destination list).
# The order of each mapping also fixes the output column order after 'TickerName'.
RATIO_PAGES = {
    'pe-ratio': (
        {
            'Date': 'Date',
            'Stock Price': 'StockPrice',
            'TTM Net EPS': 'NetEPS',
            'PE Ratio': 'PE',
        },
        pe_data,
    ),
    'price-book': (
        {
            'Date': 'Date',
            'Stock Price': 'StockPrice',
            'Book Value per Share': 'BookValuePerShare',
            'Price to Book Ratio': 'PB',
        },
        pb_data,
    ),
}


def _extract_ratio(symbol, name, page, column_map):
    """Fetch one Macrotrends ratio page for one ticker and normalise its table.

    Parameters
    ----------
    symbol : str
        Ticker symbol; used in the URL and written to the 'TickerName' column.
    name : str
        Company slug used in the URL.
    page : str
        Last URL path segment ('pe-ratio' or 'price-book').
    column_map : dict
        Source column name -> output column name. Every key must be present
        in the fetched table.

    Returns
    -------
    pandas.DataFrame or None
        Frame with columns 'Date', 'TickerName' and the remaining mapped
        columns in mapping order, or None if the table is missing, lacks an
        expected column, or cannot be converted (the reason is printed).
    """
    url = f"https://www.macrotrends.net/stocks/charts/{symbol}/{name}/{page}"
    table = fetch_table(url)

    if table is None or table.empty:
        print(f"Data not found for {symbol} ({page})")
        return None

    missing = set(column_map) - set(table.columns)
    if missing:
        print(f"Error: {symbol} ({page}) is missing expected columns {sorted(missing)}")
        return None

    try:
        renamed = table.rename(columns=column_map)
        renamed['Date'] = pd.to_datetime(renamed['Date'])
        renamed['TickerName'] = symbol
        value_cols = [col for col in column_map.values() if col != 'Date']
        return renamed[['Date', 'TickerName'] + value_cols]
    except Exception as e:
        # Best-effort: report the cause and move on to the next page.
        print(f"Error: could not process {symbol} ({page}): {e}")
        return None


for symbol, name in tickers.items():
    for page, (column_map, sink) in RATIO_PAGES.items():
        ratio_df = _extract_ratio(symbol, name, page, column_map)
        if ratio_df is not None:
            sink.append(ratio_df)

        # Random 5-10 second pause after every request.
        time.sleep(random.uniform(5, 10))

print("\nAll data extraction complete.")


All data extraction complete.


In [32]:

def _combine_and_save(frames, csv_path):
    """Concatenate per-ticker frames, sort them, and write the result to CSV.

    Parameters
    ----------
    frames : list of pandas.DataFrame
        Non-empty list of frames, each with 'TickerName' and 'Date' columns.
    csv_path : str
        Destination file; written without the index.

    Returns
    -------
    pandas.DataFrame
        The combined frame, sorted by 'TickerName' then 'Date', with a fresh index.
    """
    combined = (
        pd.concat(frames, ignore_index=True)
        .sort_values(['TickerName', 'Date'])
        .reset_index(drop=True)
    )
    combined.to_csv(csv_path, index=False)
    return combined


# Make sure the output directory exists before writing into it.
os.makedirs("data", exist_ok=True)

# combined_pe_df / combined_pb_df are only defined when data was collected.
if pe_data:
    combined_pe_df = _combine_and_save(pe_data, "data/pe_ratio_data.csv")
    print("PE Ratio data saved")
else:
    print("No PE Ratio data saved.")

if pb_data:
    combined_pb_df = _combine_and_save(pb_data, "data/pb_ratio_data.csv")
    print("PB Ratio data saved")
else:
    print("No PB Ratio data saved.")




PE Ratio data saved
PB Rato data saved
