In [7]:
import bs4
import pandas as pd
import requests
from io import StringIO
import time
import os

In [8]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
}

tickers = {
    'AAPL': 'apple',
    'MSFT': 'microsoft',
    'GOOGL': 'alphabet',
    'TSLA': 'tesla',
    'META': 'meta-platforms',
    'NVDA': 'nvidia-corp',
}



In [9]:
def fetch_table(url):
    try:
        resp = requests.get(url, headers=headers)
        soup = bs4.BeautifulSoup(resp.text, "html.parser")
        table = soup.find_all("table")[0]
        df = pd.read_html(StringIO(str(table)))[0]
        df.columns = df.columns.droplevel(0)
        return df
    except Exception as e:
        print(f"Failed to fetch data from {url}: {e}")
        return None

pe_data = []
pb_data = []



Here same logic has been used to extract the PB ratio data from Marcotrends.

In [10]:

for symbol, name in tickers.items():

    url_pe = f"https://www.macrotrends.net/stocks/charts/{symbol}/{name}/pe-ratio"
    df_pe = fetch_table(url_pe)
    if df_pe is not None:
        df_pe = df_pe.rename(columns={
            'Date': 'Date',
            'Stock Price': 'StockPrice',
            'TTM Net EPS': 'NetEPS',
            'PE Ratio': 'PE'
        })
        df_pe['Date'] = pd.to_datetime(df_pe['Date'])
        df_pe['TickerName'] = symbol
        df_pe = df_pe[['Date', 'TickerName', 'StockPrice', 'NetEPS', 'PE']]
        pe_data.append(df_pe)
    else:
        print(f"Failed PE for {symbol}")

    time.sleep(1.5)  

    url_pb = f"https://www.macrotrends.net/stocks/charts/{symbol}/{name}/price-book"
    df_pb = fetch_table(url_pb)
    if df_pb is not None:
        df_pb = df_pb.rename(columns={
            'Date': 'Date',
            'Stock Price': 'StockPrice',
            'Book Value per Share': 'BookValuePerShare',
            'Price to Book Ratio': 'PB'
        })
        df_pb['Date'] = pd.to_datetime(df_pb['Date'])
        df_pb['TickerName'] = symbol
        df_pb = df_pb[['Date', 'TickerName', 'StockPrice', 'BookValuePerShare', 'PB']]
        pb_data.append(df_pb)
    else:
        print(f"Failed PB for {symbol}")

    time.sleep(1.5)

print("Data extraction complete.")

Failed to fetch data from https://www.macrotrends.net/stocks/charts/NVDA/nvidia-corp/pe-ratio: list index out of range
Failed PE for NVDA
Failed to fetch data from https://www.macrotrends.net/stocks/charts/NVDA/nvidia-corp/price-book: list index out of range
Failed PB for NVDA
Data extraction complete.


In [11]:
combined_pe_df = pd.concat(pe_data, ignore_index=True)
combined_pe_df = combined_pe_df.sort_values(['TickerName', 'Date']).reset_index(drop=True)

combined_pb_df = pd.concat(pb_data, ignore_index=True)
combined_pb_df = combined_pb_df.sort_values(['TickerName', 'Date']).reset_index(drop=True)

In [12]:
os.makedirs("data", exist_ok=True)
combined_pe_df.to_csv("data/pe_ratio_data.csv", index=False)
combined_pb_df.to_csv("data/pb_ratio_data.csv", index=False)

print("PE Ratio data saved ")
print("PB Ratio data saved ")


PE Ratio data saved 
PB Ratio data saved 
