In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

def scrape_financials(tickers: list, base_url: str, timeout: float = 30.0) -> pd.DataFrame:
    """
    Scrape financial data for the given tickers from wallmine.com-style pages.

    For every ticker the page at ``base_url + ticker`` is downloaded and the
    text of each ``<td data-property="...">`` cell listed below is collected,
    together with the 52-week range cell and the share price embedded in the
    first ``<script>`` tag of the page body.

    Args:
    tickers (list): List of stock tickers (some need an exchange prefix,
        e.g. 'NASDAQ/AMZN').
    base_url (str): Base URL the ticker is appended to.
    timeout (float): Seconds to wait for each HTTP request before giving up.

    Returns:
    pd.DataFrame: One row per ticker (indexed by ticker) and one column per
        scraped field. All values are the raw strings found on the page;
        a field that could not be located is stored as None. An empty
        ``tickers`` list yields an empty DataFrame.

    Raises:
    requests.HTTPError: If a page responds with a 4xx/5xx status code.
    requests.RequestException: On connection problems or a timeout.
    """
    # The list of scraped properties and the price regex do not depend on the
    # ticker, so they are built once instead of on every loop iteration.
    properties = [
        'revenue', 'yearly_revenue_growth', 'quarterly_revenue_growth', 'ebitda', 'ebitda_margin',
        'profit_margin', 'market_cap', 'enterprise_value', 'ev_sales', 'ev_ebitda', 'pe', 'shares_outstanding'
    ]
    price_pattern = re.compile(r'"price":\s*([\d.]+)')

    data = {}

    for ticker in tickers:
        url = f"{base_url}{ticker}"
        # Without a timeout a stalled connection would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
        # Fail loudly on error pages instead of parsing them into an empty row.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        property_data = {}

        # Scrape the data for each property in the properties list
        for prop in properties:
            cell = soup.find('td', {'data-property': prop})
            # A missing cell becomes None rather than an AttributeError.
            property_data[prop] = cell.get_text().strip() if cell is not None else None

        # Scrape the 52 week range text: it lives in the second cell with this class.
        range_cells = soup.find_all('td', {'class': 'small text-mobile-small'})
        property_data['fifty_two_week_high'] = (
            range_cells[1].get_text().strip() if len(range_cells) > 1 else None
        )

        # Scraping the price (price was not in html, it was inside a script tag within the source).
        # Reset per ticker so a page without a price can neither raise
        # UnboundLocalError nor inherit the previous ticker's price.
        price = None
        body = soup.find('body')
        script = body.find('script') if body is not None else None
        if script is not None:
            match = price_pattern.search(script.get_text())
            if match:
                price = match.group(1)
        property_data['share_price'] = price

        data[ticker] = property_data

    # Tickers are the dict keys (columns), so transpose to get one row per ticker.
    return pd.DataFrame(data).T

# Root of the site; every ticker below is appended to it to form the page URL.
base_url = 'https://wallmine.com/'

# Add more tickers as needed.
# NOTE: Some tickers require the exchange in front (e.g. 'NASDAQ/AMZN') to work.
tickers = [
    'AAPL',
    'GOOGL',
    'MSFT',
    'NASDAQ/AMZN',
    'NASDAQ/META',
    'SMSN',
    'TCEHY',
    'NASDAQ/NVDA',
    'ADBE',
]

# Scrape every ticker into one table (one row per ticker).
financial_data = scrape_financials(tickers, base_url)

financial_data

Unnamed: 0,revenue,yearly_revenue_growth,quarterly_revenue_growth,ebitda,ebitda_margin,profit_margin,market_cap,enterprise_value,ev_sales,ev_ebitda,pe,shares_outstanding,fifty_two_week_high,share_price
AAPL,$381.623B,-0.90%,-4.31%,$129.629B,33.97%,25.31%,$1.97T,$3.22T,8.44,24.83,34.72,15.334B,$164.08 – $237.23,224.31
GOOGL,$318.146B,11.78%,15.41%,$103.970B,32.68%,24.01%,$1.23T,$1.97T,6.2,18.97,27.04,12.609B,$118.73 – $191.75,177.66
MSFT,$236.584B,13.97%,17.03%,$125.543B,53.06%,34.15%,$1.87T,$3.33T,14.07,26.51,37.71,7.432B,$309.45 – $468.33,437.11
NASDAQ/AMZN,$590.740B,12.54%,12.53%,$96.511B,16.34%,5.29%,$44.86B,$1.95T,3.3,20.2,50.31,10.407B,$118.35 – $201.20,183.13
NASDAQ/META,$142.711B,21.62%,27.26%,$65.370B,45.81%,28.98%,$542.80B,$1.10T,7.67,16.75,26.71,2.690B,$274.38 – $542.79,476.79
SMSN,₩308.330T,16.53%,3.79%,₩90.944T,29.50%,14.04%,$310.43B,$221.56B,0.94,3.19,13.5,271.707M,"$1,207.00 – $1,612.00",1507.0
TCEHY,¥553.786B,0.76%,-1.60%,¥153.593B,27.74%,40.14%,$399.92B,$410.64B,5.02,18.09,34.44,9.570B,$33.00 – $52.10,46.66
NASDAQ/NVDA,$79.774B,208.27%,262.12%,$49.275B,61.77%,48.85%,$422.44B,$289.63B,3.63,5.88,68.29,24.598B,"$116.56 – $1,255.87",117.93
ADBE,$20.429B,10.85%,10.24%,$7.139B,34.95%,27.97%,$156.40B,$235.80B,11.54,33.03,49.24,443.400M,$433.98 – $638.25,551.0


In [7]:
# Columns whose scraped text may carry a dollar sign.
dollar_sign_values = ['revenue', 'ebitda', 'market_cap', 'enterprise_value', 'fifty_two_week_high']

# Keeps only the value after the dash from the fifty two week high column
# (the scraped text is a "low – high" range). Taking the LAST piece keeps the
# cell idempotent: once the range has been reduced to a single value, a re-run
# leaves it untouched instead of turning the column into NaN.
financial_data['fifty_two_week_high'] = financial_data['fifty_two_week_high'].str.split(' – ').str[-1]

# Removes the dollar signs. regex=False makes '$' a literal character on every
# pandas version (as a regex it would be the end-of-string anchor).
# Other currency symbols (e.g. ₩, ¥) are intentionally left as they are.
for value in dollar_sign_values:
    financial_data[value] = financial_data[value].str.replace('$', '', regex=False)

financial_data

Unnamed: 0,revenue,yearly_revenue_growth,quarterly_revenue_growth,ebitda,ebitda_margin,profit_margin,market_cap,enterprise_value,ev_sales,ev_ebitda,pe,shares_outstanding,fifty_two_week_high,share_price
AAPL,381.623B,-0.90%,-4.31%,129.629B,33.97%,25.31%,1.97T,3.22T,8.44,24.83,34.72,15.334B,237.23,224.31
GOOGL,318.146B,11.78%,15.41%,103.970B,32.68%,24.01%,1.23T,1.97T,6.2,18.97,27.04,12.609B,191.75,177.66
MSFT,236.584B,13.97%,17.03%,125.543B,53.06%,34.15%,1.87T,3.33T,14.07,26.51,37.71,7.432B,468.33,437.11
NASDAQ/AMZN,590.740B,12.54%,12.53%,96.511B,16.34%,5.29%,44.86B,1.95T,3.3,20.2,50.31,10.407B,201.2,183.13
NASDAQ/META,142.711B,21.62%,27.26%,65.370B,45.81%,28.98%,542.80B,1.10T,7.67,16.75,26.71,2.690B,542.79,476.79
SMSN,₩308.330T,16.53%,3.79%,₩90.944T,29.50%,14.04%,310.43B,221.56B,0.94,3.19,13.5,271.707M,1612.0,1507.0
TCEHY,¥553.786B,0.76%,-1.60%,¥153.593B,27.74%,40.14%,399.92B,410.64B,5.02,18.09,34.44,9.570B,52.1,46.66
NASDAQ/NVDA,79.774B,208.27%,262.12%,49.275B,61.77%,48.85%,422.44B,289.63B,3.63,5.88,68.29,24.598B,1255.87,117.93
ADBE,20.429B,10.85%,10.24%,7.139B,34.95%,27.97%,156.40B,235.80B,11.54,33.03,49.24,443.400M,638.25,551.0


In [9]:
# Write the table to an Excel workbook, on a single sheet named 'Data'.
# Passing the path straight to to_excel lets pandas open and close the
# openpyxl writer itself.
file_name = 'trading_comps_data.xlsx'
financial_data.to_excel(file_name, sheet_name='Data', engine='openpyxl')

print(f"DataFrames have been exported to {file_name}")

DataFrames have been exported to trading_comps_data.xlsx
