In [93]:
import requests
from functools import cache
import re

from bs4 import BeautifulSoup

# Drawing the ASCII table

In [178]:
from prettytable import PrettyTable, TableStyle

def draw_table(fields, data: list[list[object]], title="Title") -> str:
    """Render rows as an ASCII table topped with an '='-padded title bar.

    Args:
        fields: Column names, in display order.
        data: Table rows; each row holds one value per entry in ``fields``.
        title: Text centered in the title bar. Surround it with spaces if it
            should be separated from the '=' padding.

    Returns:
        The title bar followed by the rendered table without its first and
        last line (the top and bottom border), joined with newlines.
    """
    table = PrettyTable()
    table.field_names = fields
    table.add_rows(data)

    table.junction_char = '-'
    table.align = 'l'

    # Work on whole lines instead of str.find/rfind offsets: a -1 "not found"
    # result used as a slice bound silently drops characters when the
    # rendering is shorter than expected.
    lines = str(table).split('\n')[1:-1]

    # The title bar is as wide as the first remaining line of the table.
    width = len(lines[0]) if lines else 0
    title_bar = title.center(width, '=')

    return '\n'.join([title_bar, *lines])

In [179]:
# Try draw_table on a single hand-written sample row.
fields = ["Name", "Code", "Country", "Employees", "CEO Name", "CEO Year Born"]
data = [["Pfizer Inc.", "PFE", "United States", 78500, "Dr. Albert Bourla D.V.M., DVM, Ph.D.", 1962]]
# Title wording matches the one used for the real report further down.
print(draw_table(fields, data, ' 5 stocks with the youngest CEOs '))

| Name        | Code | Country       | Employees | CEO Name                             | CEO Year Born |
---------------------------------------------------------------------------------------------------------
| Pfizer Inc. | PFE  | United States | 78500     | Dr. Albert Bourla D.V.M., DVM, Ph.D. | 1962          |


# Scraping Data

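Yahoo Finance may reject requests that do not look like they come from a browser, so we send a browser-like `User-Agent` header: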

In [11]:
# HTTP headers sent with every request made by this notebook.
# The User-Agent value is a desktop Chrome identification string, split over
# several adjacent literals only to keep the lines short.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}

We cache the responses so that we don't hit rate limits and so that re-running cells during testing is faster.

In [211]:
## Maps a request URL to the response object that was fetched for it.
## Re-running this cell keeps an already populated cache instead of replacing
## it with an empty dict, so the cache cannot be cleared by accident.
## To drop the cache on purpose, call response_cache.clear().
response_cache = globals().get('response_cache', {})

In [13]:
def request(url: str, timeout: float = 30):
    """Fetch ``url`` with a GET request, reusing a cached response if present.

    Args:
        url: Address to fetch; also the key used in ``response_cache``.
        timeout: Seconds handed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The cached response when ``url`` was stored before, otherwise the
        freshly fetched response.
    """
    if url in response_cache:
        return response_cache[url]

    r = requests.get(url, headers=headers, timeout=timeout)
    # Only successful responses are stored: an error response is requested
    # again on the next call instead of being replayed from the cache.
    if r.ok:
        response_cache[url] = r
    return r

## Top 25 most active stocks

In [214]:
# Build the list of ticker symbols (in page order) and a symbol -> company
# name lookup from the first 25 entries of the "most active" listing.
stocks = []
stock_names = {}

r = request('https://finance.yahoo.com/markets/stocks/most-active/?start=0&count=25')
soup = BeautifulSoup(r.text, 'html.parser')

# The page must contain exactly one table carrying this class.
matches = soup.find_all('table', class_='markets-table')
assert len(matches) == 1
table = matches[0]

# The first <tr> is skipped (presumably the header row).
for row in table.find_all('tr')[1:]:
    link = row.find('a', href=True)
    # The symbol is the second-to-last '/'-separated piece of the link target.
    href_parts = link['href'].split('/')
    symbol = href_parts[-2]
    # The company name is read from the row's second cell.
    name = row.find_all('td')[1].text.strip()
    stocks.append(symbol)
    stock_names[symbol] = name
print(stocks)

['F', 'NVDA', 'NIO', 'TSLA', 'GME', 'PLTR', 'MLGO', 'LCID', 'IQ', 'RIVN', 'SOFI', 'PSLV', 'AAL', 'NU', 'INTC', 'HOOD', 'WBD', 'SMCI', 'PFE', 'EDR', 'T', 'BTG', 'STLA', 'CORZ', 'LYG']


In [215]:
# HTTP status code of `r`, the response most recently assigned above.
r.status_code

200

## 1. Top 5 stocks with youngest CEOs

Fields: Country, Employees, CEO Name, CEO Year Born

In [50]:
def get_data1(stock):
    """Scrape company profile details for ``stock`` from its profile page.

    Args:
        stock: Ticker symbol inserted into the profile URL.

    Returns:
        A ``(country, employees, name, year)`` tuple. ``name`` and ``year``
        are read from the second row of the first table on the page, which is
        assumed to describe the CEO. Any element that is absent from the page
        or cannot be parsed is returned as ``None``.
    """
    r = request(f'https://finance.yahoo.com/quote/{stock}/profile/')
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(r.text, 'html.parser')

    # Assuming the CEO is on the first row after the table header.
    name = None
    year = None
    table = soup.find('table')
    rows = table.find_all('tr') if table is not None else []
    if len(rows) > 1:
        cells = rows[1].find_all('td')
        if cells:
            name = cells[0].text.strip()
            try:
                # The birth year is taken from the last cell of the row.
                year = int(cells[-1].text)
            except ValueError:
                year = None

    # The country is taken from the last <div> inside the address block.
    country = None
    address = soup.find('div', class_='address')
    if address is not None:
        address_parts = address.find_all('div')
        if address_parts:
            country = address_parts[-1].text.strip()

    # The employee count is the <dd> next to the <dt> mentioning "Employees".
    employees = None
    dt = soup.find('dt', string=re.compile('.*Employees.*'))
    dd = dt.find_next_sibling('dd') if dt is not None else None
    if dd is not None:
        try:
            employees = int(dd.text.replace(',', ''))
        except ValueError:
            employees = None

    return country, employees, name, year


In [208]:
# One row per stock: (name, code, country, employees, CEO name, CEO birth year).
data1 = [(stock_names[s], s, *get_data1(s)) for s in stocks]
# Order by birth year, latest first; a missing year is treated as 0.
data1.sort(key=lambda row: row[-1] or 0, reverse=True)

In [210]:
# Print the five rows at the head of the sorted data1 list.
fields = [
    "Name",
    "Code",
    "Country",
    "Employees",
    "CEO Name",
    "CEO Year Born",
]
print(draw_table(fields, data1[:5], title=" 5 stocks with the youngest CEOs "))

| Name                    | Code | Country       | Employees | CEO Name                         | CEO Year Born |
-----------------------------------------------------------------------------------------------------------------
| Robinhood Markets, Inc. | HOOD | United States | 2300      | Mr. Vladimir  Tenev              | 1988          |
| GameStop Corp.          | GME  | United States | None      | Mr. Ryan  Cohen                  | 1986          |
| Rivian Automotive, Inc. | RIVN | United States | 14861     | Mr. Robert Joseph Scaringe Ph.D. | 1984          |
| Grab Holdings Limited   | GRAB | Singapore     | 11267     | Mr. Ping Yeow  Tan               | 1983          |
| MicroAlgo Inc.          | MLGO | China         | 86        | Mr. Min  Shu                     | 1977          |


## 2. Top 10 stocks by 52-Week Change
The 10 stocks with the best 52-Week Change. The 52-Week Change is found on the Statistics tab.

Sheet's fields: Name, Code, 52-Week Change, Total Cash

In [199]:
def _stat_value(soup, label):
    """Return the stripped text of the cell next to the first <td> containing ``label``, or None."""
    for cell in soup.find_all('td'):
        if label not in cell.get_text(strip=True):
            continue
        value_cell = cell.find_next_sibling('td')
        return value_cell.text.strip() if value_cell is not None else None
    return None


def get_data2(stock):
    """Scrape the 52-week change and total cash for ``stock``.

    Args:
        stock: Ticker symbol inserted into the key-statistics URL.

    Returns:
        A ``(change, cash)`` tuple. ``change`` is the "52 Week Change" value
        as a float with its trailing '%' removed, or ``None`` when the value
        is absent or not numeric. ``cash`` is the "Total Cash" text exactly
        as shown on the page (stripped), or ``None`` when absent.
    """
    r = request(f'https://finance.yahoo.com/quote/{stock}/key-statistics/')
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(r.text, 'html.parser')

    change = None
    raw_change = _stat_value(soup, '52 Week Change')
    if raw_change is not None:
        try:
            # Tolerate surrounding whitespace and thousands separators; any
            # non-numeric placeholder leaves the change as None.
            change = float(raw_change.rstrip('%').replace(',', ''))
        except ValueError:
            change = None

    cash = _stat_value(soup, 'Total Cash')

    return change, cash

In [206]:
# One row per stock: (name, code, 52-week change, total cash).
data2 = [(stock_names[s], s, *get_data2(s)) for s in stocks]
# Best 52-week change first. get_data2 reports a missing change as None,
# which cannot be ordered against floats, so such rows are ranked last.
data2 = sorted(
    data2,
    key=lambda row: row[-2] if row[-2] is not None else float('-inf'),
    reverse=True,
)

In [207]:
# Print the ten rows at the head of the sorted data2 list.
fields = [
    "Name",
    "Code",
    "52-Week Change (%)",
    "Total Cash",
]
print(draw_table(fields, data2[:10], title=" 10 stocks with best 52-Week Change "))

| Name                       | Code | 52-Week Change (%) | Total Cash |
-----------------------------------------------------------------------
| Rigetti Computing, Inc.    | RGTI | 500.0              | 192.09M    |
| Palantir Technologies Inc. | PLTR | 301.04             | 5.23B      |
| IonQ, Inc.                 | IONQ | 148.25             | 340.29M    |
| GameStop Corp.             | GME  | 126.52             | 4.77B      |
| Robinhood Markets, Inc.    | HOOD | 122.21             | 12.31B     |
| New Gold Inc.              | NGD  | 104.12             | 110.3M     |
| Tesla, Inc.                | TSLA | 54.76              | 36.56B     |
| Grab Holdings Limited      | GRAB | 52.23              | 5.68B      |
| Apple Inc.                 | AAPL | 29.19              | 53.77B     |
| NVIDIA Corporation         | NVDA | 25.9               | 43.21B     |


## 3. Top 10 largest holds of Blackrock Inc
10 largest holds of Blackrock Inc. You can find related info on the Holders tab.
    Blackrock Inc is an investment management corporation.
    
    Sheet's fields: Name, Code, Shares, Date Reported, % Out, Value.
    
    All fields except first two should be taken from Holders tab.


In [None]:
# Collect the rows of the table that follows each "Top ... Holders" heading
# on the BLK holders page, in page order.
r = request('https://finance.yahoo.com/quote/BLK/holders')
# Name the parser explicitly so the parse tree does not depend on which
# optional parser libraries happen to be installed.
soup = BeautifulSoup(r.text, 'html.parser')


holders = []
for h3 in soup.find_all('h3', string=re.compile('Top.*Holders')):
    holders_table = h3.find_next('table')
    if holders_table is None:
        # Heading without a table after it: nothing to collect.
        continue
    # The first <tr> is skipped (presumably the header row).
    for row in holders_table.find_all('tr')[1:]:
        cells = tuple(tag.text.strip() for tag in row.find_all('td'))
        # Keep only complete (name, shares, date reported, % out, value)
        # rows; a row with any other cell count is skipped rather than
        # aborting the whole cell.
        if len(cells) != 5:
            continue
        holders.append(cells)
holders

[('Vanguard Group Inc', '13.38M', 'Dec 31, 2024', '8.62%', '12,947,752,044'),
 ('Blackrock Inc.', '10.05M', 'Dec 31, 2024', '6.47%', '9,725,493,327'),
 ('State Street Corporation',
  '6.3M',
  'Dec 31, 2024',
  '4.06%',
  '6,100,242,469'),
 ('Temasek Holdings (Private) Limited',
  '5.09M',
  'Dec 31, 2024',
  '3.28%',
  '4,929,549,042'),
 ('Bank of America Corporation',
  '4.99M',
  'Dec 31, 2024',
  '3.22%',
  '4,833,220,621'),
 ('Morgan Stanley', '4.78M', 'Dec 31, 2024', '3.08%', '4,629,727,691'),
 ('Capital World Investors', '4.1M', 'Dec 31, 2024', '2.64%', '3,966,157,390'),
 ('Charles Schwab Investment Management, Inc.',
  '3.74M',
  'Dec 31, 2024',
  '2.41%',
  '3,618,374,611'),
 ('Capital Research Global Investors',
  '3.26M',
  'Dec 31, 2024',
  '2.10%',
  '3,151,117,062'),
 ('Geode Capital Management, LLC',
  '3.01M',
  'Dec 31, 2024',
  '1.94%',
  '2,910,368,121'),
 ('VANGUARD INDEX FUNDS-Vanguard Total Stock Market Index Fund',
  '4.18M',
  'Dec 31, 2024',
  '2.69%',
  '4,042

In [205]:
# Print the first ten collected holder rows as a titled table.
fields = [
    "Name",
    "Shares",
    "Date Reported",
    "% Out",
    "Value",
]
print(draw_table(fields, holders[:10], title=" 10 largest holds of Blackrock Inc. "))

| Name                                       | Shares | Date Reported | % Out | Value          |
------------------------------------------------------------------------------------------------
| Vanguard Group Inc                         | 13.38M | Dec 31, 2024  | 8.62% | 12,947,752,044 |
| Blackrock Inc.                             | 10.05M | Dec 31, 2024  | 6.47% | 9,725,493,327  |
| State Street Corporation                   | 6.3M   | Dec 31, 2024  | 4.06% | 6,100,242,469  |
| Temasek Holdings (Private) Limited         | 5.09M  | Dec 31, 2024  | 3.28% | 4,929,549,042  |
| Bank of America Corporation                | 4.99M  | Dec 31, 2024  | 3.22% | 4,833,220,621  |
| Morgan Stanley                             | 4.78M  | Dec 31, 2024  | 3.08% | 4,629,727,691  |
| Capital World Investors                    | 4.1M   | Dec 31, 2024  | 2.64% | 3,966,157,390  |
| Charles Schwab Investment Management, Inc. | 3.74M  | Dec 31, 2024  | 2.41% | 3,618,374,611  |
| Capital Research Global Inve