In [None]:
import requests
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time


# Cookie mapping handed to every requests.get call in this notebook.
# All three values are blank placeholders.
cookies = dict.fromkeys(('bm_mi', 'ak_bmsc', 'bm_sv'), '')

In [3]:
def get_cik(ticker):
    """Look up the zero-padded, 10-digit SEC CIK for a stock ticker.

    Downloads the SEC ``company_tickers.json`` mapping and scans it for an
    entry whose ticker equals ``ticker`` (case-insensitive).

    Args:
        ticker: Ticker symbol such as ``"TSLA"``. Surrounding whitespace is
            ignored.

    Returns:
        The CIK as a 10-character zero-padded string, or ``None`` when
        ``ticker`` is blank / not a string or no entry matches.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
    """
    # Spreadsheet inputs can be NaN/None; treat those as "not found" instead
    # of crashing on .lower().
    if not isinstance(ticker, str) or not ticker.strip():
        return None
    wanted = ticker.strip().lower()

    headers = {
        'accept-language': 'en-US,en;q=0.5',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    }

    response = requests.get(
        'https://www.sec.gov/files/company_tickers.json',
        cookies=cookies,
        headers=headers,
        timeout=30,  # never hang the notebook on a stalled connection
    )
    # Surface throttling/blocking (403/429) as an HTTP error rather than as
    # a confusing JSON decode failure on an HTML error page.
    response.raise_for_status()

    for entry in response.json().values():
        if str(entry.get('ticker', '')).lower() == wanted:
            return str(entry['cik_str']).zfill(10)
    return None


In [4]:
def get_filing_url(cik, form_type, before_date):
    """Return the EDGAR index-page URL of the latest matching filing.

    Fetches the company's submissions JSON and, among the "recent" filings,
    picks the most recently filed ``form_type`` whose filing date is on or
    before ``before_date``.

    Args:
        cik: Company CIK as an int or a (possibly zero-padded) digit string.
        form_type: Exact form name to match, e.g. ``"10-Q"`` or ``"10-K"``.
        before_date: Inclusive cut-off date formatted ``YYYY-MM-DD``.

    Returns:
        URL of the filing's ``-index.htm`` page, or ``None`` when no filing
        of that form type was filed on or before the cut-off.

    Raises:
        ValueError: if ``cik`` is not numeric or a date is not ``YYYY-MM-DD``.
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
    """
    # Validate inputs before spending a network round-trip on them.
    cik_number = int(cik)
    cutoff = datetime.strptime(before_date, "%Y-%m-%d")

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    }

    # The submissions endpoint is keyed by the 10-digit zero-padded CIK.
    response = requests.get(
        f'https://data.sec.gov/submissions/CIK{cik_number:010d}.json',
        cookies=cookies,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()

    filings = response.json().get("filings", {}).get("recent", {})
    dates = filings.get("filingDate", [])
    forms = filings.get("form", [])
    accession_nums = filings.get("accessionNumber", [])

    # Track the newest qualifying filing explicitly instead of relying on
    # the feed being sorted newest-first. Strict ">" keeps the first entry
    # on date ties, matching the previous first-match behaviour.
    best_date = None
    best_acc_num = None
    for form, date, acc_num in zip(forms, dates, accession_nums):
        if form != form_type:
            continue
        filing_date = datetime.strptime(date, "%Y-%m-%d")
        if filing_date > cutoff:
            continue
        if best_date is None or filing_date > best_date:
            best_date, best_acc_num = filing_date, acc_num

    if best_acc_num is None:
        return None

    # Archive paths use the CIK without leading zeros and the accession
    # number without dashes as the folder name.
    acc_num_fmt = best_acc_num.replace("-", "")
    return f"https://www.sec.gov/Archives/edgar/data/{cik_number}/{acc_num_fmt}/{best_acc_num}-index.htm"


In [None]:
def extract_shares_outstanding_from_url(index_url):
    """Scrape the shares-outstanding figure from a filing's interactive view.

    Opens ``index_url`` in headless Chrome, clicks the button whose text
    contains "Interactive", waits for a table to be present, and returns the
    third cell of the first table row whose first cell mentions both
    "common stock" and "outstanding".

    Args:
        index_url: URL of a filing index page.

    Returns:
        The matching cell's text (stripped, not parsed into a number), or
        ``None`` when ``index_url`` is empty, no row matches, or the page
        could not be processed. Failures are reported with ``print``.
    """
    if not index_url:
        # Nothing to load; avoid starting a browser for nothing.
        return None

    options = Options()
    options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36")
    options.add_argument("--headless")
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(index_url)

        button = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.XPATH, '//a//button[contains(text(), "Interactive")]'))
        )
        button.click()

        WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.XPATH, '//table')))

        # Short grace period after the first table appears so the remaining
        # rows can finish rendering.
        time.sleep(1.5)

        for row in driver.find_elements(By.XPATH, '//table//tr'):
            try:
                label_text = row.find_element(By.XPATH, './td[1]').text.strip().lower()
                if "common stock" in label_text and "outstanding" in label_text:
                    value_text = row.find_element(By.XPATH, './td[3]').text.strip()
                    print("Outstanding Shares:", value_text)
                    return value_text
            except Exception:
                # Rows lacking the expected <td> cells (e.g. header rows)
                # are skipped. Unlike a bare ``except``, this lets
                # KeyboardInterrupt/SystemExit propagate.
                continue
    except Exception as exc:
        # Best-effort scrape: report why it failed instead of hiding it,
        # then fall through to the "not found" result.
        print(f"Could not extract shares outstanding from {index_url}: {exc!r}")
    finally:
        # Always release the browser, even on early return or error.
        driver.quit()

    return None

# extract_shares_outstanding_from_url('https://www.sec.gov/Archives/edgar/data/320193/000119312515023697/0001193125-15-023697-index.htm')

In [6]:
def process_ticker(ticker, date="2023-12-31"):
    """Resolve a ticker to a shares-outstanding value scraped from a filing.

    Steps: ticker -> CIK -> filing index URL -> scraped shares value. The
    CIK and the filing URL are printed as they are found.

    Args:
        ticker: Ticker symbol to look up.
        date: Inclusive cut-off date (``YYYY-MM-DD``) passed to the filing
            search.

    Returns:
        ``(ticker, shares, status)`` where ``status`` is one of
        ``"CIK not found"``, ``"Filing not found"``, ``"Shares not found"``
        or ``"Success"``; ``shares`` is ``None`` unless a value was scraped.
    """
    cik = get_cik(ticker)
    if cik:
        print(cik)
    else:
        return ticker, None, "CIK not found"

    # NOTE: a 10-Q is preferred whenever one exists on or before the date;
    # the 10-K is only consulted when no 10-Q is found.
    filing_url = None
    for form_name in ("10-Q", "10-K"):
        filing_url = get_filing_url(cik, form_name, date)
        if filing_url:
            break
    if not filing_url:
        return ticker, None, "Filing not found"
    print(filing_url)

    shares = extract_shares_outstanding_from_url(filing_url)
    if shares:
        status = "Success"
    else:
        status = "Shares not found"
    return ticker, shares, status


In [None]:
# Quick manual check of the pipeline: one ticker, fixed cut-off date.
tickers = ["TSLA"]
results = []

for symbol in tickers:
    outcome = process_ticker(symbol, "2020-12-31")
    ticker, shares, status = outcome
    results.append(outcome)
    print(ticker, "=>", status)

# READS THE LIST OF STOCKS AND THEIR RESPECTIVE DATES FROM THE EXCEL FILE AND UPDATES SHARES OUTSTANDING IN THE SAME FILE

In [None]:
import pandas as pd
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Path to the Excel workbook. It must contain 'Symbol' and 'Date' columns;
# a 'Shares Outstanding' column is created when absent.
EXCEL_FILE = r""

df = pd.read_excel(EXCEL_FILE)
df['Date'] = pd.to_datetime(df['Date']).dt.date
if 'Shares Outstanding' not in df.columns:
    df['Shares Outstanding'] = 0
# Scraped values are text (or None when nothing was found), so hold the
# column as object dtype rather than assigning strings into an int column.
df['Shares Outstanding'] = df['Shares Outstanding'].astype(object)

for index, row in df.iterrows():
    ticker = row['Symbol']
    date = row['Date']

    # Rows with a blank symbol or date cannot be looked up; skip them
    # instead of aborting the whole run.
    if pd.isna(ticker) or pd.isna(date):
        logging.warning(f"Skipped row {index}: missing Symbol or Date")
        continue

    date_str = date.strftime('%Y-%m-%d')
    current_shares = row['Shares Outstanding']

    # Both 0 and a blank cell mean "not fetched yet". Blank cells come back
    # as NaN on re-read, which also makes earlier failed lookups retryable.
    if pd.isna(current_shares) or current_shares == 0:
        ticker, shares, status = process_ticker(ticker, date_str)
        print(f"{ticker} => {status}")
        df.at[index, 'Shares Outstanding'] = shares
        logging.info(f"Updated {ticker} for {date_str}: Shares = {shares}, Status = {status}")
        # Checkpoint after every real update so a crash mid-run keeps the
        # progress made so far.
        df.to_excel(EXCEL_FILE, index=False)
    else:
        print(f"{ticker} => Skipped (Shares already present: {current_shares})")
        logging.info(f"Skipped {ticker} for {date_str}: Shares already present = {current_shares}")

# Final write so the workbook reflects the frame even when every row was
# skipped.
df.to_excel(EXCEL_FILE, index=False)

logging.info(f"Done, bro! '{EXCEL_FILE}' has your calculated shares.")