In [14]:
# pip install requests pandas openpyxl tenacity python-dateutil
"""SEC 10-Q screener script."""
from __future__ import annotations

import json
import time
from pathlib import Path
from typing import Dict, List, Optional

import pandas as pd
import requests
from dateutil import parser
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

# Companies to screen; each entry pairs a display name with a ticker.
# A ticker of None marks an entry that has no ticker (see the inline note).
WATCHLIST = [
    {"name": "Bath & Body Works", "ticker": "BBWI"},
    {"name": "Microsoft", "ticker": "MSFT"},
    {"name": "Google (Alphabet)", "ticker": "GOOGL"},
    {"name": "Robinhood", "ticker": "HOOD"},
    {"name": "NVIDIA", "ticker": "NVDA"},
    {"name": "Figma", "ticker": None},  # private; handle gracefully
]

# Default request headers: installed on the shared session below and used as
# the default ``headers`` argument of make_request().
# NOTE(review): this User-Agent carries no contact e-mail. The www.sec.gov
# request recorded right after this cell failed with 403 Forbidden, while the
# later cell whose User-Agent includes an e-mail succeeds — presumably that is
# the cause; confirm against SEC guidance.
HEADERS = {"User-Agent": "Jerry Mcguire", "Accept-Encoding": "gzip, deflate"}
# Seconds slept before every request issued through make_request().
RATE_LIMIT_SECONDS = 0.21

# One shared session so all requests reuse the same default headers.
session = requests.Session()
session.headers.update(HEADERS)


from tenacity import retry_if_exception  # predicate-based retry condition


def _is_retryable_error(exc: BaseException) -> bool:
    """Return True when ``exc`` is a failure that a later attempt may cure.

    HTTP errors are retryable only for status 429 and 5xx; other HTTP errors
    (e.g. 403, 404) will not change on retry. Every other
    ``requests.RequestException`` (connection errors, timeouts, ...) stays
    retryable.
    """
    response = getattr(exc, "response", None)
    if isinstance(exc, requests.HTTPError) and response is not None:
        status = response.status_code
        return status == 429 or 500 <= status < 600
    return isinstance(exc, requests.RequestException)


@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=1, max=10),
    retry=retry_if_exception(_is_retryable_error),
    reraise=True,
)
def make_request(
    url: str, *, headers: Dict[str, str] = HEADERS
) -> requests.Response:
    """Make HTTP GET request with retry and rate limiting.

    The SEC requires a User-Agent header to be sent with all requests; callers
    can override the default by providing ``headers``.

    Args:
        url: Address to fetch.
        headers: Headers sent with this request (keyword-only).

    Returns:
        The response, guaranteed to have a non-error status code.

    Raises:
        requests.HTTPError: On a 4xx/5xx status. Status 429 and 5xx are
            retried (up to five attempts with exponential backoff) before the
            error propagates; other client errors such as 403 propagate
            immediately because repeating the request cannot fix them.
        requests.RequestException: On transport failures once the retry
            attempts are exhausted.
    """
    time.sleep(RATE_LIMIT_SECONDS)
    resp = session.get(url, headers=headers, timeout=30)
    if resp.status_code == 429 or 500 <= resp.status_code < 600:
        retry_after = resp.headers.get("Retry-After")
        if retry_after:
            try:
                # Honour the server's requested delay before the retry backoff.
                time.sleep(float(retry_after))
            except (ValueError, OverflowError):
                # Non-numeric (HTTP-date), negative, NaN or infinite value:
                # rely on the decorator's exponential backoff alone.
                pass
    resp.raise_for_status()
    return resp


def fetch_json(url: str) -> Dict:
    """Download ``url`` via ``make_request`` and return its decoded JSON body."""
    return make_request(url, headers=HEADERS).json()



In [16]:
# Fetch the SEC ticker map and show its decoded JSON as the cell result.
# In the recorded run this request was rejected with 403 Forbidden.
resp = make_request("https://www.sec.gov/files/company_tickers.json", headers=HEADERS)
resp.json()

HTTPError: 403 Client Error: Forbidden for url: https://www.sec.gov/files/company_tickers

In [11]:
import time  # needed by the Retry-After sleep below; this cell did not import it

import requests

# Headers for this diagnostic request; defined once and copied for the
# per-request ``headers`` variable so the two cannot drift apart.
HEADERS = {"User-Agent": "Jerry Mcguire", "Accept-Encoding": "gzip, deflate"}
headers = dict(HEADERS)
RATE_LIMIT_SECONDS = 0.21

session = requests.Session()
session.headers.update(HEADERS)

# Apple (AAPL) submissions JSON — CIK padded to 10 digits
url = "https://data.sec.gov/submissions/CIK0000320193.json"

resp = session.get(url, headers=headers, timeout=30)
print(resp.status_code)
# Only throttling (429) and server errors (5xx) raise here; other statuses fall
# through so the raw body can be inspected below.
if resp.status_code == 429 or 500 <= resp.status_code < 600:
    retry_after = resp.headers.get("Retry-After")
    if retry_after:
        try:
            time.sleep(float(retry_after))
        except (ValueError, OverflowError):
            # Non-numeric, negative, NaN or infinite Retry-After: skip the wait.
            pass
    resp.raise_for_status()
print(resp.content)
# r = requests.get(url, headers=headers, timeout=20)
# r.raise_for_status()
# data = r.json()
# print(data["name"])          # Company name
# print(len(data["filings"]["recent"]["form"]))  # how many recent forms


200
b'{"cik":"0000320193","entityType":"operating","sic":"3571","sicDescription":"Electronic Computers","ownerOrg":"06 Technology","insiderTransactionForOwnerExists":0,"insiderTransactionForIssuerExists":1,"name":"Apple Inc.","tickers":["AAPL"],"exchanges":["Nasdaq"],"ein":"942404110","lei":null,"description":"","website":"","investorWebsite":"","category":"Large accelerated filer","fiscalYearEnd":"0927","stateOfIncorporation":"CA","stateOfIncorporationDescription":"CA","addresses":{"mailing":{"street1":"ONE APPLE PARK WAY","street2":null,"city":"CUPERTINO","stateOrCountry":"CA","zipCode":"95014","stateOrCountryDescription":"CA","isForeignLocation":0,"foreignStateTerritory":null,"country":null,"countryCode":null},"business":{"street1":"ONE APPLE PARK WAY","street2":null,"city":"CUPERTINO","stateOrCountry":"CA","zipCode":"95014","stateOrCountryDescription":"CA","isForeignLocation":null,"foreignStateTerritory":null,"country":null,"countryCode":null}},"phone":"(408) 996-1010","flags":"","

In [22]:
import json
import time
import requests
import pandas as pd

# Pause, in seconds, taken before every request made by make_request().
# NOTE(review): presumably chosen to stay under the SEC request-rate limit — confirm.
RATE_LIMIT_SECONDS = 0.25
# Shared HTTP session reused by make_request().
SESSION = requests.Session()
# Headers passed explicitly with every request made by make_request().
HEADERS = {
    # Use YOUR real contact email here
    "User-Agent": "Jerry Mcguire sec-10q-screener (amos@example.com)",
    "Accept": "application/json",
    "Accept-Encoding": "gzip, deflate",
}

def make_request(url: str) -> requests.Response:
    """GET ``url`` through the shared session after the rate-limit pause.

    Raises ``requests.HTTPError`` for any 4xx/5xx status so the underlying
    problem is visible to the caller instead of being silently ignored.
    """
    time.sleep(RATE_LIMIT_SECONDS)
    response = SESSION.get(url, timeout=30, headers=HEADERS)
    # Fail loudly on client/server errors.
    response.raise_for_status()
    return response

def fetch_json_robust(url: str):
    """Fetch ``url`` and decode it as JSON, tolerating a UTF-8 BOM.

    The response's own JSON decoder is tried first. If that fails, the raw
    bytes are decoded as ``utf-8-sig`` (undecodable bytes replaced) and parsed
    again.

    Raises:
        RuntimeError: If the fallback text starts with ``<``, i.e. the body
            looks like an HTML page rather than JSON.
    """
    response = make_request(url)
    try:
        payload = response.json()  # the usual path
    except ValueError:
        # The body may carry a BOM, or may be an HTML error page.
        decoded = response.content.decode("utf-8-sig", errors="replace")
        looks_like_markup = decoded.lstrip().startswith("<")
        if not looks_like_markup:
            return json.loads(decoded)
        # Markup instead of JSON: most likely a blocked or throttled request.
        raise RuntimeError(
            f"Expected JSON from {url} but got HTML (likely 403/429). "
            "Use a real User-Agent + email and slow down."
        )
    else:
        return payload

# --- Use it ---
map_url = "https://www.sec.gov/files/company_tickers.json"
tickers_map = fetch_json_robust(map_url)

# Example: build a DataFrame from the mapping's values and look up AAPL.
rows = [*tickers_map.values()]  # materialise the dict view as a list of records
df = pd.DataFrame.from_records(rows).assign(
    # CIK rendered as a zero-padded 10-character string
    cik_padded=lambda frame: frame["cik_str"].astype(str).str.zfill(10)
)

print(df.loc[df["ticker"].eq("AAPL"), ["ticker", "cik_str", "cik_padded", "title"]])


  ticker  cik_str  cik_padded       title
2   AAPL   320193  0000320193  Apple Inc.


In [25]:
# pip install requests pandas openpyxl tenacity python-dateutil
"""SEC 10-Q screener script."""
from __future__ import annotations

import json
import time
from pathlib import Path
from typing import Dict, List, Optional

import pandas as pd
import requests
from dateutil import parser
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

# Companies to screen; each entry pairs a display name with a ticker.
# A ticker of None marks an entry that has no ticker (see the inline note).
WATCHLIST = [
    {"name": "Bath & Body Works", "ticker": "BBWI"},
    {"name": "Microsoft", "ticker": "MSFT"},
    {"name": "Google (Alphabet)", "ticker": "GOOGL"},
    {"name": "Robinhood", "ticker": "HOOD"},
    {"name": "NVIDIA", "ticker": "NVDA"},
    {"name": "Figma", "ticker": None},  # private; handle gracefully
]

# Default request headers: installed on the shared session below and used as
# the default ``headers`` argument of make_request(). The User-Agent includes a
# contact e-mail; replace the placeholder address with a real one.
HEADERS = {"User-Agent": "Jerry Mcguire sec-10q-screener (amos@example.com)",
    "Accept": "application/json",
    "Accept-Encoding": "gzip, deflate"}
# Seconds slept before every request issued through make_request().
RATE_LIMIT_SECONDS = 0.21

# One shared session so all requests reuse the same default headers.
session = requests.Session()
session.headers.update(HEADERS)


from tenacity import retry_if_exception  # predicate-based retry condition


def _is_retryable_error(exc: BaseException) -> bool:
    """Return True when ``exc`` is a failure that a later attempt may cure.

    HTTP errors are retryable only for status 429 and 5xx; other HTTP errors
    (e.g. 403, 404) will not change on retry. Every other
    ``requests.RequestException`` (connection errors, timeouts, ...) stays
    retryable.
    """
    response = getattr(exc, "response", None)
    if isinstance(exc, requests.HTTPError) and response is not None:
        status = response.status_code
        return status == 429 or 500 <= status < 600
    return isinstance(exc, requests.RequestException)


@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=1, max=10),
    retry=retry_if_exception(_is_retryable_error),
    reraise=True,
)
def make_request(
    url: str, *, headers: Dict[str, str] = HEADERS
) -> requests.Response:
    """Make HTTP GET request with retry and rate limiting.

    The SEC requires a User-Agent header to be sent with all requests; callers
    can override the default by providing ``headers``.

    Args:
        url: Address to fetch.
        headers: Headers sent with this request (keyword-only).

    Returns:
        The response, guaranteed to have a non-error status code.

    Raises:
        requests.HTTPError: On a 4xx/5xx status. Status 429 and 5xx are
            retried (up to five attempts with exponential backoff) before the
            error propagates; other client errors such as 403 propagate
            immediately because repeating the request cannot fix them.
        requests.RequestException: On transport failures once the retry
            attempts are exhausted.
    """
    time.sleep(RATE_LIMIT_SECONDS)
    resp = session.get(url, headers=headers, timeout=30)
    if resp.status_code == 429 or 500 <= resp.status_code < 600:
        retry_after = resp.headers.get("Retry-After")
        if retry_after:
            try:
                # Honour the server's requested delay before the retry backoff.
                time.sleep(float(retry_after))
            except (ValueError, OverflowError):
                # Non-numeric (HTTP-date), negative, NaN or infinite value:
                # rely on the decorator's exponential backoff alone.
                pass
    resp.raise_for_status()
    return resp


def fetch_json(url: str) -> Dict:
    """Download ``url`` via ``make_request`` and return its decoded JSON body."""
    return make_request(url, headers=HEADERS).json()

In [26]:
# Smoke-test fetch_json() against the SEC ticker map using the HEADERS that include a contact e-mail.
# NOTE(review): this displays the entire mapping as the cell output; consider showing only a few entries.
fetch_json("https://www.sec.gov/files/company_tickers.json")

{'0': {'cik_str': 1045810, 'ticker': 'NVDA', 'title': 'NVIDIA CORP'},
 '1': {'cik_str': 789019, 'ticker': 'MSFT', 'title': 'MICROSOFT CORP'},
 '2': {'cik_str': 320193, 'ticker': 'AAPL', 'title': 'Apple Inc.'},
 '3': {'cik_str': 1652044, 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
 '4': {'cik_str': 1018724, 'ticker': 'AMZN', 'title': 'AMAZON COM INC'},
 '5': {'cik_str': 1326801, 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
 '6': {'cik_str': 1730168, 'ticker': 'AVGO', 'title': 'Broadcom Inc.'},
 '7': {'cik_str': 1318605, 'ticker': 'TSLA', 'title': 'Tesla, Inc.'},
 '8': {'cik_str': 1067983,
  'ticker': 'BRK-B',
  'title': 'BERKSHIRE HATHAWAY INC'},
 '9': {'cik_str': 104169, 'ticker': 'WMT', 'title': 'Walmart Inc.'},
 '10': {'cik_str': 19617, 'ticker': 'JPM', 'title': 'JPMORGAN CHASE & CO'},
 '11': {'cik_str': 1341439, 'ticker': 'ORCL', 'title': 'ORACLE CORP'},
 '12': {'cik_str': 59478, 'ticker': 'LLY', 'title': 'ELI LILLY & Co'},
 '13': {'cik_str': 1403161, 'ticker': 'V', 'title'