In [2]:
import requests
import json
import pandas as pd
from tqdm import tqdm

In [3]:
# URL template for a company's submissions document; the placeholder takes
# the zero-padded CIK.
BASE_URL = "https://data.sec.gov/submissions/CIK{}.json"

# Request headers sent with every call. The SEC requires contact info in the
# User-Agent, so it identifies the person running this notebook.
HEADERS = {
    "User-Agent": "Gayathri Jayaraman (gayathrij@uchicago.edu)",
    "Accept-Encoding": "gzip, deflate",
    "Host": "data.sec.gov",
}

In [4]:
def fetch_company_filings(cik, timeout=10):
    """Fetch the SEC submissions JSON for a single company.

    Args:
        cik: Central Index Key as a string or integer. It is converted to a
            string and left-padded with zeros to 10 characters before being
            substituted into ``BASE_URL``.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without this a stalled
            connection would block the notebook indefinitely.

    Returns:
        The decoded JSON payload when the server answers with HTTP 200.
        ``None`` when the request fails, the status is anything other than
        200, or the body is not valid JSON; a message describing the failure
        is printed in each of those cases.
    """
    cik = str(cik).zfill(10)  # Ensure CIK is zero-padded
    url = BASE_URL.format(cik)

    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "report and return
        # None" contract as a bad status code, so one bad CIK cannot abort a
        # batch run.
        print(f"Failed to fetch data for CIK {cik}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for CIK {cik}: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError:
        # requests signals an undecodable body with a ValueError subclass.
        print(f"Failed to fetch data for CIK {cik}: response body is not valid JSON")
        return None


def extract_filings(cik_list, filing_type="10-K"):
    """Collect filings of one form type across several companies.

    Args:
        cik_list: Iterable of CIKs (anything ``fetch_company_filings`` and
            ``int()`` both accept). Each value is stored unchanged in the
            ``CIK`` column.
        filing_type: Form name to keep; compared for exact equality against
            each entry of the ``form`` list in the response.

    Returns:
        A DataFrame with the columns ``CIK``, ``Form``, ``AccessionNumber``,
        ``ReportDate`` and ``FilingURL``, one row per matching entry found
        under ``filings -> recent`` in each company's payload. The columns are
        present even when no filing matched, so downstream column access and
        the CSV header do not depend on whether any rows were found.
    """
    columns = ["CIK", "Form", "AccessionNumber", "ReportDate", "FilingURL"]
    all_filings = []

    for cik in tqdm(cik_list, desc="Fetching SEC filings"):
        data = fetch_company_filings(cik)
        if not data:
            # Nothing usable came back for this company; move on to the next.
            continue

        recent = data.get('filings', {}).get('recent', {})
        # The payload stores filings as parallel lists; zip lines them up and
        # stops at the shortest list if any of them is missing or truncated.
        rows = zip(
            recent.get('form', []),
            recent.get('accessionNumber', []),
            recent.get('reportDate', []),
        )
        for form, accession, report_date in rows:
            if form != filing_type:
                continue
            # The archive path uses the CIK without leading zeros and the
            # accession number without dashes as the folder name.
            filing_url = f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{accession.replace('-', '')}/{accession}-index.html"
            all_filings.append({
                "CIK": cik,
                "Form": form,
                "AccessionNumber": accession,
                "ReportDate": report_date,
                "FilingURL": filing_url
            })

    # Passing columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(all_filings, columns=columns)

In [8]:
# Example companies, identified by their SEC Central Index Key.
cik_list = [
    '0000320193',  # Apple
    '0000789019',  # Microsoft
    '0001018724',  # Amazon
]

# Collect the 10-K filings, save them to CSV, and preview the first rows.
filings_df = extract_filings(cik_list, filing_type="10-K")
filings_df.to_csv("sec_10k_filings.csv", index=False)
print(filings_df.head())

Fetching SEC filings: 100%|██████████| 3/3 [00:00<00:00,  4.53it/s]

          CIK  Form       AccessionNumber  ReportDate  \
0  0000320193  10-K  0000320193-24-000123  2024-09-28   
1  0000320193  10-K  0000320193-23-000106  2023-09-30   
2  0000320193  10-K  0000320193-22-000108  2022-09-24   
3  0000320193  10-K  0000320193-21-000105  2021-09-25   
4  0000320193  10-K  0000320193-20-000096  2020-09-26   

                                           FilingURL  
0  https://www.sec.gov/Archives/edgar/data/320193...  
1  https://www.sec.gov/Archives/edgar/data/320193...  
2  https://www.sec.gov/Archives/edgar/data/320193...  
3  https://www.sec.gov/Archives/edgar/data/320193...  
4  https://www.sec.gov/Archives/edgar/data/320193...  



