In [1]:
from sec_edgar_api import EdgarClient
import json
import os
from tqdm import tqdm

In [None]:
def get_filings(ciks, target_year, industry):
    """Collect the 10-K filings filed in ``target_year`` and save them as JSON.

    For every CIK the company's submissions are fetched through
    ``EdgarClient.get_submissions``. Entries whose form is exactly ``"10-K"``
    and whose filing date starts with ``str(target_year)`` are kept, and each
    kept record gets a ``url`` pointing at the filing's ``.txt`` document.

    The combined list is written to
    ``./10K_URL_{industry}/All_10K_Filings_{target_year}.json`` (the directory
    is created if needed).

    Args:
        ciks: Iterable of CIK identifiers, passed as-is to the client and
            interpolated as-is into the archive URL.
        target_year: Year to keep; compared as a string prefix of the filing date.
        industry: Label used only to build the output directory name.

    Returns:
        None. Results are written to disk; failing CIKs are printed.

    NOTE(review): only the ``filings.recent`` section of the response is
    scanned -- confirm it reaches back far enough for older target years.
    """
    edgar = EdgarClient(user_agent="<Sample Company Name> <Admin Contact>@<Sample Company Domain>")

    year_prefix = str(target_year)
    all_filings_for_year = []
    error_ciks = []
    failures = []  # (cik, exception) pairs, reported after the loop

    # Loop through each company
    for cik in ciks:
        try:
            data = edgar.get_submissions(cik)

            # A company may have no ticker at all (key missing, None, or an
            # empty list); that must not discard its filings.
            tickers = data.get('tickers') or []
            company = tickers[0] if tickers else "Unknown_Company"

            recent_filings = data.get('filings', {}).get('recent', {})
            forms = recent_filings.get('form', [])
            accession_numbers = recent_filings.get('accessionNumber', [])
            filing_dates = recent_filings.get('filingDate', [])

            ten_k_filings = []
            for form, accession, filing_date in zip(forms, accession_numbers, filing_dates):
                if form != "10-K" or not filing_date.startswith(year_prefix):
                    continue

                filing = {"company": company, "accessionNumber": accession, "filingDate": filing_date}
                if accession and filing_date:
                    # The archive folder name is the accession number without dashes.
                    formatted_accession = accession.replace("-", "")
                    filing["url"] = (
                        f"https://www.sec.gov/Archives/edgar/data/{cik}/{formatted_accession}/{accession}.txt"
                    )
                ten_k_filings.append(filing)

        except Exception as e:
            # Best effort: one failing company must not abort the whole run,
            # but keep the reason so the failure can be diagnosed.
            error_ciks.append(cik)
            failures.append((cik, e))
            continue

        # Only extend once the company's records are fully built.
        all_filings_for_year.extend(ten_k_filings)

    print(f"Error CIKs: {error_ciks}")
    for failed_cik, exc in failures:
        print(f"  CIK {failed_cik}: {type(exc).__name__}: {exc}")

    output_file_path = f'./10K_URL_{industry}/All_10K_Filings_{target_year}.json'
    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

    with open(output_file_path, 'w', encoding="utf-8") as output_file:
        json.dump(all_filings_for_year, output_file, indent=4)

In [None]:
# Run configuration.
# Industries: TECH, OIL, HEALTH
industry = "TECH"
target_years = [2019, 2020, 2021, 2022]

# ciks.json is looked up by industry name to get the CIKs to process.
with open("./ciks.json", 'r') as file:
    ciks_data = json.load(file)

ciks = ciks_data[industry]

# One get_filings call (and therefore one output file) per target year.
for year in tqdm(target_years):
    print(f"Getting 10-K filings for the year {year}...")
    get_filings(ciks, year, industry)
    print(f"10-K filings for the year {year} have been saved!")

  0%|          | 0/4 [00:00<?, ?it/s]

Getting 10-K filings for the year 2019...


  0%|          | 0/4 [00:07<?, ?it/s]


KeyboardInterrupt: 