In [None]:
# HTTP lib
import requests

# data.sec.gov/submissions/ API: easily find all filing documents by CIK
Plugging a 10-digit CIK (*including leading 0s*) into https://data.sec.gov/submissions/CIK##########.json returns a structure containing the following information (plus more) about each filing: date of filing, form type/code, an accession number, a primary document filename, and a description of that primary document. Given a 10-digit CIK, the functions below attempt to list every primary document URL together with its filing date and form type.

In [None]:
# Support for methods below. If additional submission info is found in filings->files, those JSON files are traversed here. target_form_code = ANY to list all filings.
def traverse_extended_submissions(ext_sub_uri, target_entity_cik, target_form_code='ANY'):
  """Print the filings listed in one additional submissions JSON file.

  Requests https://data.sec.gov/submissions/<ext_sub_uri> and reads its top-level
  'filingDate', 'accessionNumber', 'primaryDocument' and 'form' lists, treating
  them as parallel columns (entry i of each list describes the same filing).

  Output, one line per filing, written to stdout:
    * target_form_code == 'ANY':  "<index> Form: <form>, filed on: <date> : <url>"
    * otherwise, matching forms only:  "<date> <url>"

  Args:
    ext_sub_uri: File name appended to https://data.sec.gov/submissions/.
    target_entity_cik: CIK string, inserted verbatim into the archive URL.
    target_form_code: Form code to match exactly, or 'ANY' (default) to list
      every filing.

  Returns:
    None.

  Raises:
    requests.HTTPError: if the server answers with an error status.
    requests.RequestException: on connection failure or when the request times out.
    KeyError: if one of the four expected lists is missing from the JSON.
  """

  # Needed to interact with SEC website
  # NOTE(review): SEC's guidance for automated EDGAR access asks clients to declare a
  # User-Agent that identifies the requester (name + contact e-mail). Consider replacing
  # this browser string with one - left unchanged here.
  request_headers = { "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" }

  # Without a timeout, requests waits forever on a stalled connection
  request_timeout_seconds = 30

  # Base URL for archived documents/data
  submissions_url = r"https://data.sec.gov/submissions/{}".format(ext_sub_uri)
  base_archive_url = r"https://www.sec.gov/Archives/edgar/data/{}/".format(target_entity_cik)

  # Get the JSON data
  response = requests.get(url = submissions_url, headers = request_headers, timeout = request_timeout_seconds)
  response.raise_for_status()

  json_data = response.json()

  # We need 4 pieces of information about each filing: filing date, accession number, primary doc filename, and form code.
  date_list = json_data['filingDate']
  accession_list = json_data['accessionNumber']
  primary_docs_list = json_data['primaryDocument']
  form_code_list = json_data['form']

  list_every_form = (target_form_code == 'ANY')

  # Walk the four columns in lockstep, one tuple per filing
  filing_rows = zip(date_list, accession_list, primary_docs_list, form_code_list)
  for filing_index, (filing_date, accession_num, primary_doc, form_code) in enumerate(filing_rows):

    # The archive folder name is the accession number with its hyphens removed
    primary_doc_url = base_archive_url + accession_num.replace('-','') + '/' + primary_doc

    if list_every_form:
      print(r"{} Form: {}, filed on: {} : {}".format(filing_index, form_code, filing_date, primary_doc_url))
    elif form_code == target_form_code:
      # Print filing date and URL
      print(r"{} {}".format(filing_date, primary_doc_url))

  return

In [None]:
def print_all_filing_docs(target_entity_cik):
  """Print form type, filing date and primary-document URL for every filing of a CIK.

  Requests https://data.sec.gov/submissions/CIK<target_entity_cik>.json, prints one
  line per entry under filings->recent as
  "<index> Form: <form>, filed on: <date> : <url>", then hands every file named
  under filings->files to traverse_extended_submissions(). Always ends by
  printing "End list".

  Args:
    target_entity_cik: CIK string, inserted verbatim into both URLs. The
      submissions endpoint expects all 10 digits, leading zeros included.

  Returns:
    None. Everything is written to stdout.

  Raises:
    requests.HTTPError: if the main submissions request gets an error status.
    requests.RequestException: on connection failure or timeout of the main request.
    KeyError: if filings->recent or one of its four lists is missing.
  """

  # Needed to interact with SEC website
  # NOTE(review): SEC's guidance for automated EDGAR access asks clients to declare a
  # User-Agent that identifies the requester (name + contact e-mail). Consider replacing
  # this browser string with one - left unchanged here.
  request_headers = { "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" }

  # Without a timeout, requests waits forever on a stalled connection
  request_timeout_seconds = 30

  # Base URL for archived documents/data
  base_archive_url = r"https://www.sec.gov/Archives/edgar/data/{}/".format(target_entity_cik)

  # Build the submissions URL, request it, and parse the JSON
  submissions_url = r"https://data.sec.gov/submissions/CIK{}.json".format(target_entity_cik)

  response = requests.get(url = submissions_url, headers = request_headers, timeout = request_timeout_seconds)
  response.raise_for_status()

  json_data = response.json()

  # The most recent filings (about 1,000, per the original author's note) are under filings->recent
  # We need 4 pieces of information about each filing: filing date, accession number, primary doc filename, and form code.
  # filings->recent holds a separate ordered list for each of these pieces of info
  recent_filings = json_data['filings']['recent']
  date_list = recent_filings['filingDate']
  accession_list = recent_filings['accessionNumber']
  primary_docs_list = recent_filings['primaryDocument']
  form_code_list = recent_filings['form']

  # Walk the four columns in lockstep, one tuple per filing
  filing_rows = zip(date_list, accession_list, primary_docs_list, form_code_list)
  for filing_index, (filing_date, accession_num, primary_doc, form_code) in enumerate(filing_rows):

    # Build full URL to primary document. Remove hyphens from accession number
    # TODO: 5/26/2000 and before, primary doc should just be Archives/edgar/data/CIK/ACCESSION_WITH_DASHES.txt
    primary_doc_url = base_archive_url + accession_num.replace('-','') + '/' + primary_doc

    # Print filing type, date, and primary document URL
    print(r"{} Form: {}, filed on: {} : {}".format(filing_index, form_code, filing_date, primary_doc_url))

  # In case there were more filings than fit in filings->recent, check filings->files for links to additional ones
  # Each entry in "files" contains "name" field with URI of another JSON file set up like submissions/CIK###.json
  # A missing or null "files" entry simply means there is nothing more to fetch
  for sub_link in json_data['filings'].get('files') or []:
    try:
      traverse_extended_submissions(ext_sub_uri = sub_link['name'], target_entity_cik = target_entity_cik)
    except Exception as error:
      # Still best-effort: one bad extension file must not abort the listing.
      # Unlike a bare except, this reports the failure, moves on to the next
      # file, and lets KeyboardInterrupt through.
      print("Skipping additional submissions file {}: {!r}".format(sub_link, error))

  print("End list")

  return

In [None]:
# Example of calling the function above: list every filing for one entity.
# The CIK is kept as a 10-character string so that its leading zeros survive.
test_target_cik = "0000320193" 
print_all_filing_docs(test_target_cik)

# Find filings of specific form type of given CIK
Consult https://www.sec.gov/info/edgar/forms/edgform.pdf and https://www.sec.gov/about/forms/secforms.htm for lists and descriptions of the different form types. The function below is a slightly modified version of the one above.

In [None]:
def print_filing_docs_of_type(target_form_code, target_entity_cik):
  """Print filing date and primary-document URL for every filing of one form type.

  Requests https://data.sec.gov/submissions/CIK<target_entity_cik>.json, prints a
  "Form <code> filings found:" header, then one "<date> <url>" line for each
  entry under filings->recent whose form code equals target_form_code. Every
  file named under filings->files is then handed to
  traverse_extended_submissions() with the same form code. Always ends by
  printing "End list".

  Args:
    target_form_code: Form code to match exactly (the example cell uses "10-Q").
    target_entity_cik: CIK string, inserted verbatim into both URLs. The
      submissions endpoint expects all 10 digits, leading zeros included.

  Returns:
    None. Everything is written to stdout.

  Raises:
    requests.HTTPError: if the main submissions request gets an error status.
    requests.RequestException: on connection failure or timeout of the main request.
    KeyError: if filings->recent or one of its four lists is missing.
  """

  # Needed to interact with SEC website
  # NOTE(review): SEC's guidance for automated EDGAR access asks clients to declare a
  # User-Agent that identifies the requester (name + contact e-mail). Consider replacing
  # this browser string with one - left unchanged here.
  request_headers = { "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" }

  # Without a timeout, requests waits forever on a stalled connection
  request_timeout_seconds = 30

  # Base URL for archived documents/data
  base_archive_url = r"https://www.sec.gov/Archives/edgar/data/{}/".format(target_entity_cik)

  # Build the submissions URL, request it, and parse the JSON
  submissions_url = r"https://data.sec.gov/submissions/CIK{}.json".format(target_entity_cik)

  response = requests.get(url = submissions_url, headers = request_headers, timeout = request_timeout_seconds)
  response.raise_for_status()

  json_data = response.json()

  # The most recent filings (about 1,000, per the original author's note) are under filings->recent
  # We need 4 pieces of information about each filing: filing date, accession number, primary doc filename, and form code.
  # filings->recent holds a separate ordered list for each of these pieces of info
  recent_filings = json_data['filings']['recent']
  date_list = recent_filings['filingDate']
  accession_list = recent_filings['accessionNumber']
  primary_docs_list = recent_filings['primaryDocument']
  form_code_list = recent_filings['form']

  print("Form {} filings found:".format(target_form_code))

  # Walk the four columns in lockstep, keeping only the requested form type
  for filing_date, accession_num, primary_doc, form_code in zip(date_list, accession_list, primary_docs_list, form_code_list):

    if form_code != target_form_code:
      continue

    # Build full URL to primary document. Remove hyphens from accession number
    # TODO: 5/26/2000 and before, primary doc should just be Archives/edgar/data/CIK/ACCESSION_WITH_DASHES.txt
    primary_doc_url = base_archive_url + accession_num.replace('-','') + '/' + primary_doc

    # Print filing date and URL
    print(r"{} {}".format(filing_date, primary_doc_url))

  # In case there were more filings than fit in filings->recent, check filings->files for links to additional ones
  # Each entry in "files" contains "name" field with URI of another JSON file set up like submissions/CIK###.json
  # A missing or null "files" entry simply means there is nothing more to fetch
  for sub_link in json_data['filings'].get('files') or []:
    try:
      traverse_extended_submissions(ext_sub_uri = sub_link['name'], target_entity_cik = target_entity_cik, target_form_code = target_form_code)
    except Exception as error:
      # Still best-effort: one bad extension file must not abort the listing.
      # Unlike a bare except, this reports the failure, moves on to the next
      # file, and lets KeyboardInterrupt through.
      print("Skipping additional submissions file {}: {!r}".format(sub_link, error))

  print("End list")

  return

In [None]:
# Example of calling the function above: list only the "10-Q" filings for one entity.
# The CIK is kept as a 10-character string so that its leading zeros survive.
test_target_cik = "0000320193" 
print_filing_docs_of_type("10-Q", test_target_cik)  