In [1]:
# HTTP lib
import requests

# data.sec.gov/submissions/ API: easily find all filing documents by CIK
Plugging a 10-digit CIK (*including leading zeros*) into https://data.sec.gov/submissions/CIK##########.json returns a structure containing the following information (plus more) about each filing: date of filing, form type/code, an accession number, a primary document filename, and a description of that primary document. The functions below attempt to list all the primary document URLs, their filing dates, and their form types for a given CIK (shorter CIKs are zero-padded to 10 digits). See https://www.sec.gov/edgar/sec-api-documentation for more information.

Consult https://www.sec.gov/info/edgar/forms/edgform.pdf and https://www.sec.gov/about/forms/secforms.htm for lists + descriptions of different form types. 

In [7]:
# Support for methods below. If additional submission info is found in filings->files, those JSON files are traversed here. target_form_code = ANY to list all filings.
def traverse_extended_submissions(ext_sub_uri, target_entity_cik, target_form_code='ANY'):
  """Print the filings listed in one additional submissions JSON file.

  Support routine for list_filing_docs. Fetches
  https://data.sec.gov/submissions/<ext_sub_uri>, reads its parallel
  'filingDate', 'accessionNumber', 'primaryDocument' and 'form' lists, and
  prints one line per filing.

  Args:
    ext_sub_uri: File name appended to the submissions base URL.
    target_entity_cik: CIK of the filer; zero-padded to 10 digits to build
      the archive URLs.
    target_form_code: 'ANY' prints every filing as
      "<index> Form: <form>, filed on: <date> : <url>". Any other value prints
      only filings whose form code matches it (case-insensitively) as
      "<date> <url>".

  Returns:
    None. Results are printed.

  Raises:
    requests.HTTPError: If the server answers with an error status.
    requests.Timeout: If the server does not answer within the timeout.
    KeyError: If one of the four expected lists is missing from the JSON.
  """

  # Left-pad the CIK with zeros to 10 digits
  clean_target_cik = str(target_entity_cik).zfill(10)

  # Needed to interact with SEC website
  # NOTE(review): SEC's fair-access guidance asks for a User-Agent that identifies the requester - confirm this browser string is acceptable.
  request_headers = { "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" }

  # URL of the JSON file itself, and base URL for archived documents/data
  submissions_url = r"https://data.sec.gov/submissions/{}".format(ext_sub_uri)
  base_archive_url = r"https://www.sec.gov/Archives/edgar/data/{}/".format(clean_target_cik)

  # Get the JSON data. requests never times out on its own, so bound the wait (seconds)
  # to keep a stalled connection from hanging the notebook.
  response = requests.get(url = submissions_url, headers = request_headers, timeout = 30)
  response.raise_for_status()

  json_data = response.json()

  # We need 4 pieces of information about each filing: filing date, accession number, primary doc filename, and form code.
  # Unlike the main submissions file, these lists sit at the top level of the JSON.
  date_list = json_data['filingDate']
  accession_list = json_data['accessionNumber']
  primary_docs_list = json_data['primaryDocument']
  form_code_list = json_data['form']

  list_everything = (target_form_code == 'ANY')

  # Loop through each filing; the four lists are indexed in parallel
  for filing_index, current_filing_date in enumerate(date_list):

    # Archive directory names use the accession number without hyphens
    current_accession_num = accession_list[filing_index].replace('-','')
    current_primary_doc = primary_docs_list[filing_index]
    primary_doc_url = base_archive_url + current_accession_num + '/' + current_primary_doc

    if list_everything:
      print(r"{} Form: {}, filed on: {} : {}".format(filing_index, form_code_list[filing_index], current_filing_date, primary_doc_url))
    elif form_code_list[filing_index].upper() == target_form_code.upper():
      # Filtered listing: print filing date and URL only
      print(r"{} {}".format(current_filing_date, primary_doc_url))

In [8]:
# Lists all filings of a given CIK and a link to respective primary documents / filing directory, optionally filtering filings by form type
# Takes CIK, boolean filter_by_form, and optional target_form_type (used if filter_by_form is True)
def list_filing_docs(target_entity_cik, filter_by_form=False, target_form_type = "ANY"):
  """Print every filing of a CIK with a link to its primary document / filing directory.

  Fetches https://data.sec.gov/submissions/CIK<10-digit CIK>.json, prints the
  filings found under filings->recent, then hands each file listed under
  filings->files to traverse_extended_submissions so older filings are
  printed too.

  Args:
    target_entity_cik: CIK of the filer; zero-padded to 10 digits.
    filter_by_form: When truthy, only filings whose form code matches
      target_form_type (case-insensitively) are listed.
    target_form_type: Form code to keep when filter_by_form is truthy.
      "ANY" disables filtering.

  Returns:
    None. Results are printed, followed by "End list".

  Raises:
    requests.HTTPError: If the main submissions request returns an error status.
    requests.Timeout: If the main submissions request gets no answer in time.
    KeyError: If the main JSON lacks filings->recent or one of its four lists.
  """

  # Left-pad the CIK with zeros to 10 digits
  clean_target_cik = str(target_entity_cik).zfill(10)

  # Single source of truth for the filter, used for BOTH the recent block and the
  # extended files. Previously the extended files were filtered by target_form_type
  # even when filter_by_form was False.
  effective_form_code = target_form_type if filter_by_form else "ANY"
  is_filtering = effective_form_code != "ANY"

  # Needed to interact with SEC website
  # NOTE(review): SEC's fair-access guidance asks for a User-Agent that identifies the requester - confirm this browser string is acceptable.
  request_headers = { "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" }

  # Base URL for archived documents/data
  base_archive_url = r"https://www.sec.gov/Archives/edgar/data/{}/".format(clean_target_cik)

  # Build the submissions URL, request it, and parse the JSON.
  # requests never times out on its own, so bound the wait (seconds).
  submissions_url = r"https://data.sec.gov/submissions/CIK{}.json".format(clean_target_cik)

  response = requests.get(url = submissions_url, headers = request_headers, timeout = 30)
  response.raise_for_status()

  json_data = response.json()

  # The most recent filings are under filings->recent, which holds a separate ordered list
  # for each of the 4 fields we need: filing date, accession number, primary doc filename, form code.
  recent_filings = json_data['filings']['recent']
  date_list = recent_filings['filingDate']
  accession_list = recent_filings['accessionNumber']
  primary_docs_list = recent_filings['primaryDocument']
  form_code_list = recent_filings['form']

  # Loop through each filing. Date is the iterated list, but all four fields should be present for each filing
  for filing_index, current_filing_date in enumerate(date_list):

    # If we are filtering by form type, skip if no match
    if is_filtering and form_code_list[filing_index].upper() != effective_form_code.upper():
      continue

    # Build full URL to primary document. Remove hyphens from accession number
    current_accession_num = accession_list[filing_index].replace('-','')
    current_primary_doc = primary_docs_list[filing_index] # TODO: 5/26/2000 and before, primary doc should just be Archives/edgar/data/CIK/ACCESSION_WITH_DASHES.txt
    primary_doc_url = base_archive_url + current_accession_num + '/' + current_primary_doc

    # Print filing type, date, and primary document URL
    print(r"{} Form: {}, filed on: {} : {}".format(filing_index, form_code_list[filing_index], current_filing_date, primary_doc_url))

  # Older filings, if any, are referenced under filings->files.
  # Each entry in "files" contains a "name" field with the URI of another JSON file.
  # This stays best-effort, but each file is handled on its own so one failure no longer
  # silently drops the remaining files, and the failure is reported instead of hidden.
  for sub_link in json_data['filings'].get('files') or []:
    try:
      traverse_extended_submissions(ext_sub_uri = sub_link['name'], target_entity_cik = clean_target_cik, target_form_code = effective_form_code)
    except Exception as error:
      print("Skipping extended submissions file {!r}: {}".format(sub_link, error))

  print("End list")

In [None]:
# Demo: list only the Form 4 filings for the CIK below
test_target_cik = "0000320193"
list_filing_docs(
  target_entity_cik=test_target_cik,
  filter_by_form=True,
  target_form_type="4",
)

# Other values
The above method focuses on extracting information about specific filings, but the API also returns some other interesting values (and seems to be the easiest source for them when searching by CIK):


```
entityType : "" # Denotes the type of entity (e.g. "operating" for a company; seems to be "other" for an individual, etc.)
sic : "" # SIC number
sicDescription : "" # Description of SIC category
insiderTransactionForOwnerExists : int # Will be 1 if the filer is an "insider" who has filed beneficial ownership filings (form 4 etc). 0 otherwise
insiderTransactionForIssuerExists: int # Will be set to 1 if the filer is an issuer to insiders/beneficial owners and has filed related filings. 0 otherwise
name : "" # Name of entity
tickers : [] # List of tickers connected to filer
exchanges : [] # List of names of exchanges of respective tickers
ein : "" # Employer ID number
category : "" # Description of filing category (i.e. "Large accelerated filer")
fiscalYearEnd : "MMDD" # End of company's fiscal year
stateOfIncorporation : "" # State of incorp
stateOfIncorporationDescription : "" # Generally same as above
addresses : [] # Contains one dict for mailing address and one for business address
phone : "" # Phone number
formerNames : [] # Contains one dict for each former name the company held 
```



In [4]:
# Get SIC code and description given a CIK. Useful when CIK has been procured from other sources/endpoints which did not give SIC alongside
def get_sic_from_cik(target_cik):
  """Look up the SIC code and its description for a CIK.

  Useful when a CIK came from a source that did not give the SIC alongside.
  Fetches https://data.sec.gov/submissions/CIK<10-digit CIK>.json and reads
  its top-level "sic" and "sicDescription" fields.

  Args:
    target_cik: CIK of the filer; zero-padded to 10 digits.

  Returns:
    dict with keys "sic" and "desc". A field that could not be read is left
    as "" and a notice is printed.

  Raises:
    requests.HTTPError: If the server answers with an error status.
    requests.Timeout: If the server does not answer within the timeout.
  """

  sic_dict = {
      "sic" : "",
      "desc" : ""
  }

  # Request page. requests never times out on its own, so bound the wait (seconds).
  # NOTE(review): SEC's fair-access guidance asks for a User-Agent that identifies the requester - confirm this browser string is acceptable.
  request_headers = { "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" }
  submissions_url = r"https://data.sec.gov/submissions/CIK{}.json".format(str(target_cik).zfill(10))

  response = requests.get(url = submissions_url, headers = request_headers, timeout = 30)
  response.raise_for_status()

  # Get SIC and description
  json_data = response.json()
  try:
    sic_dict["sic"] = json_data["sic"]
    sic_dict["desc"] = json_data["sicDescription"]
  except (KeyError, TypeError):
    # KeyError: field missing. TypeError: payload is not a JSON object.
    # Anything else (e.g. KeyboardInterrupt) is deliberately allowed to propagate.
    print("Submissions JSON response formatting not as excpected. Check structure")

  return sic_dict