In [9]:
import requests
import re
from datetime import datetime

In [10]:
# Company identifier used as the folder name in the EDGAR archive URLs below.
# Currently AAPL (Apple Inc).
cik_num = "320193"

In [11]:
# Root of the EDGAR archive; company and filing paths are appended to this.
base_url = "https://www.sec.gov/Archives/edgar/data"

# HTTP headers sent with every request in this notebook.
# NOTE(review): this is a browser-style User-Agent. SEC's fair-access guidance
# reportedly asks automated clients to identify themselves (name + contact
# e-mail) -- confirm and replace with your own details if required.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148",
}

In [12]:
# Fetch the company's filing index in JSON format.
# The result (`decoded_content`) lists the filing folders under
# decoded_content['directory']['item'], which later cells iterate over.
filings_url = f"{base_url}/{cik_num}/index.json"

# Without a timeout `requests` waits forever on a stalled connection;
# 30 seconds is a generous upper bound for a single index request.
content = requests.get(filings_url, headers=HEADERS, timeout=30)

# Fail loudly on an HTTP error (e.g. 403/404/429) instead of letting
# `.json()` choke on an HTML error page with an unhelpful decode error.
content.raise_for_status()
decoded_content = content.json()

In [13]:
# Form types that count as "relevant" when scanning the filings.
desired_form_types = ["10-K", "10-Q"]

# Calendar year according to the local clock at the moment this cell runs;
# only filings dated in this year are kept.
current_year = datetime.today().year

In [15]:
# Collect this year's filings of the desired form types.
#
# For every filing folder in `decoded_content`, fetch the folder's index,
# look at each ".txt" document, determine its filing date and form type, and
# append a {"date", "form_type", "file_url"} record to `relevant_filings` when
# the filing is dated in `current_year` and its form type is listed in
# `desired_form_types`.
relevant_filings = []

# Seconds to wait on any single HTTP request, so a stalled connection cannot
# hang the cell indefinitely.
request_timeout = 30

# An 8-digit date directly in front of ".txt" in the file name.
filename_date_pattern = re.compile(r'(\d{8})\.txt')
# Fallback: the filing date from the document's own header. EDGAR submission
# files are normally named by accession number (NNNNNNNNNN-YY-NNNNNN.txt),
# which carries no 8-digit date, so the file name alone is not enough.
header_date_pattern = re.compile(r'FILED AS OF DATE:\s*(\d{8})')
# The value that follows the "FORM TYPE:" label. `\s*` skips the padding
# after the colon (and a line break, should the value sit on the next line).
form_type_pattern = re.compile(r'FORM TYPE:\s*([^\n]+)')

# Iterate through the filings
for filing_number in decoded_content['directory']['item']:
    filing_num = filing_number['name']

    # Get the documents submitted for that filing (JSON index of the folder)
    filing_url = f"{base_url}/{cik_num}/{filing_num}/index.json"
    content = requests.get(filing_url, headers=HEADERS, timeout=request_timeout)
    # Surface HTTP errors here rather than as a JSON decode failure below.
    content.raise_for_status()
    document_content = content.json()

    # Iterate through the documents in the filing
    for document in document_content['directory']['item']:
        document_name = document['name']

        # Only ".txt" documents are inspected
        if not document_name.endswith('.txt'):
            continue

        # Cheap pre-filter: when the file name itself carries a date from
        # another year, skip the file without downloading it.
        match_date = filename_date_pattern.search(document_name)
        if match_date and int(match_date.group(1)[:4]) != current_year:
            continue

        # Get the contents of the ".txt" file
        txt_file_url = f"{base_url}/{cik_num}/{filing_num}/{document_name}"
        txt_response = requests.get(txt_file_url, headers=HEADERS, timeout=request_timeout)
        # Do not mistake an error page (e.g. a rate-limit notice) for a filing.
        txt_response.raise_for_status()
        txt_file_content = txt_response.text

        # Extract the filing date: file name first, document header second
        if match_date is None:
            match_date = header_date_pattern.search(txt_file_content)
        if match_date is None:
            continue  # no usable date -> cannot tell which year it belongs to
        filing_date = match_date.group(1)
        filing_year = int(filing_date[:4])
        if filing_year != current_year:
            continue

        # Extract the declared form type; skip documents that do not declare
        # one instead of crashing on a failed regex match.
        match_form = form_type_pattern.search(txt_file_content)
        if match_form is None:
            continue
        form_type = match_form.group(1).strip()

        # Compare the declared form type itself. A substring test on the whole
        # document would also accept any filing that merely mentions "10-K".
        # Add variants such as '10-K/A' to `desired_form_types` if wanted.
        if form_type not in desired_form_types:
            continue

        print('Found relevant filing! Adding to list :)')
        relevant_filings.append({
            "date": filing_date,
            "form_type": form_type,
            "file_url": txt_file_url,
        })