In [1]:
print('a')

a


In [1]:
import requests
import json
import time
import re
import os

# --- Configuration ---
# IMPORTANT: Replace with your actual information
YOUR_COMPANY_NAME = "MyCompanyName"
YOUR_CONTACT_EMAIL = "contact@mycompany.com"
USER_AGENT = f"{YOUR_COMPANY_NAME} {YOUR_CONTACT_EMAIL}"

# Base URL for SEC EDGAR filings
SEC_EDGAR_BASE_URL = "https://www.sec.gov/Archives/edgar/data/"
# URL for CIK lookup
CIK_LOOKUP_URL = "https://www.sec.gov/files/company_tickers.json"
# Base URL for company submissions data (contains filing info)
SUBMISSIONS_BASE_URL = "https://data.sec.gov/submissions/"

# --- Helper Functions ---

def get_cik_from_ticker(ticker):
    """
    Looks up the CIK for a given ticker symbol.

    Args:
        ticker (str): The stock ticker symbol.

    Returns:
        str: The CIK for the ticker, or None if not found.
    """
    print(f"Looking up CIK for ticker: {ticker}...")
    try:
        headers = {'User-Agent': USER_AGENT}
        response = requests.get(CIK_LOOKUP_URL, headers=headers)
        response.raise_for_status() # Raise an exception for bad status codes
        data = response.json()

        # The JSON is a list of dictionaries, each with 'cik_str', 'ticker', 'title'
        for company_info in data:
            if company_info['ticker'].upper() == ticker.upper():
                # CIKs are stored as integers in the JSON, convert to string
                # Also pad with leading zeros to 10 digits if needed for some URLs
                return str(company_info['cik_str']).zfill(10)

        print(f"CIK not found for ticker: {ticker}")
        return None

    except requests.exceptions.RequestException as e:
        print(f"Error fetching CIK lookup data: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Error parsing CIK lookup JSON: {e}")
        return None
    except KeyError as e:
         print(f"Error accessing expected key in CIK lookup data: {e}")
         return None


def find_10k_filings(cik, limit=1):
    """
    Finds recent 10-K filings for a given CIK.

    Args:
        cik (str): The company's CIK.
        limit (int): The maximum number of recent 10-K filings to find.

    Returns:
        list: A list of dictionaries, each containing 'accessionNumber' and 'filingDate'
              for the found 10-K filings.
    """
    print(f"Searching for 10-K filings for CIK: {cik}...")
    try:
        # The submissions endpoint URL structure
        submissions_url = f"{SUBMISSIONS_BASE_URL}CIK{cik}.json"
        headers = {'User-Agent': USER_AGENT}

        response = requests.get(submissions_url, headers=headers)
        response.raise_for_status()
        data = response.json()

        filings = []

        # Attempt to find recent filings first
        filings_data = data.get('filings', {})
        recent_filings = filings_data.get('recent', {})

        forms_recent = recent_filings.get('form')
        accessions_recent = recent_filings.get('accessionNumber')
        dates_recent = recent_filings.get('filingDate')

        # Check if recent filings data is in the expected list format and lengths match
        if isinstance(forms_recent, list) and isinstance(accessions_recent, list) and isinstance(dates_recent, list) and \
           len(forms_recent) == len(accessions_recent) and len(forms_recent) == len(dates_recent):

            for i in range(len(forms_recent)):
                try: # Use a try block here just in case indexing still fails for some reason
                    form_type = forms_recent[i]
                    if form_type == '10-K':
                        filings.append({
                            'accessionNumber': accessions_recent[i],
                            'filingDate': dates_recent[i]
                        })
                        if len(filings) >= limit:
                            break # Found enough recent filings
                except IndexError:
                     print(f"Warning: Index error while processing recent filings for CIK {cik}. Skipping.")
                     break # Exit loop if lists aren't aligned as expected

        else:
            # This warning might indicate an unexpected structure or missing data
            print(f"Warning: Unexpected or incomplete 'recent' filings structure for CIK {cik}. Forms: {type(forms_recent)}, Accessions: {type(accessions_recent)}, Dates: {type(dates_recent)}. Trying older filings...")


        # If limit not reached, try older filings (structure is different - list of item lists)
        if len(filings) < limit:
            older_filings_groups = filings_data.get('files', [])

            if isinstance(older_filings_groups, list):
                for filing_group in older_filings_groups:
                    # Each group is a dictionary, expecting 'form' and 'items' keys
                    if isinstance(filing_group, dict) and filing_group.get('form') == '10-K':
                        items = filing_group.get('items', [])
                        if isinstance(items, list):
                             for item in items:
                                 # Each item is a dictionary, expecting 'accessionNumber' and 'filingDate'
                                 if isinstance(item, dict):
                                     acc_num = item.get('accessionNumber')
                                     file_date = item.get('filingDate')
                                     if acc_num and file_date: # Basic check for required keys
                                         filings.append({
                                             'accessionNumber': acc_num,
                                             'filingDate': file_date
                                         })
                                         if len(filings) >= limit:
                                             break # Found enough total filings
                             if len(filings) >= limit:
                                break # Stop processing groups if limit is reached
                        else:
                             print(f"Warning: Unexpected 'items' structure in older filings group for CIK {cik}. Skipping this group.")
            else:
                print(f"Warning: Unexpected 'files' filings structure for CIK {cik}. Skipping older filings.")


        if not filings:
            print(f"No 10-K filings found for CIK: {cik}")

        # Return the found filings, up to the limit
        return filings[:limit]

    except requests.exceptions.RequestException as e:
        print(f"Error fetching submissions data for CIK {cik}: {e}")
        return []
    except json.JSONDecodeError as e:
        print(f"Error parsing submissions JSON for CIK {cik}: {e}")
        return []
    except KeyError as e:
        print(f"Unexpected JSON structure for CIK {cik}: Missing key {e}")
        return []


def download_filing_html(cik, accession_number, output_dir="filings"):
    """
    Downloads the primary HTML document for a given filing.

    Args:
        cik (str): The company's CIK.
        accession_number (str): The filing's accession number (with dashes).
        output_dir (str): Directory to save the downloaded file.

    Returns:
        str: The path to the downloaded file, or None if download failed.
    """
    # The accession number in the URL path needs dashes removed
    accession_number_clean = accession_number.replace('-', '')

    # Construct the URL for the primary document HTML
    # Filenames are usually the accession number with .html extension
    html_filename = f"{accession_number}.html"
    filing_url = f"{SEC_EDGAR_BASE_URL}{cik}/{accession_number_clean}/{html_filename}"

    print(f"Downloading filing: {filing_url}")

    try:
        headers = {'User-Agent': USER_AGENT}
        response = requests.get(filing_url, headers=headers)
        response.raise_for_status() # Raise an exception for bad status codes

        # Create output directory if it doesn't exist
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Save the HTML content to a file
        file_path = os.path.join(output_dir, f"{accession_number}.html")
        # Use errors='ignore' or 'replace' if encoding issues occur, though utf-8 is standard
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(response.text)

        print(f"Downloaded successfully to: {file_path}")
        return file_path

    except requests.exceptions.RequestException as e:
        print(f"Error downloading filing {accession_number}: {e}")
        return None
    except Exception as e: # Catch other potential errors during file writing etc.
        print(f"An unexpected error occurred while downloading {accession_number}: {e}")
        return None


# --- Main Execution ---

if __name__ == "__main__":
    # Example usage:
    ticker_symbol = 'A'
    num_filings_to_get = int(1)

    # 1. Get CIK from ticker
    cik = get_cik_from_ticker(ticker_symbol)

    if cik:
        # Be polite
        time.sleep(0.5) # Shorter sleep after CIK lookup

        # 2. Find recent 10-K filings
        filings_info = find_10k_filings(cik, limit=num_filings_to_get)

        if filings_info:
            print(f"\nFound {len(filings_info)} 10-K filings.")
            downloaded_files = []
            for filing in filings_info:
                acc_num = filing['accessionNumber']
                file_date = filing['filingDate']
                print(f"\nProcessing filing: {acc_num} from {file_date}")

                # 3. Download the HTML filing
                downloaded_path = download_filing_html(cik, acc_num)

                if downloaded_path:
                    downloaded_files.append(downloaded_path)

                # Be polite between downloads
                time.sleep(1) # Longer sleep between filing downloads

            if downloaded_files:
                print("\nDownloaded 10-K filings:")
                for f_path in downloaded_files:
                    print(f_path)
                print("\nNote: To extract sections (like Business and Risk Factors) from these HTML files,")
                print("you'll need to use an HTML parser (like BeautifulSoup) and locate the relevant headings.")
            else:
                print("\nNo filings were successfully downloaded.")
        else:
            print(f"Could not find any 10-K filings for ticker {ticker_symbol}.")

    else:
        print(f"Could not proceed without finding the CIK for {ticker_symbol}.")

Looking up CIK for ticker: A...


TypeError: string indices must be integers