In [8]:
import pandas as pd
import requests
import os
import logging


In [9]:
# Send INFO-and-above records to the console, each prefixed with a
# timestamp and its severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

def _fetch_cik(session, ticker: str, timeout: float):
    """Look up the zero-padded 10-digit CIK for *ticker*.

    Downloads the SEC ticker-to-CIK mapping and returns the CIK as a string.
    Returns ``None`` (after logging the reason) when the request fails, the
    payload does not have the expected layout, or the ticker is not listed.
    """
    url = 'https://www.sec.gov/files/company_tickers.json'  # URL to fetch the company tickers
    try:
        response = session.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)

        company_df = pd.DataFrame.from_dict(response.json(), orient='index')
        # Boolean selection (rather than an index lookup) always yields a
        # Series, so a ticker listed more than once cannot leak a non-scalar
        # value into the submissions URL.
        matches = company_df.loc[company_df['ticker'] == ticker, 'cik_str']
    except requests.RequestException as e:
        logger.error(f"Error fetching company tickers: {e}")
        return None
    except (ValueError, KeyError) as e:
        # ValueError: malformed JSON; KeyError: expected columns are missing.
        logger.error(f"Error parsing company tickers JSON: {e}")
        return None
    logger.info("Company tickers fetched successfully.")

    if matches.empty:
        logger.error(f"Ticker {ticker} not found in the company tickers list.")
        return None
    return str(matches.iloc[0]).zfill(10)  # Zero-fill the CIK code to 10 digits


def _fetch_filings(session, cik: str, timeout: float):
    """Return a DataFrame of the recent 10-K / 10-Q filings for *cik*.

    The frame is the ``filings.recent`` table of the submissions document,
    filtered to 10-K and 10-Q forms, with an added ``htmDownloadURL`` column.
    Returns ``None`` (after logging the reason) on request or parse failure.
    """
    filing_url = f'https://data.sec.gov/submissions/CIK{cik}.json'  # URL to fetch filing metadata for the company
    try:
        response = session.get(filing_url, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses

        filings = pd.DataFrame.from_dict(response.json()['filings']['recent'])
        # .copy() so the new column below is written to an independent frame
        # instead of a slice of the unfiltered one (SettingWithCopyWarning).
        filings = filings.loc[filings['form'].isin(['10-K', '10-Q'])].copy()

        # Construct the download URLs for the filings
        filings['htmDownloadURL'] = (
            'https://www.sec.gov/Archives/edgar/data/' +
            cik.lstrip('0') + "/" +
            filings['accessionNumber'].str.replace("-", "", regex=False) + "/" +
            filings['primaryDocument']
        )
    except requests.RequestException as e:
        logger.error(f"Error fetching filing metadata for CIK {cik}: {e}")
        return None
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: malformed JSON; KeyError/TypeError: unexpected layout.
        logger.error(f"Error parsing filing metadata JSON: {e}")
        return None
    logger.info(f"Filing metadata fetched successfully for CIK {cik}.")
    return filings


def _download_filing(session, url: str, file_path: str, document: str, timeout: float) -> bool:
    """Download *url* to *file_path*; log the outcome and return success."""
    try:
        r = session.get(url, timeout=timeout)
        r.raise_for_status()  # Raise an HTTPError for bad responses
        with open(file_path, 'wb') as file:
            file.write(r.content)
    except requests.RequestException as e:
        # Must precede the OSError handler: RequestException derives from it.
        logger.error(f"Error downloading document {document}: {e}")
        return False
    except OSError as e:
        logger.error(f"Error writing {file_path}: {e}")
        return False
    logger.info(f"Downloaded {document} to {file_path}")
    return True


def getCompanyFiles(ticker: str, *, timeout: float = 30.0) -> None:
    """Download the recent 10-K and 10-Q primary documents for *ticker*.

    Resolves the ticker to its CIK via the SEC ticker list, fetches the
    company's recent-filings metadata, and saves each 10-K / 10-Q primary
    document into a directory named after *ticker* (created in the current
    working directory if needed). Failures are logged rather than raised; a
    failed individual download does not stop the remaining ones.

    Args:
        ticker: Ticker symbol exactly as it appears in the SEC ticker list.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Results are the files written to disk and the log output.
    """
    # One session for all requests: carries the identifying User-Agent header
    # and reuses the underlying connection across the many document downloads.
    with requests.Session() as session:
        session.headers.update({'User-Agent': 'PECO'})

        cik = _fetch_cik(session, ticker, timeout)
        if cik is None:
            return

        filings = _fetch_filings(session, cik, timeout)
        if filings is None:
            return

        # Create a directory for the ticker if it doesn't exist
        os.makedirs(ticker, exist_ok=True)

        # NOTE: files are named after primaryDocument only, so two filings
        # sharing a document name overwrite one another.
        for url, document in zip(filings['htmDownloadURL'], filings['primaryDocument']):
            if not isinstance(document, str) or not document:
                # Without a document name the target path would be the
                # directory itself; skip instead of failing on open().
                logger.warning(f"Skipping filing without a primary document: {url}")
                continue
            _download_filing(session, url, os.path.join(ticker, document), document, timeout)

In [10]:
# Demo: fetch Apple's recent 10-K / 10-Q documents into ./AAPL
getCompanyFiles(ticker='AAPL')