In [2]:
from sec_edgar_downloader import Downloader
import os

# Initialize Downloader
dl = Downloader("/Users/petersapountzis/Desktop/tulane/fall2024/cmps4010/Entergy-AI/data/SO-10QHTML", 
                email_address="peter.sapountzis@gmail.com")

# Download 10-Q filings
print("Starting download...")
try:
    dl.get("10-Q", "788784")  # Adjust amount as needed
    print("Download complete!")
except Exception as e:
    print(f"Error during download: {e}")

# Verify the downloaded files
for root, dirs, files in os.walk("/Users/petersapountzis/Desktop/tulane/fall2024/cmps4010/Entergy-AI/data/SO-10QHTML"):
    for name in files:
        print(f"Downloaded file: {os.path.join(root, name)}")


Starting download...
Download complete!


In [7]:
import os
import re
import json
import uuid

# Input and output directories
input_dir = "/Users/petersapountzis/Desktop/tulane/fall2024/cmps4010/Entergy-AI/parsers/sec-edgar-filings/0000788784/10-Q"  # Change to your directory
output_dir = "/Users/petersapountzis/Desktop/tulane/fall2024/cmps4010/Entergy-AI/data/pseg_10q_json"  # Change to your output directory
os.makedirs(output_dir, exist_ok=True)  # Ensure output directory exists

KEY_INDICATORS = [
    "Operating revenues", "Net income", "Total assets", 
    "Total liabilities", "Cash and cash equivalents", 
    "Depreciation and amortization", "Interest expense"
]

def extract_date(content):
    """Extract the report date from the content."""
    match = re.search(r"For the (quarter|year) ended (\w+ \d{1,2}, \d{4})", content, re.IGNORECASE)
    if match:
        return match.group(2).replace(" ", "-")
    return f"unknown_{uuid.uuid4()}"

def extract_table_data(content):
    """Extract financial data from text tables."""
    financial_data = {}
    for indicator in KEY_INDICATORS:
        pattern = rf"{indicator}.*?([\d\(\),\-\.]+).*?([\d\(\),\-\.]+).*?([\d\(\),\-\.]+)"
        match = re.search(pattern, content, re.IGNORECASE | re.DOTALL)
        if match:
            values = [convert_to_number(value) for value in match.groups()]
            financial_data[indicator] = {
                "Utility": values[0],
                "Parent & Other": values[1],
                "Total": values[2]
            }
    return financial_data

def convert_to_number(value):
    """Convert a string value to a number or None."""
    value = value.replace(",", "").replace("(", "-").replace(")", "").strip()
    if value in {"", "-", "—"}:
        return None
    try:
        return float(value)
    except ValueError:
        return value

def parse_txt_file(file_path):
    """Parse a single 10-Q text file and extract data."""
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    report_date = extract_date(content)
    financial_data = extract_table_data(content)
    financial_data["Report Date"] = report_date

    return financial_data

def save_to_json(data, filename):
    """Save extracted data to a JSON file."""
    output_path = os.path.join(output_dir, filename)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)

def main():
    """Main function to iterate through text files and extract financial data."""
    for root, _, files in os.walk(input_dir):
        for file in files:
            if file.endswith(".txt"):
                file_path = os.path.join(root, file)
                print(f"Parsing {file_path}...")

                financial_data = parse_txt_file(file_path)
                json_filename = f"{financial_data['Report Date']}.json"

                save_to_json(financial_data, json_filename)
                print(f"Saved parsed data to {json_filename}")

if __name__ == "__main__":
    main()


Parsing /Users/petersapountzis/Desktop/tulane/fall2024/cmps4010/Entergy-AI/parsers/sec-edgar-filings/0000788784/10-Q/0001628280-22-020260/full-submission.txt...
Saved parsed data to unknown_92ad6bcc-e975-454a-874d-e9bfc03eb8bd.json
Parsing /Users/petersapountzis/Desktop/tulane/fall2024/cmps4010/Entergy-AI/parsers/sec-edgar-filings/0000788784/10-Q/0000788784-13-000013/full-submission.txt...
Saved parsed data to June-30,-2013.json
Parsing /Users/petersapountzis/Desktop/tulane/fall2024/cmps4010/Entergy-AI/parsers/sec-edgar-filings/0000788784/10-Q/0000788784-95-000009/full-submission.txt...
Saved parsed data to unknown_f46dc136-e438-445e-a838-b94a07439a38.json
Parsing /Users/petersapountzis/Desktop/tulane/fall2024/cmps4010/Entergy-AI/parsers/sec-edgar-filings/0000788784/10-Q/0001193125-12-332159/full-submission.txt...
Saved parsed data to unknown_690edabf-7137-45c4-9afe-a472bbb0f1d7.json
Parsing /Users/petersapountzis/Desktop/tulane/fall2024/cmps4010/Entergy-AI/parsers/sec-edgar-filings/00