In [11]:
import os
import requests
from bs4 import BeautifulSoup

# Define headers to mimic a browser request (you should replace this with your real name and email as per SEC guidelines)
HEADERS = {
    "User-Agent": "Devin Davis (de545945@ucf.edu)"
}

# SEC EDGAR base URL
BASE_URL = "https://www.sec.gov/files/company_tickers.json"

# Create a directory to store filings
SAVE_DIR = "8K_Filings"
os.makedirs(SAVE_DIR, exist_ok=True)


def get_cik(ticker):
    """Fetch the CIK number for a given stock ticker."""
    ticker = ticker.lower()
    url = f"https://www.sec.gov/files/company_tickers.json"
    response = requests.get(url, headers=HEADERS)
    
    if response.status_code != 200:
        print("Failed to fetch ticker to CIK mapping.")
        return None

    data = response.json()

    for key, value in data.items():
        if value["ticker"].lower() == ticker:
            return str(value["cik_str"]).zfill(10)  # Format as 10-digit CIK

    return None


def get_8k_filings(cik, count=15):
    """Fetch the most recent 8-K filings for a given company CIK."""
    search_url = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type=8-K&count={count}&output=atom"
    response = requests.get(search_url, headers=HEADERS)

    if response.status_code != 200:
        print("Error fetching filings.")
        return []

    soup = BeautifulSoup(response.text, "xml")
    entries = soup.find_all("entry")
    filings = []

    for entry in entries:
        filing_href = entry.find("filing-href")
        if filing_href:
            filing_url = filing_href.text.replace("-index.htm", ".txt")  # Direct link to .txt filing
            filings.append(filing_url)

    return filings


def download_filing(filing_url, save_path):
    """Download and save an 8-K filing."""
    response = requests.get(filing_url, headers=HEADERS)

    if response.status_code == 200:
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(response.text)
        print(f"Saved: {save_path}")
    else:
        print(f"Failed to download: {filing_url}")


def main():
    ticker = input("Enter stock ticker (e.g., AAPL): ").strip().upper()
    cik = get_cik(ticker)

    if not cik:
        print("Invalid ticker or CIK not found.")
        return

    print(f"Fetching 8-K filings for CIK: {cik}")
    filings = get_8k_filings(cik)

    if not filings:
        print("No recent 8-K filings found.")
        return

    for idx, filing_url in enumerate(filings):
        save_path = os.path.join(SAVE_DIR, f"{ticker}_8K_{idx + 1}.txt")
        download_filing(filing_url, save_path)


if __name__ == "__main__":
    main()


Enter stock ticker (e.g., AAPL):  cost


Fetching 8-K filings for CIK: 0000909832
Saved: 8K_Filings/COST_8K_1.txt
Saved: 8K_Filings/COST_8K_2.txt
Saved: 8K_Filings/COST_8K_3.txt
Saved: 8K_Filings/COST_8K_4.txt
Saved: 8K_Filings/COST_8K_5.txt
Saved: 8K_Filings/COST_8K_6.txt
Saved: 8K_Filings/COST_8K_7.txt
Saved: 8K_Filings/COST_8K_8.txt
Saved: 8K_Filings/COST_8K_9.txt
Saved: 8K_Filings/COST_8K_10.txt


In [14]:
import requests
from bs4 import BeautifulSoup
import os
import csv
import re
import ollama
import json
import time
import pandas as pd
from datetime import datetime
import subprocess 

# ----------------------------- CONFIGURATION ----------------------------- #
# Directory where 8-K filings are stored
SAVE_DIR = "8K_Filings"

# Output CSV file name
OUTPUT_CSV = "extracted_8K_products.csv"

# Model to use in OLLama
LLM_MODEL = "deepseek-r1:1.5b"  # Change this if using a different local model
# ------------------------------------------------------------------------ #


def query_local_llm(prompt, model=LLM_MODEL):
    """
    Query a local LLM (using OLLama) and return the response.
    Assumes OLLama is installed and running.
    """
    try:
        # Use subprocess to call OLLama with a prompt
        result = subprocess.run(
            ["ollama", "run", model, prompt],
            capture_output=True,
            text=True
        )
        return result.stdout.strip()
    except Exception as e:
        print(f"LLM query failed: {e}")
        return ""


def extract_metadata_from_filename(filename):
    """
    Extract stock ticker and order index from filename, e.g., 'AAPL_8K_1.txt'
    """
    match = re.match(r"([A-Z]+)_8K_(\d+)\.txt", filename)
    if match:
        return match.group(1), int(match.group(2))
    return None, None


def get_filing_timestamp(filing_text):
    """
    Try to extract the filing date from the text. 
    (Rough heuristic using common date patterns in SEC filings.)
    """
    date_pattern = r"(\d{4}-\d{2}-\d{2})"
    match = re.search(date_pattern, filing_text)
    if match:
        return match.group(1)
    else:
        # Default to today's date if not found
        return datetime.today().strftime('%Y-%m-%d')


def generate_llm_prompt(filing_text):
    """
    Generate a prompt for LLM to extract required information.
    """
    return f"""
You are analyzing an SEC Form 8-K filing. Your task is to extract and format the following information:

Company Name:
Stock Name:
Filing Time:
New Product:
Product Description (max 180 characters):

8-K Filing Text:
\"\"\"
{filing_text[:4000]}  # Only use first 4000 characters to fit into LLM context
\"\"\"

Return the answer in the following format:
Company Name: <company name>
Stock Name: <stock ticker>
Filing Time: <YYYY-MM-DD>
New Product: <product name>
Product Description: <concise product description>
"""


def parse_llm_response(response):
    """
    Parse the LLM response into a structured dictionary.
    """
    fields = {
        "Company Name": "",
        "Stock Name": "",
        "Filing Time": "",
        "New Product": "",
        "Product Description": "New Product Not Available"
    }

    for line in response.split("\n"):
        for key in fields.keys():
            if line.startswith(key + ":"):
                fields[key] = line.replace(key + ":", "").strip()

    return fields


def process_filings():
    """
    Main function to process all 8-K filings and save extracted data.
    """
    extracted_data = []

    for filename in os.listdir(SAVE_DIR):
        if filename.endswith(".txt"):
            file_path = os.path.join(SAVE_DIR, filename)
            print(f"Processing {file_path}...")

            with open(file_path, "r", encoding="utf-8") as f:
                filing_text = f.read()

            # Extract stock ticker from filename
            stock_name, index = extract_metadata_from_filename(filename)

            # Extract filing timestamp
            filing_time = get_filing_timestamp(filing_text)

            # Generate LLM prompt and query the model
            llm_prompt = generate_llm_prompt(filing_text)
            llm_response = query_local_llm(llm_prompt)

            print(f"LLM Response: \n{llm_response}\n")

            # Parse LLM response
            data = parse_llm_response(llm_response)

            # Fill in missing data if needed
            data["Stock Name"] = data["Stock Name"] or stock_name
            data["Filing Time"] = data["Filing Time"] or filing_time

            extracted_data.append(data)

    # Write to CSV
    with open(OUTPUT_CSV, "w", newline='', encoding="utf-8") as csvfile:
        fieldnames = ["Company Name", "Stock Name", "Filing Time", "New Product", "Product Description"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in extracted_data:
            writer.writerow(row)

    print(f"\nExtraction completed! Data saved to '{OUTPUT_CSV}'.")


if __name__ == "__main__":
    process_filings()


Processing 8K_Filings/JPM_8K_9.txt...
LLM Response: 
<think>
Alright, let me break down how I approached this task. 

First, I needed to extract the required information from the provided 8-K filing text. The user provided a detailed structure with several sections, each containing specific information like company name, stock ticker, filing time, new product, and product description.

Starting with Company Name: I noticed that in the document header, there's a line mentioning "JPMORGAN CHASE & CO". That seems to be the company's full name. Since the filing information is provided as a file number (0001193125-24-268103), the company name can be directly taken from the header.

Next, Stock Name: The stock ticker is usually placed in the same location as the company name but within the header. Here, it's written as "JPMORGAN CHASE & CO". So this is both the company name and the stock ticker since these are often used interchangeably for stocks of a company.

Filing Time: This is straight