In [3]:
!pip install -U sec-edgar-downloader
!pip install openai
!pip install pandas
!pip install matplotlib
!pip install numpy




In [4]:
import os
import openai
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Read the API key from the environment instead of embedding it in the notebook;
# a missing variable fails immediately with a KeyError naming OPENAI_API_KEY.
openai.api_key = os.environ["OPENAI_API_KEY"]

Matplotlib is building the font cache; this may take a moment.


In [10]:
from sec_edgar_downloader import Downloader

In [11]:
# Create one downloader and fetch the 10-K filings for each ticker in turn,
# printing the value returned by each `get` call (the recorded output shows
# one integer per ticker).
dl = Downloader("ShuklaInc", "adityashukla2015@gmail.com")
for ticker in ("MSFT", "TSLA", "AAPL"):
    filing_count = dl.get("10-K", ticker, after="1994-12-31", before="2024-01-01")
    print(filing_count)

29
13
27


In [6]:
def extract_insights(company_folder, submission_filenames=('submission.txt', 'full-submission.txt')):
    """
    Read and concatenate the text of every 10-K submission stored under a company folder.

    Each immediate sub-directory of ``company_folder`` is treated as one filing.
    Within it, the first file name from ``submission_filenames`` that exists is read;
    sub-directories containing none of them are skipped. Filings are processed in
    sorted directory-name order so the result is reproducible.

    Args:
    - company_folder (str): Path to the folder containing the company's 10-K filings
      (one sub-directory per filing), e.g. ``.../AAPL/10-K``.
    - submission_filenames (tuple of str): Candidate file names, tried in order, for the
      submission text. Defaults to ``submission.txt`` and ``full-submission.txt`` (the
      latter is the name recent sec-edgar-downloader releases are documented to write;
      verify against the installed version).

    Returns:
    - tuple: A tuple containing the company name (str) and the concatenated text of all
      10-K filings (str), each filing followed by a newline. The company name is the last
      path component of ``company_folder``; when that component is the form folder
      ``10-K``, the parent folder's name is used instead.

    Raises:
    - OSError: If ``company_folder`` cannot be listed (for example, it does not exist).
    """
    report_parts = []
    # os.listdir returns entries in arbitrary order; sort for a stable result.
    for submission_folder in sorted(os.listdir(company_folder)):
        submission_folder_path = os.path.join(company_folder, submission_folder)
        if not os.path.isdir(submission_folder_path):
            continue

        # Pick the first candidate file that exists in this filing folder.
        for filename in submission_filenames:
            submission_file_path = os.path.join(submission_folder_path, filename)
            if os.path.isfile(submission_file_path):
                break
        else:
            continue

        # Decode explicitly and tolerate stray bytes so one malformed filing
        # cannot abort the whole read with a UnicodeDecodeError.
        with open(submission_file_path, 'r', encoding='utf-8', errors='replace') as file:
            submission_text = file.read()
        report_parts.append(submission_text + "\n")
        # Report progress without echoing the full filing text into the output.
        print(f"Read report from: {submission_file_path} ({len(submission_text):,} characters)")

    # normpath drops any trailing separator so basename is never empty for a real path.
    normalized_folder = os.path.normpath(company_folder)
    company_name = os.path.basename(normalized_folder)
    if company_name.upper() == "10-K":
        # The folder is the form-type directory; the company is its parent.
        parent_name = os.path.basename(os.path.dirname(normalized_folder))
        if parent_name:
            company_name = parent_name
    return company_name, "".join(report_parts)

def analyze_company(company_folder, model="text-davinci-003", max_tokens=1500, temperature=0.7):
    """
    Analyze a company's 10-K filings to extract insights using OpenAI's API.

    The filing text is gathered with ``extract_insights`` and embedded in a single
    prompt sent to ``openai.Completion.create``. If no filing text was found, the
    API is not called and an empty insights string is returned, so the model is
    never asked to summarize reports that are not there.

    NOTE(review): the full concatenated filing text is placed in the prompt;
    confirm that it fits within the chosen model's context window.

    Args:
    - company_folder (str): Path to the folder containing the company's 10-K filings.
    - model (str): Completion model name passed to the API.
    - max_tokens (int): Upper bound on the number of tokens in the completion.
    - temperature (float): Sampling temperature passed to the API.

    Returns:
    - tuple: A tuple containing the company name (str) and the extracted insights (str).
      The insights string is empty when no filing text was found or the API call failed.
    """
    company_name, report_text = extract_insights(company_folder)

    # Nothing to analyze: skip the request instead of prompting with empty reports.
    if not report_text.strip():
        print(f"No report text found in {company_folder}; skipping analysis.")
        return company_name, ""
    
    prompt = f"""
    You are an analyst examining {company_name}'s 10-K filings. Extract the following insights:
    1. Revenue and Net Income trends/growth percentage.
    2. Total Debt.
    3. Gross Margin and Percentage.
    4. Capital Expenditure.
    5. Effective Tax Rate and Deferred Tax Assets/Liabilities.
    6. Number of Shares Outstanding
    7. Foreign Income Percentage.
    8. Share Buy-Back
    And then give two more custom insights which you deem as important
    
    Here are the reports:
    {report_text}
    
    Provide the insights in a structured format.
    """
    
    try:
        response = openai.Completion.create(
            model=model,
            prompt=prompt,
            max_tokens=max_tokens,
            n=1,
            stop=None,
            temperature=temperature,
        )
        return company_name, response['choices'][0]['text'].strip()
    except Exception as e:
        # Best effort: report the failure and let the caller treat it as "no insights".
        print(f"Error extracting insights: {e}")  #Debug statement
        return company_name, ""
    
def visualize_insights(insights):
    """
    Visualize the importance of insights using a horizontal bar chart.

    Bars are ordered by importance with the most important insight at the top.
    If ``insights`` is empty or None, a message is printed and nothing is drawn.

    Args:
    - insights (dict): A dictionary mapping insights to their importance values.

    Returns:
    - None

    Example:
        insights = {
            "Revenue and Net Income trends/growth percentage": 10,
            "Total Debt": 5,
            "Gross Margin and Percentage": 8,
            "Capital Expenditure": 3,
            "Effective Tax Rate and Deferred Tax Assets/Liabilities": 7,
            "Number of Shares Outstanding": 6,
            "Foreign Income Percentage": 4,
            "Share Buy-Back": 2,
            "Custom Insight 1": 9,
            "Custom Insight 2": 11
        }
        visualize_insights(insights)
    """
    if not insights:
        print("No insights to visualize.")
        return

    # Sort insights by value, highest importance first
    ranked = sorted(insights.items(), key=lambda item: item[1], reverse=True)
    labels = [label for label, _ in ranked]
    scores = [score for _, score in ranked]

    # Plotting
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.barh(labels, scores, color='skyblue')
    ax.set_xlabel('Importance')
    ax.set_ylabel('Insights')
    ax.set_title('Importance of Insights')
    ax.invert_yaxis()  # barh draws the first entry at the bottom; flip so it is on top
    plt.show()

In [None]:
# Locate the downloaded filings relative to the working directory rather than a
# user-specific absolute path. NOTE(review): this assumes the Downloader in the
# earlier cell, created without an explicit download folder, wrote to
# ./sec-edgar-filings - confirm against the installed sec-edgar-downloader.
base_folder = os.path.join(os.getcwd(), 'sec-edgar-filings')
companies = ['AAPL', 'MSFT', 'TSLA']
all_insights = []

#Iterate over each company
for company in companies:
    company_folder = os.path.join(base_folder, company, '10-K')
    print(f"Analyzing reports for {company} in folder: {company_folder}\n")
    # A ticker whose filings were never downloaded should not abort the whole run.
    if not os.path.isdir(company_folder):
        print(f"No filings folder found for {company}; skipping.\n")
        continue
    #Analyze the company's 10-K filings and extract insights.
    #Results are labelled with the ticker from `companies`: the name returned by
    #analyze_company is derived from the folder path, whose last component here is '10-K'.
    _, insights = analyze_company(company_folder)
    if insights:
        #Append the company ticker and its insights to the list
        all_insights.append({
            "Company": company,
            "Insights": insights
        })
        print(f"Company: {company}")
        print(f"Insights:\n{insights}\n")
        print("-" * 80)
    else:
        print(f"No insights found for {company}.\n")

print("Analysis completed.")