In [1]:
from edgar_functions import *
from headers import headers

# Ticker symbol used by the exploratory cells that follow.
ticker = "GME"

In [2]:
def get_label_dictionary(ticker, headers):
    """
    Build a lookup from us-gaap fact name to its human-readable label.

    Args:
        ticker (str): The stock ticker symbol, forwarded to ``get_facts``.
        headers: Request headers, forwarded to ``get_facts``.

    Returns:
        dict: ``{fact_name: label}`` for every us-gaap fact that carries a
        non-empty label. Facts whose label is missing or null are left out,
        so a lookup with a fallback (``labels.get(name, name)``) keeps the raw
        fact name instead of producing ``None``.

    Raises:
        KeyError: If the facts payload has no ``"facts"`` or ``"us-gaap"``
        section.
    """
    facts = get_facts(ticker, headers)
    us_gaap_data = facts["facts"]["us-gaap"]
    return {
        fact: details["label"]
        for fact, details in us_gaap_data.items()
        # .get() tolerates entries without a "label" key; the truthiness test
        # also drops null and empty-string labels.
        if details.get("label")
    }

In [3]:
# Fact-name -> label lookup for the chosen ticker, used when renaming statements below.
label_dict = get_label_dictionary(ticker, headers)

In [4]:
# Fetch the 10-K filings table and take the accession number of its first row,
# with the dashes stripped.
# NOTE(review): presumably the first row is the most recent filing — confirm
# the ordering returned by get_filtered_filings.
accn = get_filtered_filings(
    ticker, ten_k=True, just_accession_numbers=False, headers=headers
)
acc_num = accn["accessionNumber"].iloc[0].replace("-", "")
# Statement soup for that filing's balance sheet. `soup` is not read again in
# the cells visible here. `statement_keys_map` is not defined in this notebook,
# so it has to arrive through the star import in the first cell.
soup = get_statement_soup(
    ticker,
    acc_num,
    "balance_sheet",
    headers=headers,
    statement_keys_map=statement_keys_map,
)
# Balance sheet of the same filing; the next cell displays it with readable labels.
statement = process_one_statement(ticker, acc_num, "balance_sheet")

In [5]:
# Display the balance sheet with fact names swapped for their labels; the output
# below shows that a row without a matching label keeps its raw name.
rename_statement(statement, label_dict)

Unnamed: 0,2024-02-03,2023-01-28
us-gaap_AssetsCurrentAbstract,,
"Cash and Cash Equivalents, at Carrying Value",921700.0,1139000.0
"Marketable Securities, Current",277600.0,251600.0
"Receivables, Net, Current",91000.0,153900.0
"Inventory, Net",632500.0,682900.0
"Prepaid Expense, Current",51400.0,96300.0
"Assets, Current",1974200.0,2323700.0
"Property, Plant and Equipment, Net",94900.0,136500.0
"Operating Lease, Right-of-Use Asset",555800.0,560800.0
"Deferred Income Tax Assets, Net",17300.0,18300.0


In [6]:
import pandas as pd
from edgar_functions import (
    get_filtered_filings,
    process_one_statement,
    get_label_dictionary,
    rename_statement,
    headers,
)
from datetime import datetime, timedelta


def _filter_filings_by_age(filings, years_back):
    """Keep only the filings dated within the last ``years_back`` years (365-day years)."""
    if years_back is None:
        return filings

    # assumes the filings table carries the SEC submissions fields "reportDate"
    # or "filingDate" — TODO confirm against get_filtered_filings. When neither
    # is present and the index is not datetime-like, nothing is filtered.
    if "reportDate" in filings.columns:
        raw_dates = filings["reportDate"].to_numpy()
    elif "filingDate" in filings.columns:
        raw_dates = filings["filingDate"].to_numpy()
    elif isinstance(filings.index, pd.DatetimeIndex):
        raw_dates = filings.index
    else:
        return filings

    cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=years_back * 365)
    # utc=True lets naive and tz-aware values be compared against the cutoff alike.
    dates = pd.to_datetime(pd.Series(raw_dates), errors="coerce", utc=True)
    # Rows whose date cannot be parsed are kept rather than silently dropped.
    keep = (dates >= cutoff) | dates.isna()
    return filings[keep.to_numpy()]


def _combine_statements(frames):
    """Place the per-filing statements side by side; an empty DataFrame when there are none."""
    if not frames:
        # pd.concat raises ValueError on an empty sequence.
        return pd.DataFrame()
    # Column-wise concat has to align rows, which fails when a statement repeats
    # a row label; keep the first occurrence of each label.
    unique_rows = [frame.loc[~frame.index.duplicated(keep="first")] for frame in frames]
    return pd.concat(unique_rows, axis=1)


def pull_edgar_data(ticker, years_back=10, statement_type="10-K"):
    """
    Pulls financial data for a given ticker from SEC EDGAR for the specified number of years.

    Args:
        ticker (str): The stock ticker symbol.
        years_back (int): Number of years of data to retrieve (default is 10).
            ``None`` disables the age filter.
        statement_type (str): Type of filing to retrieve ('10-K' for annual;
            any other value selects ``ten_k=False`` in ``get_filtered_filings``).

    Returns:
        dict: Keys ``"balance_sheet"``, ``"income_statement"`` and
        ``"cash_flow_statement"``. Each value is the result of
        ``rename_statement`` applied to that statement's per-filing frames
        concatenated column-wise, or an empty DataFrame when no filing yielded
        that statement.
    """
    filings = get_filtered_filings(
        ticker, ten_k=(statement_type == "10-K"), headers=headers
    )
    filings = _filter_filings_by_age(filings, years_back)

    statement_kinds = ("balance_sheet", "income_statement", "cash_flow_statement")
    # One {filing index label: statement} mapping per statement type.
    collected = {kind: {} for kind in statement_kinds}

    for date, row in filings.iterrows():
        accession_number = row["accessionNumber"].replace("-", "")
        for kind in statement_kinds:
            statement = process_one_statement(ticker, accession_number, kind)
            # process_one_statement yields None when a statement is unavailable.
            if statement is not None:
                collected[kind][date] = statement

    label_dict = get_label_dictionary(ticker, headers)

    results = {}
    for kind in statement_kinds:
        combined = _combine_statements(list(collected[kind].values()))
        # Nothing to rename in an empty frame; return it as-is.
        results[kind] = combined if combined.empty else rename_statement(combined, label_dict)
    return results


# # Example usage
# if __name__ == "__main__":

In [7]:
# Exercise the dict-returning pull_edgar_data defined in the previous cell.
# NOTE: the recorded run of this cell ended in a KeyboardInterrupt (see output).
ticker = "AAPL"  # Example ticker
edgar_data = pull_edgar_data(ticker, years_back=10)

# Print some information about the retrieved data
# (edgar_data maps a statement name to its DataFrame; the date range is read
# from the column labels of each frame).
for statement_name, df in edgar_data.items():
    print(f"\n{statement_name.replace('_', ' ').title()}:")
    print(f"Shape: {df.shape}")
    print(f"Date range: {df.columns.min()} to {df.columns.max()}")
    print(f"Number of items: {len(df)}")
    print("\nFirst few rows:")
    print(df.head())

KeyboardInterrupt: 

In [8]:
import pandas as pd
from edgar_functions import (
    get_filtered_filings,
    process_one_statement,
    get_label_dictionary,
    headers,
)
from datetime import datetime, timedelta


def drop_non_unique_columns(df):
    """
    Return a copy of ``df`` that keeps only the first column for each column label.

    Args:
        df (pd.DataFrame): Frame whose column labels may repeat.

    Returns:
        pd.DataFrame: New frame with later repeats of a label removed; the
        input is not modified.
    """
    is_repeat = df.columns.duplicated()
    first_occurrences = df.loc[:, ~is_repeat]
    return first_occurrences.copy()


def process_statement(df):
    """
    Turn one statement into a date-indexed frame with one column per line item.

    The input is transposed, repeated line-item labels are collapsed to their
    first occurrence, and the former column labels are parsed as dates.

    Args:
        df (pd.DataFrame): A single statement whose column labels are parsed
            as dates.

    Returns:
        pd.DataFrame: Rows sorted by date, indexed by a datetime ``Date`` index.
    """
    # After the transpose the line items are columns; the former column labels
    # come back as the first column once the index is reset.
    by_period = drop_non_unique_columns(df.T).reset_index()
    by_period.columns = ["Date", *by_period.columns[1:]]

    by_period["Date"] = pd.to_datetime(by_period["Date"])

    return by_period.sort_values("Date").set_index("Date")


def _filter_filings_by_age(filings, years_back):
    """Keep only the filings dated within the last ``years_back`` years (365-day years)."""
    if years_back is None:
        return filings

    # assumes the filings table carries the SEC submissions fields "reportDate"
    # or "filingDate" — TODO confirm against get_filtered_filings. When neither
    # is present and the index is not datetime-like, nothing is filtered.
    if "reportDate" in filings.columns:
        raw_dates = filings["reportDate"].to_numpy()
    elif "filingDate" in filings.columns:
        raw_dates = filings["filingDate"].to_numpy()
    elif isinstance(filings.index, pd.DatetimeIndex):
        raw_dates = filings.index
    else:
        return filings

    cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=years_back * 365)
    # utc=True lets naive and tz-aware values be compared against the cutoff alike.
    dates = pd.to_datetime(pd.Series(raw_dates), errors="coerce", utc=True)
    # Rows whose date cannot be parsed are kept rather than silently dropped.
    keep = (dates >= cutoff) | dates.isna()
    return filings[keep.to_numpy()]


def pull_edgar_data(ticker, years_back=10, statement_type="10-K"):
    """
    Pulls financial data for a given ticker from SEC EDGAR for the specified number of years.

    Args:
        ticker (str): The stock ticker symbol.
        years_back (int): Number of years of data to retrieve (default is 10).
            ``None`` disables the age filter.
        statement_type (str): Type of filing to retrieve ('10-K' for annual;
            any other value selects ``ten_k=False`` in ``get_filtered_filings``).

    Returns:
        pd.DataFrame: One row per statement date, sorted by date, with a
        ``Date`` column followed by the balance sheet (``BS_``), income
        statement (``IS_``) and cash flow (``CF_``) columns. When no statement
        could be processed at all, an empty frame with only a ``Date`` column.
    """
    filings = get_filtered_filings(
        ticker, ten_k=(statement_type == "10-K"), headers=headers
    )
    filings = _filter_filings_by_age(filings, years_back)

    # Statement type -> column prefix; insertion order fixes the column order.
    prefixes = {
        "balance_sheet": "BS_",
        "income_statement": "IS_",
        "cash_flow_statement": "CF_",
    }
    # Collect per-filing frames and concatenate once per statement type
    # instead of growing a DataFrame inside the loop.
    collected = {kind: [] for kind in prefixes}

    for _, row in filings.iterrows():
        accession_number = row["accessionNumber"].replace("-", "")
        for kind in prefixes:
            statement = process_one_statement(ticker, accession_number, kind)
            # process_one_statement yields None when a statement is unavailable.
            if statement is not None:
                collected[kind].append(process_statement(statement))

    label_dict = get_label_dictionary(ticker, headers)

    combined_statements = None
    for kind, prefix in prefixes.items():
        frames = collected[kind]
        if not frames:
            # Skipping empty statement types keeps the date index (and its
            # name) from being degraded by a join with an index-less frame.
            continue

        merged = pd.concat(frames)
        # The same date is reported by several filings; keep the row from the
        # filing that was processed first.
        merged = merged[~merged.index.duplicated(keep="first")].copy()
        # Strip the taxonomy prefix ("us-gaap_Assets" -> "Assets") to look up
        # the label; names without a label are kept unchanged.
        merged.columns = [
            label_dict.get(col.split("_", 1)[-1], col) for col in merged.columns
        ]
        merged = merged.add_prefix(prefix)

        if combined_statements is None:
            combined_statements = merged
        else:
            combined_statements = combined_statements.join(merged, how="outer")

    if combined_statements is None:
        return pd.DataFrame(columns=["Date"])

    # An outer join already sorts; sort explicitly so a single statement type
    # comes back in the same order.
    combined_statements = combined_statements.sort_index()
    # Guarantee the column produced by reset_index is called "Date".
    combined_statements.index.name = "Date"
    return combined_statements.reset_index()

In [9]:
# Example usage
# Exercises the pull_edgar_data defined in the previous cell, which returns a
# single combined DataFrame with a "Date" column.

ticker = "CTRN"  # Example ticker
edgar_data = pull_edgar_data(ticker, years_back=15)

# Print some information about the retrieved data
print(f"Shape of combined data: {edgar_data.shape}")
print(f"\nDate range: {edgar_data['Date'].min()} to {edgar_data['Date'].max()}")
print(f"\nNumber of columns: {len(edgar_data.columns)}")
print("\nFirst few rows:")
print(edgar_data.head())

# Optionally, save to CSV
# edgar_data.to_csv(f"{ticker}_financial_data.csv", index=False)

ERROR:root:Failed to get statement soup: Could not find statement file name for income_statement for accession number: 000155837023005840
ERROR:root:Failed to get statement soup: Could not find statement file name for income_statement for accession number: 000155837022005426
ERROR:root:Failed to get statement soup: Could not find statement file name for income_statement for accession number: 000155837021004258
ERROR:root:Failed to get statement soup: Could not find statement file name for income_statement for accession number: 000155837019003051
ERROR:root:Failed to get statement soup: Could not find statement file name for income_statement for accession number: 000155837018003051
ERROR:root:Failed to get statement soup: Could not find statement file name for income_statement for accession number: 000155837017002393
ERROR:root:Failed to get statement soup: Could not find statement file name for balance_sheet for accession number: 000110465911019862
ERROR:root:Failed to get statement so

Shape of combined data: (15, 116)

Date range: 2010-01-30 00:00:00 to 2024-02-03 00:00:00

Number of columns: 116

First few rows:
        Date  BS_us-gaap_AssetsCurrentAbstract  \
0 2010-01-30                               NaN   
1 2011-01-29                               NaN   
2 2012-01-28                               NaN   
3 2013-02-02                               NaN   
4 2014-02-01                               NaN   

   BS_Cash, Cash Equivalents, Restricted Cash and Restricted Cash Equivalents  \
0                                                NaN                            
1                                                NaN                            
2                                                NaN                            
3                                                NaN                            
4                                                NaN                            

   BS_Inventory, Net  BS_Prepaid Expense and Other Assets, Current  \
0          

In [10]:
# Rich display of the combined frame built in the previous cell.
edgar_data

Unnamed: 0,Date,BS_us-gaap_AssetsCurrentAbstract,"BS_Cash, Cash Equivalents, Restricted Cash and Restricted Cash Equivalents","BS_Inventory, Net","BS_Prepaid Expense and Other Assets, Current","BS_Income Taxes Receivable, Current","BS_Assets, Current","BS_Property, Plant and Equipment, Net","BS_Operating Lease, Right-of-Use Asset","BS_Deferred Income Tax Assets, Net",...,CF_us-gaap_NetCashProvidedByUsedInOperatingActivitiesContinuingOperationsAbstract,"CF_Net Income (Loss) Available to Common Stockholders, Basic","CF_Net Cash Provided by (Used in) Operating Activities, Continuing Operations","CF_Net Cash Provided by (Used in) Investing Activities, Continuing Operations","CF_Net Cash Provided by (Used in) Financing Activities, Continuing Operations",CF_Net Cash Provided by (Used in) Continuing Operations,"CF_Interest Paid, Including Capitalized Interest, Operating and Investing Activities",CF_ctrn_GainLossOnSaleOfFormerDistributionCenter,CF_ctrn_ProceedsFromSaleOfFormerDistributionCenter,"CF_Stock Repurchased During Period, Value"
0,2010-01-30,,,,,,,,,,...,,,,,,,,,,
1,2011-01-29,,,,,,,,,,...,,,,,,,,,,
2,2012-01-28,,,,,,,,,,...,,,,,,,,,,
3,2013-02-02,,,,,,,,,,...,,,,,,,,,,
4,2014-02-01,,,,,,,,,,...,,464.0,35368.0,-12784.0,-919.0,21665.0,127.0,-1526.0,2941.0,
5,2015-01-31,,,131057.0,14604.0,973.0,241357.0,47603.0,,,...,,8966.0,40326.0,-23518.0,-1222.0,15586.0,134.0,,,
6,2016-01-30,,,137020.0,12201.0,1285.0,222293.0,50632.0,,,...,,,,,,,,,,
7,2017-01-28,,,134649.0,13749.0,1635.0,237312.0,59280.0,,,...,,,,,,,,,,
8,2018-02-03,,48451.0,137701.0,15694.0,,233346.0,61777.0,,,...,,,,,,,,,,
9,2019-02-02,,17863.0,139841.0,17544.0,,225598.0,56224.0,,6539.0,...,,,,,,,,,,


In [11]:
# Persist the combined statements. "Date" is already a regular column, so the
# positional row index is left out instead of being written as an unnamed
# first column.
edgar_data.to_csv("report.csv", index=False)