In [43]:
import pandas as pd
import requests
import settings
import calendar

In [38]:

statement_keys_map = {
    "balance_sheet": [
        "balance sheet",
        "balance sheets",
        "statement of financial position",
        "consolidated balance sheets",
        "consolidated balance sheet",
        "consolidated financial position",
        "consolidated balance sheets - southern",
        "consolidated statements of financial position",
        "consolidated statement of financial position",
        "consolidated statements of financial condition",
        "combined and consolidated balance sheet",
        "condensed consolidated balance sheets",
        "consolidated balance sheets, as of december 31",
        "dow consolidated balance sheets",
        "consolidated balance sheets (unaudited)",
    ],
    "income_statement": [
        "income statement",
        "income statements",
        "statement of earnings (loss)",
        "statements of consolidated income",
        "consolidated statements of operations",
        "consolidated statement of operations",
        "consolidated statements of earnings",
        "consolidated statement of earnings",
        "consolidated statements of income",
        "consolidated statement of income",
        "consolidated income statements",
        "consolidated income statement",
        "condensed consolidated statements of earnings",
        "consolidated results of operations",
        "consolidated statements of income (loss)",
        "consolidated statements of income - southern",
        "consolidated statements of operations and comprehensive income",
        "consolidated statements of comprehensive income",
    ],
    "cash_flow_statement": [
        "cash flows statement",
        "cash flows statements",
        "statement of cash flows",
        "statements of consolidated cash flows",
        "consolidated statements of cash flows",
        "consolidated statement of cash flows",
        "consolidated statement of cash flow",
        "consolidated cash flows statements",
        "consolidated cash flow statements",
        "condensed consolidated statements of cash flows",
        "consolidated statements of cash flows (unaudited)",
        "consolidated statements of cash flows - southern",
    ],
}


In [2]:
headers = {"User-Agent": settings.email_address}
ticker = "TSLA"

In [3]:
def cik_matching_ticker(ticker, headers=headers):
    ticker = ticker.upper().replace(".", "_")
    ticker_json = requests.get("https://www.sec.gov/files/company_tickers.json", headers=headers).json()
    for company in ticker_json.values():
        if company["ticker"] == ticker:
            cik = str(company["cik_str"]).zfill(10)
            return cik
    raise ValueError(f'Ticker: {ticker} not found".format(ticker=ticker)')



In [4]:
cik = cik_matching_ticker('tsla')
print(cik)

0001318605


In [5]:
def get_submission_data_for_ticker(ticker, headers=headers, only_fillings_df=False):
    cik = cik_matching_ticker(ticker, headers=headers)
    headers = headers.copy()
    url = f"https://data.sec.gov/submissions/CIK{cik}.json"
    company_json = requests.get(url, headers=headers).json()
    if only_fillings_df:
        return pd.DataFrame(company_json["filings"]["recent"])
    else:
        return company_json


In [6]:
data = get_submission_data_for_ticker(ticker, only_fillings_df=True)

In [7]:
data.keys()

Index(['accessionNumber', 'filingDate', 'reportDate', 'acceptanceDateTime',
       'act', 'form', 'fileNumber', 'filmNumber', 'items', 'core_type', 'size',
       'isXBRL', 'isInlineXBRL', 'primaryDocument', 'primaryDocDescription'],
      dtype='object')

In [8]:
def get_filtered_filings(ticker, form_type='10-K', just_accession_numbers=False, headers=headers):
    company_filings_df = get_submission_data_for_ticker(ticker, only_fillings_df=True, headers=headers)
    if form_type is not None:
        df = company_filings_df[company_filings_df["form"] == form_type]
        if just_accession_numbers:
            df.set_index('reportDate', inplace=True)
            accession_df = df['accessionNumber']

            return accession_df
        else:
            return df
    else:
        raise ValueError("Must provide form_type")




In [9]:
filings = get_filtered_filings(ticker, form_type='10-K')

In [10]:
filings

Unnamed: 0,accessionNumber,filingDate,reportDate,acceptanceDateTime,act,form,fileNumber,filmNumber,items,core_type,size,isXBRL,isInlineXBRL,primaryDocument,primaryDocDescription
56,0001628280-25-003063,2025-01-30,2024-12-31,2025-01-30T01:42:33.000Z,34,10-K,001-34756,25570807,,XBRL,15788647,1,1,tsla-20241231.htm,10-K
170,0001628280-24-002390,2024-01-29,2023-12-31,2024-01-27T02:00:20.000Z,34,10-K,001-34756,24569853,,XBRL,15527801,1,1,tsla-20231231.htm,10-K
271,0000950170-23-001409,2023-01-31,2022-12-31,2023-01-31T02:29:15.000Z,34,10-K,001-34756,23570030,,XBRL,31445171,1,1,tsla-20221231.htm,10-K
371,0000950170-22-000796,2022-02-07,2021-12-31,2022-02-05T01:11:27.000Z,34,10-K,001-34756,22595227,,XBRL,29316024,1,1,tsla-20211231.htm,10-K
503,0001564590-21-004599,2021-02-08,2020-12-31,2021-02-08T12:27:23.000Z,34,10-K,001-34756,21598537,,XBRL,32860345,1,1,tsla-10k_20201231.htm,10-K
641,0001564590-20-004475,2020-02-13,2019-12-31,2020-02-13T12:12:18.000Z,34,10-K,001-34756,20606921,,XBRL,29961626,1,1,tsla-10k_20191231.htm,10-K
794,0001564590-19-003165,2019-02-19,2018-12-31,2019-02-19T11:10:16.000Z,34,10-K,001-34756,19613254,,10-K,30826751,1,0,tsla-10k_20181231.htm,10-K
901,0001564590-18-002956,2018-02-23,2017-12-31,2018-02-23T11:07:43.000Z,34,10-K,001-34756,18634585,,10-K,25498533,1,0,tsla-10k_20171231.htm,10-K


In [11]:
def get_facts(ticker, headers=headers):
    cik = cik_matching_ticker(ticker, headers=headers)
    url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json"
    company_facts = requests.get(url, headers=headers).json()
    return company_facts



In [12]:
facts = get_facts(ticker)

In [13]:
def get_facts_df(ticker, headers=headers):
    facts = get_facts(ticker, headers=headers)
    us_gaap_data = facts["facts"]["us-gaap"]
    df_data = []
    for fact, details in us_gaap_data.items():
        for unit in details["units"]:
            for item in details["units"][unit]:
                row = item.copy()
                row["fact"] = fact
                df_data.append(row)

    df = pd.DataFrame(df_data)
    df["end"] = pd.to_datetime(df["end"])
    df["start"] = pd.to_datetime(df["start"])
    df.drop_duplicates(subset=["fact", "start", "end"], inplace=True)
    df.set_index("end", inplace=True)
    labels_dict = {fact: details["label"] for fact, details in us_gaap_data.items()}
    return df, labels_dict

In [14]:
facts, labels = get_facts_df(ticker)

In [15]:
facts

Unnamed: 0_level_0,val,accn,fy,fp,form,filed,fact,frame,start
end,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-12-31,4.020000e+08,0001564590-20-047486,2020,Q3,10-Q,2020-10-26,AccountsAndNotesReceivableNet,,NaT
2020-09-30,3.420000e+08,0001564590-20-047486,2020,Q3,10-Q,2020-10-26,AccountsAndNotesReceivableNet,CY2020Q3I,NaT
2020-12-31,3.340000e+08,0001564590-21-004599,2020,FY,10-K,2021-02-08,AccountsAndNotesReceivableNet,,NaT
2021-03-31,3.240000e+08,0000950170-21-000046,2021,Q1,10-Q,2021-04-28,AccountsAndNotesReceivableNet,CY2021Q1I,NaT
2021-06-30,3.150000e+08,0000950170-21-000524,2021,Q2,10-Q,2021-07-27,AccountsAndNotesReceivableNet,CY2021Q2I,NaT
...,...,...,...,...,...,...,...,...,...
2022-12-31,0.000000e+00,0001628280-25-003063,2024,FY,10-K,2025-01-30,InterestIncomeShortTermInvestmentOther,CY2022,2022-01-01
2023-12-31,3.880000e+08,0001628280-25-003063,2024,FY,10-K,2025-01-30,InterestIncomeShortTermInvestmentOther,CY2023,2023-01-01
2024-12-31,7.630000e+08,0001628280-25-003063,2024,FY,10-K,2025-01-30,InterestIncomeShortTermInvestmentOther,CY2024,2024-01-01
2024-12-31,1.076000e+09,0001628280-25-018911,2025,Q1,10-Q,2025-04-23,CryptoAssetFairValueNoncurrent,CY2024Q4I,NaT


In [16]:
def annual_facts(ticker, headers=headers):
    accession_nums = get_filtered_filings(ticker, form_type='10-K', just_accession_numbers=True)
    df, label_dict = get_facts_df(ticker, headers)
    ten_k = df[df["accn"].isin(accession_nums)]
    ten_k = ten_k[ten_k.index.isin(accession_nums.index)]
    pivot = ten_k.pivot_table(values="val", columns="fact", index="end")
    pivot.rename(columns=label_dict, inplace=True)
    return pivot.T


In [17]:
accession_nums = get_filtered_filings(ticker, form_type="10-K", just_accession_numbers=True)

In [18]:
annual_facts("GOOG", headers=headers)


  ten_k = ten_k[ten_k.index.isin(accession_nums.index)]


end,2021-12-31,2022-12-31,2023-12-31,2024-12-31
fact,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Accounts Payable, Current",6.037000e+09,5.128000e+09,7.493000e+09,7.987000e+09
"Accounts Receivable, after Allowance for Credit Loss, Current",3.930400e+10,4.025800e+10,4.796400e+10,5.234000e+10
"Accrued Income Taxes, Current",8.080000e+08,,2.748000e+09,2.905000e+09
"Accrued Income Taxes, Noncurrent",9.176000e+09,9.258000e+09,8.474000e+09,8.782000e+09
"Accrued Liabilities, Current",3.123600e+10,3.786600e+10,4.616800e+10,5.122800e+10
...,...,...,...,...
Unrecognized Tax Benefits that Would Impact Effective Tax Rate,3.700000e+09,5.300000e+09,7.400000e+09,1.000000e+10
"Variable Interest Entity, Reporting Entity Involvement, Maximum Loss Exposure, Amount",2.900000e+09,2.800000e+09,5.700000e+09,
"Variable Lease, Cost",7.260000e+08,8.380000e+08,1.182000e+09,1.425000e+09
"Weighted Average Number of Shares Outstanding, Diluted",,1.315900e+10,1.272200e+10,1.244700e+10


In [19]:
def quarterly_facts(ticker, headers=headers):
    accession_nums = get_filtered_filings(
        ticker, form_type="10-Q", just_accession_numbers=True
    )
    df, label_dict = get_facts_df(ticker, headers)
    ten_q = df[df["accn"].isin(accession_nums)]
    ten_q = ten_q[ten_q.index.isin(accession_nums.index)].reset_index(drop=False)
    ten_q = ten_q.drop_duplicates(subset=["fact", "end"], keep="last")
    pivot = ten_q.pivot_table(values="val", columns="fact", index="end")
    pivot.rename(columns=label_dict, inplace=True)
    return pivot.T

In [20]:
quarterly_facts("GOOG", headers=headers)


  ten_q = ten_q[ten_q.index.isin(accession_nums.index)].reset_index(drop=False)


end,2022-03-31,2022-06-30,2022-09-30,2023-03-31,2023-06-30,2023-09-30,2024-03-31,2024-06-30,2024-09-30,2025-03-31
fact,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
"Accounts Payable, Current",3.436000e+09,4.409000e+09,6.303000e+09,4.184000e+09,5.313000e+09,5.803000e+09,6.198000e+09,6.092000e+09,7.049000e+09,8.497000e+09
"Accounts Receivable, after Allowance for Credit Loss, Current",3.470300e+10,3.570700e+10,3.469700e+10,3.603600e+10,3.880400e+10,4.102000e+10,4.455200e+10,4.708700e+10,4.910400e+10,5.100000e+10
"Accrued Income Taxes, Current",4.344000e+09,1.956000e+09,1.025000e+09,5.217000e+09,1.087000e+10,1.352800e+10,4.926000e+09,3.297000e+09,2.639000e+09,9.160000e+09
"Accrued Income Taxes, Noncurrent",9.406000e+09,8.163000e+09,8.572000e+09,9.722000e+09,8.753000e+09,8.038000e+09,9.234000e+09,7.703000e+09,8.219000e+09,9.773000e+09
"Accrued Liabilities, Current",3.305100e+10,3.297600e+10,3.503800e+10,4.318500e+10,4.930000e+10,5.560200e+10,4.860300e+10,4.729800e+10,4.658500e+10,5.830000e+10
...,...,...,...,...,...,...,...,...,...,...
Unrecognized Tax Benefits that Would Impact Effective Tax Rate,3.900000e+09,4.200000e+09,4.600000e+09,5.700000e+09,6.700000e+09,6.800000e+09,8.100000e+09,8.600000e+09,9.500000e+09,1.080000e+10
"Variable Interest Entity, Reporting Entity Involvement, Maximum Loss Exposure, Amount",2.900000e+09,2.400000e+09,2.600000e+09,2.700000e+09,3.000000e+09,5.500000e+09,6.900000e+09,6.900000e+09,,
"Variable Lease, Cost",,,,,,,3.430000e+08,,,3.600000e+08
"Weighted Average Number of Shares Outstanding, Diluted",,,,,1.276400e+10,1.269600e+10,1.252700e+10,1.249500e+10,1.241900e+10,1.229100e+10


In [21]:
from bs4 import BeautifulSoup


In [31]:
acc = get_filtered_filings(ticker, form_type="10-Q", just_accession_numbers=True)
acc_num = acc.iloc[0].replace('-','')

In [26]:
acc

reportDate
2025-03-31    0001628280-25-018911
2024-09-30    0001628280-24-043486
2024-06-30    0001628280-24-032662
2024-03-31    0001628280-24-017503
2023-09-30    0001628280-23-034847
2023-06-30    0000950170-23-033872
2023-03-31    0000950170-23-013890
2022-09-30    0000950170-22-019867
2022-06-30    0000950170-22-012936
2022-03-31    0000950170-22-006034
2021-09-30    0000950170-21-002253
2021-06-30    0000950170-21-000524
2021-03-31    0000950170-21-000046
2020-09-30    0001564590-20-047486
2020-06-30    0001564590-20-033670
2020-03-31    0001564590-20-019931
2019-09-30    0001564590-19-038256
2019-06-30    0001564590-19-026445
2019-03-31    0001564590-19-013462
2018-09-30    0001564590-18-026353
2018-06-30    0001564590-18-019254
2018-03-31    0001564590-18-011086
2017-09-30    0001564590-17-021343
2017-06-30    0001564590-17-015705
2017-03-31    0001564590-17-009968
Name: accessionNumber, dtype: object

In [28]:
import numpy as np
def _get_file_name(report):
    html_file_name_tag = report.find("HtmlFileName")
    xml_file_name_tag = report.find("XmlFileName")

    if html_file_name_tag:
        return html_file_name_tag.text
    elif xml_file_name_tag:
        return xml_file_name_tag.text
    else:
        return ""


def _is_statement_file(short_name_tag, long_name_tag, file_name):
    return (
        short_name_tag is not None
        and long_name_tag is not None
        and file_name  # Check if file_name is not an empty string
        and "Statement" in long_name_tag.text
    )


def get_statement_file_names_in_filing_summary(
    ticker, accession_number, headers=headers
):
    try:
        session = requests.Session()
        cik = cik_matching_ticker(ticker)
        base_link = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}"
        filing_summary_link = f"{base_link}/FilingSummary.xml"
        filing_summary_response = session.get(
            filing_summary_link, headers=headers
        ).content.decode("utf-8")

        filing_summary_soup = BeautifulSoup(filing_summary_response, "lxml-xml")
        statement_file_names_dict = {}

        for report in filing_summary_soup.find_all("Report"):
            file_name = _get_file_name(report)
            short_name, long_name = report.find("ShortName"), report.find("LongName")

            if _is_statement_file(short_name, long_name, file_name):
                statement_file_names_dict[short_name.text.lower()] = file_name

        return statement_file_names_dict

    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        return {}


def get_statement_soup(
    ticker,
    accession_number,
    statement_name,
    headers,
    statement_keys_map,
):
    """
    the statement_name should be one of the following:
    'balance_sheet'
    'income_statement'
    'cash_flow_statement'
    """
    session = requests.Session()

    cik = cik_matching_ticker(ticker)
    base_link = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}"

    statement_file_name_dict = get_statement_file_names_in_filing_summary(
        ticker, accession_number, headers
    )

    statement_link = None
    for possible_key in statement_keys_map.get(statement_name.lower(), []):
        file_name = statement_file_name_dict.get(possible_key.lower())
        if file_name:
            statement_link = f"{base_link}/{file_name}"
            break

    if not statement_link:
        raise ValueError(f"Could not find statement file name for {statement_name}")

    try:
        statement_response = session.get(statement_link, headers=headers)
        statement_response.raise_for_status()  # Check if the request was successful

        if statement_link.endswith(".xml"):
            return BeautifulSoup(
                statement_response.content, "lxml-xml", from_encoding="utf-8"
            )
        else:
            return BeautifulSoup(statement_response.content, "lxml")

    except requests.RequestException as e:
        raise ValueError(f"Error fetching the statement: {e}")

In [32]:
get_statement_file_names_in_filing_summary(ticker, acc_num,headers=headers)

{'consolidated balance sheets': 'R2.htm',
 'consolidated balance sheets (parenthetical)': 'R3.htm',
 'consolidated statements of operations': 'R4.htm',
 'consolidated statements of comprehensive income': 'R5.htm',
 'consolidated statements of redeemable noncontrolling interest and equity': 'R6.htm',
 'consolidated statements of redeemable noncontrolling interest and equity (parenthetical)': 'R7.htm',
 'consolidated statements of cash flows': 'R8.htm'}

In [46]:
import numpy as np

def extract_columns_values_and_dates_from_statement(soup):
    """
    Extracts columns, values, and dates from an HTML soup object representing a financial statement.

    Args:
        soup (BeautifulSoup): The BeautifulSoup object of the HTML document.

    Returns:
        tuple: Tuple containing columns, values_set, and date_time_index.
    """
    columns = []
    values_set = []
    date_time_index = get_datetime_index_dates_from_statement(soup)

    for table in soup.find_all("table"):
        unit_multiplier = 1
        special_case = False

        # Check table headers for unit multipliers and special cases
        table_header = table.find("th")
        if table_header:
            header_text = table_header.get_text()
            # Determine unit multiplier based on header text
            if "in Thousands" in header_text:
                unit_multiplier = 1
            elif "in Millions" in header_text:
                unit_multiplier = 1000
            # Check for special case scenario
            if "unless otherwise specified" in header_text:
                special_case = True

        # Process each row of the table
        for row in table.select("tr"):
            onclick_elements = row.select("td.pl a, td.pl.custom a")
            if not onclick_elements:
                continue

            # Extract column title from 'onclick' attribute
            onclick_attr = onclick_elements[0]["onclick"]
            column_title = onclick_attr.split("defref_")[-1].split("',")[0]
            columns.append(column_title)

            # Initialize values array with NaNs
            values = [np.nan] * len(date_time_index)

            # Process each cell in the row
            for i, cell in enumerate(row.select("td.text, td.nump, td.num")):
                if "text" in cell.get("class"):
                    continue

                # Clean and parse cell value
                value = keep_numbers_and_decimals_only_in_string(
                    cell.text.replace("$", "")
                    .replace(",", "")
                    .replace("(", "")
                    .replace(")", "")
                    .strip()
                )
                if value:
                    value = float(value)
                    # Adjust value based on special case and cell class
                    if special_case:
                        value /= 1000
                    else:
                        if "nump" in cell.get("class"):
                            values[i] = value * unit_multiplier
                        else:
                            values[i] = -value * unit_multiplier

            values_set.append(values)

    return columns, values_set, date_time_index


def get_datetime_index_dates_from_statement(soup: BeautifulSoup) -> pd.DatetimeIndex:
    """
    Extracts datetime index dates from the HTML soup object of a financial statement.

    Args:
        soup (BeautifulSoup): The BeautifulSoup object of the HTML document.

    Returns:
        pd.DatetimeIndex: A Pandas DatetimeIndex object containing the extracted dates.
    """
    table_headers = soup.find_all("th", {"class": "th"})
    dates = [str(th.div.string) for th in table_headers if th.div and th.div.string]
    dates = [standardize_date(date).replace(".", "") for date in dates]
    index_dates = pd.to_datetime(dates)
    return index_dates


def standardize_date(date: str) -> str:
    """
    Standardizes date strings by replacing abbreviations with full month names.

    Args:
        date (str): The date string to be standardized.

    Returns:
        str: The standardized date string.
    """
    for abbr, full in zip(calendar.month_abbr[1:], calendar.month_name[1:]):
        date = date.replace(abbr, full)
    return date


def keep_numbers_and_decimals_only_in_string(mixed_string: str):
    """
    Filters a string to keep only numbers and decimal points.

    Args:
        mixed_string (str): The string containing mixed characters.

    Returns:
        str: String containing only numbers and decimal points.
    """
    num = "1234567890."
    allowed = list(filter(lambda x: x in num, mixed_string))
    return "".join(allowed)


def create_dataframe_of_statement_values_columns_dates(
    values_set, columns, index_dates
) -> pd.DataFrame:
    """
    Creates a DataFrame from statement values, columns, and index dates.

    Args:
        values_set (list): List of values for each column.
        columns (list): List of column names.
        index_dates (pd.DatetimeIndex): DatetimeIndex for the DataFrame index.

    Returns:
        pd.DataFrame: DataFrame constructed from the given data.
    """
    transposed_values_set = list(zip(*values_set))
    df = pd.DataFrame(transposed_values_set, columns=columns, index=index_dates)
    return df


def process_one_statement(ticker, accession_number, statement_name):
    """
    Processes a single financial statement identified by ticker, accession number, and statement name.

    Args:
        ticker (str): The stock ticker.
        accession_number (str): The SEC accession number.
        statement_name (str): Name of the financial statement.

    Returns:
        pd.DataFrame or None: DataFrame of the processed statement or None if an error occurs.
    """
    try:
        # Fetch the statement HTML soup
        soup = get_statement_soup(
            ticker,
            accession_number,
            statement_name,
            headers=headers,
            statement_keys_map=statement_keys_map,
        )
    except Exception as e:
        settings.logger.error(
            f"Failed to get statement soup: {e} for accession number: {accession_number}"
        )
        return None

    if soup:
        try:
            # Extract data and create DataFrame
            columns, values, dates = extract_columns_values_and_dates_from_statement(
                soup
            )
            df = create_dataframe_of_statement_values_columns_dates(
                values, columns, dates
            )

            if not df.empty:
                # Remove duplicate columns
                df = df.T.drop_duplicates()
            else:
                logging.warning(
                    f"Empty DataFrame for accession number: {accession_number}"
                )
                return None

            return df
        except Exception as e:
            settings.logger.error(f"Error processing statement: {e}")
            return None

In [47]:
ticker='TSLA'
process_one_statement(ticker, acc_num, 'balance_sheet')

Unnamed: 0,2025-03-31,2024-12-31
us-gaap_AssetsCurrentAbstract,,
us-gaap_CashAndCashEquivalentsAtCarryingValue,16352000.0,16139000.0
us-gaap_ShortTermInvestments,20644000.0,20424000.0
us-gaap_AccountsReceivableNetCurrent,3782000.0,4418000.0
us-gaap_InventoryNet,13706000.0,12017000.0
us-gaap_PrepaidExpenseAndOtherAssetsCurrent,4905000.0,5362000.0
us-gaap_AssetsCurrent,59389000.0,58360000.0
us-gaap_PropertyPlantAndEquipmentNet,37088000.0,35836000.0
us-gaap_OperatingLeaseRightOfUseAsset,5330000.0,5160000.0
us-gaap_CryptoAssetFairValueNoncurrent,951000.0,1076000.0


In [40]:
print(acc_num)

000162828025018911


In [48]:
def get_label_dictionary(ticker, headers):
    facts = get_facts(ticker, headers)
    us_gaap_data = facts["facts"]["us-gaap"]
    labels_dict = {fact: details["label"] for fact, details in us_gaap_data.items()}
    return labels_dict


def rename_statement(statement, label_dictionary):
    # Extract the part after the first "_" and then map it using the label dictionary
    statement.index = statement.index.map(
        lambda x: label_dictionary.get(x.split("_", 1)[-1], x)
    )
    return statement

In [50]:
my_statement = process_one_statement(ticker, acc_num, 'balance_sheet')
label_dict = get_label_dictionary(ticker, headers)
work_statement = rename_statement(my_statement, label_dict)

In [51]:
work_statement

Unnamed: 0,2025-03-31,2024-12-31
us-gaap_AssetsCurrentAbstract,,
"Cash and Cash Equivalents, at Carrying Value",16352000.0,16139000.0
Short-term Investments,20644000.0,20424000.0
"Accounts Receivable, after Allowance for Credit Loss, Current",3782000.0,4418000.0
"Inventory, Net",13706000.0,12017000.0
"Prepaid Expense and Other Assets, Current",4905000.0,5362000.0
"Assets, Current",59389000.0,58360000.0
"Property, Plant and Equipment, Net",37088000.0,35836000.0
"Operating Lease, Right-of-Use Asset",5330000.0,5160000.0
,951000.0,1076000.0


In [55]:
work_statement.info()


<class 'pandas.core.frame.DataFrame'>
Index: 34 entries, us-gaap_AssetsCurrentAbstract to tsla_LeasedAssetsNet
Data columns (total 2 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   2025-03-31 00:00:00  33 non-null     float64
 1   2024-12-31 00:00:00  33 non-null     float64
dtypes: float64(2)
memory usage: 816.0+ bytes


In [58]:
work_statement.loc['short_term_debt']

KeyError: 'short_term_debt'