### Exploration Junkyard

In [None]:
# Helper function for NaN-safe division, used when computing financial metrics
def safe_div(a, b):
    """Divide ``a`` by ``b``, returning NaN whenever that is not possible.

    NaN is returned when ``b`` is ``None``, zero or NaN, and also when the
    denominator check or the division itself raises (for example because an
    operand is not numeric).
    """
    try:
        denominator_unusable = b is None or b == 0 or np.isnan(b)
        if denominator_unusable:
            return np.nan
        return a / b
    except Exception:
        # Best-effort helper: any failure is reported as a missing value.
        return np.nan
    
# Helper function to grab the value for a given position label from a statement dataframe
def get_value(df, label, value_col="STD Balance Sheet All"):
    """Return the first value recorded for ``label`` in ``df``.

    Args:
        df: DataFrame with a ``position_label`` column and a value column.
        label: Position label to look up.
        value_col: Name of the column holding the values. Defaults to the
            balance-sheet column so existing two-argument calls behave as
            before; pass another column name to read other statements.

    Returns:
        The ``value_col`` entry of the first row whose ``position_label``
        equals ``label``, or NaN when no row matches.
    """
    rows = df[df["position_label"] == label]
    if rows.empty:
        return np.nan
    return rows[value_col].iloc[0]

In [None]:
def compute_financial_metrics(balance_sheets, income_statements, cash_flows, cik, date):
    """Compute financial metrics for one company (CIK) at one reporting date.

    Each statement frame is filtered to the rows matching ``cik`` and
    ``date``, its numeric column is renamed to ``"Value"``, and the three
    results are stacked so line items can be looked up by ``position_label``.

    Args:
        balance_sheets: DataFrame with ``CIK``, ``Date``, ``position_label``
            and ``STD Balance Sheet All`` columns.
        income_statements: DataFrame with ``CIK``, ``Date``,
            ``position_label`` and ``STD Income Statement All`` columns.
        cash_flows: DataFrame with ``CIK``, ``Date``, ``position_label`` and
            ``STD Cash Flow All`` columns.
        cik: Value compared against the ``CIK`` column.
        date: Value compared against the ``Date`` column.

    Returns:
        A one-row ``pandas.DataFrame`` holding ``CIK``, ``Date`` and the
        computed metrics, rounded to 6 decimals. A metric is NaN when one of
        its inputs is missing or its denominator is zero.
    """

    # 1. Filter each statement for the input CIK and date, and rename its
    #    numeric column to a common name for easier extraction
    def _select(statement, value_column):
        mask = (statement["CIK"] == cik) & (statement["Date"] == date)
        return statement[mask].rename(columns={value_column: "Value"})

    bs = _select(balance_sheets, "STD Balance Sheet All")
    is_ = _select(income_statements, "STD Income Statement All")
    cf = _select(cash_flows, "STD Cash Flow All")

    # 2. Merge all statements into one
    merged = pd.concat([bs, is_, cf], ignore_index=True)

    # Local lookup bound to the merged frame: first value for a label, else NaN
    def _lookup(label):
        values = merged.loc[merged["position_label"] == label, "Value"]
        return values.iloc[0] if not values.empty else np.nan

    # Subtraction that yields NaN instead of raising when an operand is missing
    def _difference(left, right):
        if pd.isna(left) or pd.isna(right):
            return np.nan
        return left - right

    # Ratio rounded to the precision used for every metric
    def _ratio(numerator, denominator):
        return round(safe_div(numerator, denominator), 6)

    # 3. Extract values
    total_assets = _lookup("Total Assets")
    total_equity = _lookup("Total Shareholders' Equity")
    total_debt = _lookup("Debt, Total")
    current_assets = _lookup("Total Current Assets")
    current_liabilities = _lookup("Total Current Liabilities")
    inventory = _lookup("Inventory, Total")
    cash = _lookup("Cash and Cash Equivalents, Total")
    revenue = _lookup("Total Revenue")
    cogs = _lookup("Cost of Goods Sold, Total")
    operating_income = _lookup("Earnings Before Interest and Taxes")
    net_income = _lookup("Net Income After Tax")
    interest_expense = _lookup("Interest Expense")

    # Fall back to the operating figure only when the equity figure is
    # missing. `a or b` cannot express this: NaN is truthy (so the fallback
    # would never run) and a genuine 0 is falsy (so it would be discarded).
    fcf = _lookup("Free Cash Flow to Equity")
    if pd.isna(fcf):
        fcf = _lookup("Free Operating Cash Flow")

    net_debt = _difference(total_debt, cash)

    # 4. Compute metrics with rounding
    metrics = {
        "CIK": cik,
        "Date": date,
        "ROE": _ratio(net_income, total_equity),
        "ROA": _ratio(net_income, total_assets),
        "Gross_Margin": _ratio(_difference(revenue, cogs), revenue),
        "Operating_Margin": _ratio(operating_income, revenue),
        "Net_Margin": _ratio(net_income, revenue),
        "Debt_to_Equity": _ratio(total_debt, total_equity),
        "Current_Ratio": _ratio(current_assets, current_liabilities),
        "Quick_Ratio": _ratio(_difference(current_assets, inventory), current_liabilities),
        "Interest_Coverage": _ratio(operating_income, interest_expense),
        "Free_Cash_Flow": round(fcf, 6) if not pd.isna(fcf) else np.nan,
        "Free_Cash_Flow_to_Net_Income": _ratio(fcf, net_income),
        "Net_Debt": round(net_debt, 6) if not pd.isna(net_debt) else np.nan,
    }

    return pd.DataFrame([metrics])

In [None]:
# from tqdm import tqdm
# # Compute financial metrics for all CIK and date combinations with progress bar
# financial_metrics_list = []

# # Iterate over each row in all_ciks_and_dates
# for idx, row in tqdm(all_ciks_and_dates.iterrows(), total=len(all_ciks_and_dates), desc="Computing metrics"):
#     cik = row["CIK"]
#     date = row["Date"]
#     metrics_df = compute_financial_metrics(
#         labeled_balance_sheets, 
#         labeled_income_statements, 
#         labeled_cash_flow_statements, 
#         cik, 
#         date
#     )
#     financial_metrics_list.append(metrics_df)

# # Concatenate all metrics DataFrames into a single DataFrame
# financial_metrics = pd.concat(financial_metrics_list, ignore_index=True)
# financial_metrics.to_csv("../data/financial_metrics_all_companies.csv", index=False)