In [None]:
from models.balance_sheet import BalanceSheet
from sec_processing.utils import *
import settings
import re
from typing import List, Dict

In [None]:
def extract_avg_total_debt(
    df_: pd.DataFrame,
    debt_columns=(
        'tsla_LongTermDebtAndFinanceLeasesNoncurrent',
        'tsla_LongTermDebtAndFinanceLeasesCurrent',
    ),
) -> float:
    """
    Return the mean of the row-wise total of the selected debt line items.

    ``df_`` is transposed before selection, so the debt line items must be
    row labels of ``df_``; every column of ``df_`` contributes one total to
    the mean.

    :param df_: frame whose index contains every label in ``debt_columns``
    :param debt_columns: row labels of ``df_`` to add together; defaults to
        the two ``tsla_`` tags this function was originally hard-coded for
    :return: mean of the per-column totals. A total is NaN when any of its
        items is NaN, and such totals are skipped by ``mean``.
    :raises ValueError: if ``debt_columns`` is empty
    :raises KeyError: if a requested label is missing from ``df_``
    """
    labels = list(debt_columns)
    if not labels:
        raise ValueError("debt_columns must name at least one line item")

    debt_df = df_.transpose().loc[:, labels]

    # Accumulate with `+` (rather than DataFrame.sum) so a missing value in
    # any item still yields a NaN total, exactly as the two-column version did.
    # The running total lives in its own Series instead of being written back
    # into the `.loc` selection.
    total_debt = debt_df.iloc[:, 0]
    for position in range(1, debt_df.shape[1]):
        total_debt = total_debt + debt_df.iloc[:, position]

    return total_debt.mean()


# Example debt mapping dictionary.
# Keys are column labels as they appear in a report; values are the canonical
# debt-category keys that detect_debt_columns() uses in its result dict.
# Several labels may share one canonical key (both "Long-term Debt" variants
# below resolve to "long_term_debt").
debt_mapping = {
    "Long-term Debt": "long_term_debt",
    "Long-term Debt and Lease Obligation": "long_term_debt",
    "Long-term Debt and Lease Obligation, Current": "current_portion_long_term_debt",
    "Short-term Debt": "short_term_debt",
    # Add others as you analyze more filings
}

# Regular expressions used by detect_debt_columns() to recognise debt-like
# column labels; they are searched case-insensitively.
# NOTE: the final pattern matches any label containing "debt", so it already
# covers everything the first three patterns match; only the lease/obligation
# pattern can match a label that does not contain "debt".
debt_patterns = [
    r".*long[- ]?term.*debt.*",
    r".*short[- ]?term.*debt.*",
    r".*debt.*obligation.*",
    r".*lease.*obligation.*",
    r".*debt.*"
    # refine further as needed
]


def detect_debt_columns(columns, debt_mapping, debt_patterns):
    """
    Hybrid approach: map known columns first, then regex fallback.

    A column whose label equals a key of ``debt_mapping`` (ignoring case) is
    stored under that key's canonical name. When several columns resolve to
    the same canonical name, the last one in ``columns`` wins. Every other
    column that matches at least one regex is collected, once, under
    ``"debt_regex_match"`` - so no column is reported twice.

    :param columns: List of column names; entries that are not strings
        (e.g. ``None``) are ignored
    :param debt_mapping: dict of known mappings (label -> canonical key)
    :param debt_patterns: list of regex patterns, searched case-insensitively
    :return: dict with matched columns: canonical key -> column name, plus
        ``"debt_regex_match"`` -> list of column names when the fallback
        found anything
    """
    # Lower-cased index so the lookup is case-insensitive and O(1) per column.
    known_labels = {label.lower(): key for label, key in debt_mapping.items()}
    compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in debt_patterns]
    labels = [col for col in columns if isinstance(col, str)]

    detected = {}

    # First pass: dictionary mapping
    for col in labels:
        canonical_key = known_labels.get(col.lower())
        if canonical_key is not None:
            detected[canonical_key] = col

    # Second pass: regex fallback for columns the mapping did not keep.
    # A column displaced by a later one with the same canonical key is not in
    # `directly_mapped`, so it can still be picked up here instead of vanishing.
    directly_mapped = set(detected.values())
    regex_matches = []
    already_listed = set()
    for col in labels:
        if col in directly_mapped or col in already_listed:
            continue
        if any(pattern.search(col) for pattern in compiled_patterns):
            already_listed.add(col)
            regex_matches.append(col)

    # The key is only present when the fallback actually matched something.
    if regex_matches:
        detected["debt_regex_match"] = regex_matches

    return detected





In [None]:
import settings  # assuming you have logging here

# Known column labels -> canonical debt-category keys.
# detect_debt_columns() compares these labels to column names ignoring case;
# main() then reads the canonical keys ("long_term_debt",
# "current_portion_long_term_debt", "short_term_debt") from the result.
debt_mapping = {
    "Long-term Debt": "long_term_debt",
    "Long-term Debt and Lease Obligation": "long_term_debt",
    "Long-term Debt and Lease Obligation, Current": "current_portion_long_term_debt",
    "Short-term Debt": "short_term_debt",
}

# Regexes for recognising debt-like column labels that are not listed in
# debt_mapping. detect_debt_columns() searches them case-insensitively and
# collects matching columns under "debt_regex_match".
# The leading/trailing ".*" are not required for a search-style match.
debt_patterns = [
    r".*long[- ]?term.*debt.*",
    r".*short[- ]?term.*debt.*",
    r".*debt.*obligation.*",
    r".*lease.*obligation.*",
]

def detect_debt_columns(columns, debt_mapping, debt_patterns):
    """
    Classify column labels as debt columns via known labels, then regexes.

    A column whose label equals a key of ``debt_mapping`` (ignoring case) is
    stored under that key's canonical name; when several columns resolve to
    the same canonical name, the last one in ``columns`` wins. Every other
    column that matches at least one regex is listed exactly once under
    ``"debt_regex_match"``, so no column appears twice in the result and a
    caller that sums all detected columns does not double count.

    :param columns: iterable of column names; entries that are not strings
        (e.g. ``None``) are skipped
    :param debt_mapping: dict of known label -> canonical key
    :param debt_patterns: list of regex patterns, searched case-insensitively
    :return: dict of canonical key -> column name, plus
        ``"debt_regex_match"`` -> list of column names when any regex matched
    """
    # Build the case-insensitive lookup once instead of scanning the mapping
    # for every column.
    known_labels = {label.lower(): key for label, key in debt_mapping.items()}
    compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in debt_patterns]
    labels = [col for col in columns if isinstance(col, str)]

    detected = {}
    for col in labels:
        canonical_key = known_labels.get(col.lower())
        if canonical_key is not None:
            detected[canonical_key] = col

    # Columns kept by the mapping are excluded from the regex pass. A column
    # displaced by a later one with the same canonical key is not in this set,
    # so it can still be reported by the regex pass rather than being lost.
    directly_mapped = set(detected.values())
    regex_matches = []
    already_listed = set()
    for col in labels:
        if col in directly_mapped or col in already_listed:
            continue
        # any() stops at the first hit, so a column matching several patterns
        # is still appended only once.
        if any(pattern.search(col) for pattern in compiled_patterns):
            already_listed.add(col)
            regex_matches.append(col)

    # The key is only present when at least one regex matched.
    if regex_matches:
        detected["debt_regex_match"] = regex_matches

    return detected

def _unique_debt_columns(detected):
    """Flatten the detected debt columns into an ordered list without repeats."""
    flat_cols = []
    # Directly mapped categories first, regex matches last (same order as before).
    for key in ("long_term_debt", "current_portion_long_term_debt",
                "short_term_debt", "debt_regex_match"):
        if key not in detected:
            continue
        value = detected[key]
        # Mapped keys hold a single column name, the regex key holds a list.
        if isinstance(value, list):
            flat_cols.extend(value)
        else:
            flat_cols.append(value)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(flat_cols))


def main():
    """
    Print the average total debt for each ticker in a fixed list.

    For every ticker the balance-sheet report is fetched and transposed, its
    debt columns are detected with ``detect_debt_columns``, the selected
    columns are summed row-wise and the mean of those totals is printed.
    Failures during detection or aggregation are logged through
    ``settings.logger`` and the loop moves on to the next ticker; errors
    raised while fetching or transposing a report are not caught here.
    """
    tickers = ["MSFT", "JNJ", "HD", "GOOGL", "TSLA", "HD", "JNJ"]
    metric_ = "avg_total_debt"
    report_type = "balance_sheet"

    # The list repeats "HD" and "JNJ"; dict.fromkeys keeps the order but makes
    # sure each ticker is fetched and reported only once.
    for ticker in dict.fromkeys(tickers):
        report_df = fetch_report_to_df(ticker, report_type)
        # Transpose once and reuse it for both detection and selection.
        transposed_df = report_df.transpose()
        cols = list(transposed_df.columns)

        try:
            detected_debt_cols = detect_debt_columns(cols, debt_mapping, debt_patterns)

            # A column can be reported more than once by the detector (for
            # example under a mapped key and again as a regex match).
            # Selecting it twice would count its value twice in the total.
            debt_cols = _unique_debt_columns(detected_debt_cols)

            if not debt_cols:
                raise ValueError("No debt columns found")

            # Sum debt columns row-wise; kept as a standalone Series rather
            # than being assigned back into the `.loc` selection.
            total_debt = transposed_df.loc[:, debt_cols].sum(axis=1)
            metric_value = total_debt.mean()

            print(f'Ticker: {ticker} , {metric_} : {metric_value}')

        except Exception as e:
            # Top-level boundary: report the failure and continue with the
            # remaining tickers.
            settings.logger.error(f"Could not find {metric_} in: {report_type} for {ticker} : {e}")


In [None]:
# Run the report when this cell is executed.
main()