# Import Libraries

In [47]:
import pandas as pd
import numpy as np
from pathlib import Path

# Data Treatment

In [48]:
# We create the financials for the raw data obtained in notebook data_collection.ipynb

financials = pd.read_pickle('../data/raw_financials.pkl')
financials.sort_values(['Ticker', 'Date'], inplace=True)

In [49]:
# We have some tickers with data between quarters. We drop this rows to keep only quarterly data (3 months difference between rows)

financials['months_diff_next'] = (
    financials
    .groupby('Ticker')['Date']
    .diff(periods=-1)
    .dt.days / 30
).round()


rows_to_drop = financials[financials['months_diff_next'].isin([-6, -2])].index
financials.drop(rows_to_drop, inplace=True)

financials.drop(columns=['months_diff_next'], inplace=True)

print(f'We dropped {len(rows_to_drop)} rows of data between quarters')

We dropped 10 rows of data between quarters


In [50]:
cols_to_drop = [

    # =====================================================
    # Highly granular physical assets (low predictive power,
    # highly correlated with aggregate asset metrics)
    # =====================================================
    "Land And Improvements",
    "Buildings And Improvements",
    "Machinery Furniture Equipment",
    "Construction In Progress",
    "Work In Process",
    "Properties",
    "Other Properties",

    # =====================================================
    # Highly granular inventory details
    # =====================================================
    "Finished Goods",
    "Raw Materials",
    "Other Inventories",
    "Inventories Adjustments Allowances",

    # =====================================================
    # Highly granular receivables / payables
    # =====================================================
    "Gross Accounts Receivable",
    "Allowance For Doubtful Accounts Receivable",
    "Receivables Adjustments Allowances",
    "Provision For Doubtful Accounts",
    "Notes Receivable",
    "Non Current Note Receivables",

    # =====================================================
    # Duplicated / redundant Net Income definitions
    # Keep only: "Net Income From Continuing Operations"
    # =====================================================
    "Net Income",
    "Net Income Common Stockholders",
    "Net Income Continuous Operations",
    "Net Income From Continuing Operation Net Minority Interest",
    "Net Income From Continuing And Discontinued Operation",
    "Net Income Including Noncontrolling Interests",
    "Diluted NI Availto Com Stockholders",

    # =====================================================
    # Earnings Per Share (EPS)
    # Keep only: "Diluted EPS"
    # =====================================================
    "Basic EPS",

    # =====================================================
    # Redundant cash flow aggregates
    # Keep only: "Operating Cash Flow", "Free Cash Flow", and "Cash And Cash Equivalents"
    # =====================================================
    "Cash Flow From Continuing Operating Activities",
    "Financing Cash Flow",
    "Cash Flow From Continuing Financing Activities",
    "Investing Cash Flow",
    "Cash Flow From Continuing Investing Activities",
    "Changes In Cash",
    "Beginning Cash Position",
    "End Cash Position",
    "Cash Cash Equivalents And Short Term Investments",
    # REMOVED: "Cash And Cash Equivalents" - CRITICAL for liquidity analysis
    "Repurchase Of Capital Stock",
    "Repayment Of Debt",
    "Issuance Of Debt",
    "Interest Paid Supplemental Data",
    "Income Tax Paid Supplemental Data",
    "Effect Of Exchange Rate Changes",
    "Net Other Financing Charges",
    "Proceeds From Stock Option Exercised",
    "Cash Dividends Paid",
    "Common Stock Dividend Paid",
    "Net Common Stock Issuance",
    "Common Stock Payments",
    "Net Issuance Payments Of Debt",
    "Net Long Term Debt Issuance",
    "Long Term Debt Payments",
    "Long Term Debt Issuance",
    "Net Other Investing Changes",
    "Net Investment Purchase And Sale",
    "Sale Of Investment",
    "Purchase Of Investment",
    "Net Business Purchase And Sale",
    "Purchase Of Business",
    "Net PPE Purchase And Sale",
    "Purchase Of PPE",
    "Other Non Cash Items",

    # =====================================================
    # Depreciation / amortization highly redundant metrics
    # Keep only: "Depreciation And Amortization"
    # =====================================================
    "Normalized EBITDA",
    "Depreciation",
    "Reconciled Depreciation",
    "Depreciation Income Statement",
    "Depreciation Amortization Depletion",
    "Depreciation Amortization Depletion Income Statement",
    "Depreciation And Amortization In Income Statement",
    "Amortization",
    "Amortization Cash Flow",
    "Amortization Of Intangibles",
    "Amortization Of Intangibles Income Statement",
    "Depletion",
    "Depletion Income Statement",

    # =====================================================
    # Overly detailed equity structure components
    # Keep only aggregate equity metrics
    # =====================================================
    "Capital Stock",
    "Common Stock",
    "Preferred Stock",
    "Additional Paid In Capital",
    "Other Equity Adjustments",
    "Gains Losses Not Affecting Retained Earnings",
    "Treasury Stock",
    "Treasury Shares Number",
    "Minority Interest",
    "Minority Interests",
    "Common Stock Equity",  # Redundant with Stockholders Equity
    "Other Non Current Liabilities",
    "Other Current Borrowings",
    "Employee Benefits",
    "Non Current Pension And Other Postretirement Benefit Plans",
    "Long Term Debt And Capital Lease Obligation",
    "Current Deferred Liabilities",
    "Current Deferred Revenue",
    "Pensionand Other Post Retirement Benefit Plans Current",
    "Payables And Accrued Expenses",
    "Current Accrued Expenses",
    "Payables",
    "Accounts Payable",
    "Other Non Current Assets",
    "Other Intangible Assets",
    "Other Current Assets",
    "Other Investments",
    "Other Payable",
    "Preferred Stock Dividends",
    "Preferred Shares Number",
    "Preferred Stock Equity",
    "Long Term Equity Investment",
    "Investments In Other Ventures Under Equity Method",
    "Prepaid Assets",
    "Issuance Of Capital Stock",
    "Preferred Stock Dividend Paid",
    "Common Stock Issuance",
    "Earnings Losses From Equity Investments",
    "Average Dilution Earnings",
    "Other Gand A",
    "Other Equity Interest",
    "Non Current Deferred Revenue",
    "Current Capital Lease Obligation",
    "Non Current Accounts Receivable",
    "Ordinary Shares Number",  # Redundant with Diluted Average Shares
    "Share Issued",            # Redundant with Diluted Average Shares
    "Basic Average Shares",    # Keep only Diluted Average Shares

    # =====================================================
    # Working capital micro-changes (very noisy, highly correlated)
    # Keep only: "Working Capital" and "Change In Working Capital"
    # =====================================================
    "Change In Payables And Accrued Expense",
    "Change In Payable",
    "Change In Account Payable",
    "Change In Inventory",
    "Change In Receivables",
    "Changes In Account Receivables",
    "Change In Other Working Capital",
    "Change In Other Current Assets",
    "Change In Other Current Liabilities",

    # =====================================================
    # One-off / non-recurring items (introduce noise)
    # =====================================================
    "Total Unusual Items",
    "Total Unusual Items Excluding Goodwill",
    "Other Income Expense",
    "Other Non Operating Income Expenses",
    "Operating Gains Losses",
    "Gain Loss On Investment Securities",
    "Unrealized Gain Loss On Investment Securities",
    "Gain On Sale Of Business",
    "Gain On Sale Of Ppe",
    "Sale Of Business",
    "Net Income Discontinuous Operations",
    "Net Income From Tax Loss Carryforward",
    "Net Income Extraordinary",
    "Provisionand Write Offof Assets",

    # =====================================================
    # Sector-specific insurance / financial instruments
    # (harm generalization across sectors)
    # =====================================================
    "Net Policyholder Benefits And Claims",
    "Policyholder Benefits Gross",
    "Policyholder Benefits Ceded",
    "Insurance And Claims",
    "Trading Securities",
    "Held To Maturity Securities",
    "Derivative Product Liabilities",

    # =====================================================
    # Overly detailed financing components
    # Keep only aggregate debt metrics
    # =====================================================
    "Commercial Paper",
    "Line Of Credit",
    "Current Notes Payable",
    "Short Term Debt Issuance",
    "Short Term Debt Payments",
    "Net Short Term Debt Issuance",

    # =====================================================
    # Micro / noisy financial metrics
    # =====================================================
    "Net Non Operating Interest Income Expense",
    "Interest Expense Non Operating",
    "Interest Income Non Operating",
    "Reconciled Cost Of Revenue",
    "Operating Revenue",  # Redundant with Total Revenue
    "Interest Payable",
    "Sale Of PPE",
    "Earnings From Equity Interest",
    "Hedging Assets Current",
    "Restricted Cash",
    "Taxes Receivable",
    "Capital Expenditure Reported",
    "Salaries And Wages",
    "Net Intangibles Purchase And Sale",
    "Purchase Of Intangibles",
    "Amortization Of Securities",
    "Pension And Employee Benefit Expense",
    "Total Other Finance Cost",
    "Other Taxes",
    "Long Term Provisions",
    "Defined Pension Benefit",
    "Preferred Securities Outside Stock Equity",
    "Financial Assets",
    "Assets Held For Sale Current",
    "Liabilities Heldfor Sale Non Current",
    "Investmentsin Associatesat Cost",
    "Other Cash Adjustment Inside ChangeinCash",
    "Net Preferred Stock Issuance",
    "Preferred Stock Issuance",
    "Net Foreign Currency Exchange Gain Loss",
    "Minimum Pension Liabilities",
    "Loans Receivable",
    "Other Cash Adjustment Outside Changein Cash",
    "Cash From Discontinued Financing Activities",
    "Preferred Stock Payments",
    "Cash From Discontinued Investing Activities",
    "Cash From Discontinued Operating Activities",
    "Current Deferred Assets",
    "Dueto Related Parties Current",
    "Investmentsin Joint Venturesat Cost",
    "Dividend Received Cfo",
    "Duefrom Related Parties Current",
    "Rent Expense Supplemental",
    "Rent And Landing Fees",
    "Non Current Prepaid Assets",
    "Non Current Accrued Expenses",
    "Investment Properties",
    "Net Investment Properties Purchase And Sale",
    "Sale Of Investment Properties",
    "Purchase Of Investment Properties",
    "Dividends Received Cfi",
    "Occupancy And Equipment",
    "Professional Expense And Contract Services Expense",
    "Other Non Interest Expense",
    "Cash Cash Equivalents And Federal Funds Sold",
    "Sale Of Intangibles",
    "Financial Assets Designatedas Fair Value Through Profitor Loss Total",
    "Change In Interest Payable",
    "Investmentsin Subsidiariesat Cost",
    "Excess Tax Benefit From Stock Based Compensation",
    "Current Deferred Taxes Assets",
    "Interest Paid Cff",
    "Excise Taxes",
    "Dividend Paid Cfo",
    "Cash Flowsfromusedin Operating Activities Direct",
    "Classesof Cash Payments",
    "Other Cash Paymentsfrom Operating Activities",
    "Paymentson Behalfof Employees",
    "Classesof Cash Receiptsfrom Operating Activities",
    "Other Cash Receiptsfrom Operating Activities",
    "Taxes Refund Paid",
    "Securities Amortization",
    "Duefrom Related Parties Non Current",
    "Dueto Related Parties Non Current",
    "Interest Paid Cfo",
    
    # =====================================================
    # Redundant income metrics - keeping only cleaner versions
    # =====================================================
    "Normalized Income",  # Keep "Net Income From Continuing Operations" instead
    "Total Operating Income As Reported",  # Keep "Operating Income" instead
]


In [51]:
# We drop the columns selected previously

financials = financials.drop(columns=cols_to_drop, errors='ignore')

In [52]:
financials = financials.sort_values(['Ticker', 'Date'])

financials['n_prev_dates'] = (
    financials
    .groupby('Ticker')
    .cumcount()
)


In [53]:
financials['n_prev_dates'].value_counts()

n_prev_dates
0    472
1    472
2    472
3    472
4    472
5    457
6    439
Name: count, dtype: int64

In [54]:
#Lots of columns with too many nulls

prctg_of_nulls = (financials.isna().mean()).sort_values(ascending=False)
prctg_of_nulls[prctg_of_nulls>0.4]

Cash Flow From Discontinued Operation         0.994779
Other Cash Adjustment Inside Changein Cash    0.994472
Accrued Interest Receivable                   0.986179
Unrealized Gain Loss                          0.985565
Foreign Currency Translation Adjustments      0.977580
Loss Adjustment Expense                       0.966523
Cash Equivalents                              0.936732
Gain Loss On Sale Of PPE                      0.921990
Earnings From Equity Interest Net Of Tax      0.917998
Current Provisions                            0.915233
Leases                                        0.912776
Cash Financial                                0.906020
Write Off                                     0.906020
Impairment Of Capital Assets                  0.901720
Otherunder Preferred Stock Dividend           0.884828
Gain Loss On Sale Of Business                 0.884214
Investmentin Financial Assets                 0.874693
Dividends Payable                             0.870393
Available 

In [55]:
# list of critical columns that we want to keep
critical_cols = [
    # ===== Identifiers =====
    'Ticker', 'Date',
    
    # ===== Income Statement Metrics =====
    'EBITDA', 'EBIT', 'Operating Income', 'Operating Expense',
    'Pretax Income', 'Tax Provision', 'Net Income From Continuing Operations',
    'Gross Profit', 'Cost Of Revenue', 'Total Revenue',
    'Research And Development', 'Selling General And Administration',
    'Net Interest Income', 'Interest Expense', 'Interest Income',
    
    # ===== Per Share Metrics =====
    'Diluted Average Shares', 'Diluted EPS',
    
    # ===== Balance Sheet - Assets =====
    'Total Assets', 'Total Non Current Assets', 'Current Assets',
    'Net PPE', 'Gross PPE', 'Accumulated Depreciation',
    'Goodwill', 'Goodwill And Other Intangible Assets',
    'Investments And Advances',
    'Inventory', 'Receivables', 'Accounts Receivable',
    'Cash And Cash Equivalents',  # CRITICAL for liquidity
    
    # ===== Balance Sheet - Liabilities & Equity =====
    'Total Liabilities Net Minority Interest',
    'Total Non Current Liabilities Net Minority Interest',
    'Current Liabilities',
    'Total Debt', 'Net Debt', 'Long Term Debt', 'Current Debt',
    'Stockholders Equity', 'Total Equity Gross Minority Interest',
    'Retained Earnings',
    'Tangible Book Value', 'Net Tangible Assets',
    'Invested Capital', 'Total Capitalization',
    'Working Capital',
    
    # ===== Cash Flow Statement =====
    'Operating Cash Flow', 'Free Cash Flow', 'Capital Expenditure',
    'Change In Working Capital',
    'Depreciation And Amortization',
    'Stock Based Compensation',  # Important for tech companies
    'Deferred Tax', 'Deferred Income Tax',
    'Special Income Charges',
    
    # ===== Tax & Other =====
    'Tax Effect Of Unusual Items', 'Tax Rate For Calcs',
    
    # ===== Metrics with acceptable null levels =====
    'Non Current Deferred Liabilities',
    'Non Current Deferred Taxes Liabilities',
    'Capital Lease Obligations',
    'Long Term Capital Lease Obligation',
]

# We select columns with >60% nulls
null_percent = financials.isna().mean()
high_null_cols = null_percent[null_percent > 0.6].index.tolist()

# We drop those columns if not critical
cols_to_drop_nulls = [col for col in high_null_cols if col not in critical_cols]
financials = financials.drop(columns=cols_to_drop_nulls, errors='ignore')

print(f"Dropped {len(cols_to_drop_nulls)} columns with >60% nulls & not critical")
print(f"\nFinal dataset shape: {financials.shape}")
print(f"Final columns ({len(financials.columns)}): {financials.columns.tolist()}")

Dropped 38 columns with >60% nulls & not critical

Final dataset shape: (3256, 67)
Final columns (67): ['Ticker', 'Date', 'Tax Effect Of Unusual Items', 'Tax Rate For Calcs', 'EBITDA', 'EBIT', 'Net Interest Income', 'Interest Expense', 'Interest Income', 'Total Expenses', 'Diluted Average Shares', 'Diluted EPS', 'Tax Provision', 'Pretax Income', 'Operating Income', 'Operating Expense', 'Research And Development', 'Selling General And Administration', 'Gross Profit', 'Cost Of Revenue', 'Total Revenue', 'Net Debt', 'Total Debt', 'Tangible Book Value', 'Invested Capital', 'Working Capital', 'Net Tangible Assets', 'Total Capitalization', 'Total Equity Gross Minority Interest', 'Stockholders Equity', 'Retained Earnings', 'Total Liabilities Net Minority Interest', 'Total Non Current Liabilities Net Minority Interest', 'Long Term Debt', 'Current Liabilities', 'Current Debt And Capital Lease Obligation', 'Current Debt', 'Total Assets', 'Total Non Current Assets', 'Investments And Advances', 'G

In [56]:
'Cash And Cash Equivalents' in financials.columns.tolist()

True

In [57]:
# We impute nulls in a gross manner just to have a clean dataset and create a benchmark prediction

fill_zero = [
    'EBITDA','EBIT','Net Interest Income','Interest Expense','Interest Income',
    'Net Income From Continuing Operations',
    'Operating Income','Operating Expense',
    'Gross Profit','Cost Of Revenue','Total Revenue',
    'Research And Development','Selling General And Administration',
    'Tax Provision','Pretax Income',
    'Depreciation And Amortization','Stock Based Compensation',
    'Tax Effect Of Unusual Items','Special Income Charges',
    'Deferred Tax','Deferred Income Tax','Change In Working Capital',
    'Operating Cash Flow','Free Cash Flow','Capital Expenditure', 'Tax Rate For Calcs',
]

ffill_cols = [
    'Net Debt','Total Debt','Long Term Debt','Current Debt',
    'Total Assets','Total Non Current Assets','Current Assets',
    'Inventory','Receivables','Accounts Receivable',
    'Cash And Cash Equivalents',
    'Net PPE','Gross PPE','Accumulated Depreciation',
    'Goodwill','Goodwill And Other Intangible Assets',
    'Investments And Advances','Working Capital',
    'Tangible Book Value','Net Tangible Assets','Invested Capital',
    'Total Capitalization','Stockholders Equity','Retained Earnings',
    'Total Liabilities Net Minority Interest',
    'Total Non Current Liabilities Net Minority Interest',
    'Non Current Deferred Liabilities',
    'Non Current Deferred Taxes Liabilities',
    'Capital Lease Obligations','Long Term Capital Lease Obligation',
    'Diluted Average Shares','Diluted EPS', 
    'Current Liabilities', 'Total Equity Gross Minority Interest'
]

financials = financials.sort_values(['Ticker', 'Date'])

financials[fill_zero] = financials[fill_zero].fillna(0)
financials[ffill_cols] = financials.groupby('Ticker')[ffill_cols].ffill().fillna(0)

print(f'nulls in the dataset financials: {financials.isna().sum().sum()}')

nulls in the dataset financials: 2146


In [58]:
# Adding useful financial ratios

# =============================================================================
# PROFITABILITY RATIOS
# =============================================================================

# Operating Margin (Operating Income / Total Revenue)
financials['operating_margin'] = np.where(
    financials['Total Revenue'] != 0,
    financials['Operating Income'] / financials['Total Revenue'],
    0
)

# Net Profit Margin (Net Income / Total Revenue)
financials['net_profit_margin'] = np.where(
    financials['Total Revenue'] != 0,
    financials['Net Income From Continuing Operations'] / financials['Total Revenue'],
    0
)

# EBITDA Margin (EBITDA / Total Revenue)
financials['ebitda_margin'] = np.where(
    financials['Total Revenue'] != 0,
    financials['EBITDA'] / financials['Total Revenue'],
    0
)

# Gross Margin (Gross Profit / Total Revenue)
financials['gross_margin'] = np.where(
    financials['Total Revenue'] != 0,
    financials['Gross Profit'] / financials['Total Revenue'],
    0
)

# Return on Assets (ROA) - Net Income / Total Assets
financials['roa'] = np.where(
    financials['Total Assets'] != 0,
    financials['Net Income From Continuing Operations'] / financials['Total Assets'],
    0
)

# Return on Equity (ROE) - Net Income / Stockholders Equity
financials['roe'] = np.where(
    financials['Stockholders Equity'] != 0,
    financials['Net Income From Continuing Operations'] / financials['Stockholders Equity'],
    0
)

# =============================================================================
# LIQUIDITY RATIOS
# =============================================================================

# Current Ratio (Current Assets / Current Liabilities)
financials['current_ratio'] = np.where(
    financials['Current Liabilities'] != 0,
    financials['Current Assets'] / financials['Current Liabilities'],
    0
)

# Quick Ratio / Acid Test ((Current Assets - Inventory) / Current Liabilities)
financials['quick_ratio'] = np.where(
    financials['Current Liabilities'] != 0,
    (financials['Current Assets'] - financials['Inventory']) / financials['Current Liabilities'],
    0
)

# Cash Ratio (Cash And Cash Equivalents / Current Liabilities)
financials['cash_ratio'] = np.where(
    financials['Current Liabilities'] != 0,
    financials['Cash And Cash Equivalents'] / financials['Current Liabilities'],
    0
)

# =============================================================================
# LEVERAGE RATIOS
# =============================================================================

# Debt to Equity Ratio (Total Debt / Stockholders Equity)
financials['debt_to_equity'] = np.where(
    financials['Stockholders Equity'] != 0,
    financials['Total Debt'] / financials['Stockholders Equity'],
    0
)

# Debt to Assets Ratio (Total Debt / Total Assets)
financials['debt_to_assets'] = np.where(
    financials['Total Assets'] != 0,
    financials['Total Debt'] / financials['Total Assets'],
    0
)

# Equity Ratio (Stockholders Equity / Total Assets)
financials['equity_ratio'] = np.where(
    financials['Total Assets'] != 0,
    financials['Stockholders Equity'] / financials['Total Assets'],
    0
)

# Interest Coverage Ratio (EBIT / Interest Expense)
financials['interest_coverage'] = np.where(
    financials['Interest Expense'] != 0,
    financials['EBIT'] / financials['Interest Expense'],
    0
)

# =============================================================================
# EFFICIENCY RATIOS
# =============================================================================

# Asset Turnover (Total Revenue / Total Assets)
financials['asset_turnover'] = np.where(
    financials['Total Assets'] != 0,
    financials['Total Revenue'] / financials['Total Assets'],
    0
)

# Inventory Turnover (Cost of Revenue / Inventory)
financials['inventory_turnover'] = np.where(
    financials['Inventory'] != 0,
    financials['Cost Of Revenue'] / financials['Inventory'],
    0
)

# Receivables Turnover (Total Revenue / Accounts Receivable)
financials['receivables_turnover'] = np.where(
    financials['Accounts Receivable'] != 0,
    financials['Total Revenue'] / financials['Accounts Receivable'],
    0
)

# Working Capital Turnover (Total Revenue / Working Capital)
financials['working_capital_turnover'] = np.where(
    financials['Working Capital'] != 0,
    financials['Total Revenue'] / financials['Working Capital'],
    0
)

# =============================================================================
# CASH FLOW RATIOS
# =============================================================================

# Operating Cash Flow Ratio (Operating Cash Flow / Current Liabilities)
financials['ocf_ratio'] = np.where(
    financials['Current Liabilities'] != 0,
    financials['Operating Cash Flow'] / financials['Current Liabilities'],
    0
)

# Free Cash Flow to Revenue (Free Cash Flow / Total Revenue)
financials['fcf_to_revenue'] = np.where(
    financials['Total Revenue'] != 0,
    financials['Free Cash Flow'] / financials['Total Revenue'],
    0
)

# Cash Flow Margin (Operating Cash Flow / Total Revenue)
financials['cash_flow_margin'] = np.where(
    financials['Total Revenue'] != 0,
    financials['Operating Cash Flow'] / financials['Total Revenue'],
    0
)

# =============================================================================
# VALUATION & OTHER RATIOS
# =============================================================================

# Price to Earnings Ratio (P/E) - Stock Price / EPS
financials['pe_ratio'] = np.where(
    financials['Diluted EPS'] != 0,
    financials['close_stock_price'] / financials['Diluted EPS'],
    0
)

# Price to Book Ratio (P/B) - Stock Price / (Stockholders Equity / Shares)
financials['book_value_per_share'] = np.where(
    financials['Diluted Average Shares'] != 0,
    financials['Stockholders Equity'] / financials['Diluted Average Shares'],
    0
)

financials['pb_ratio'] = np.where(
    financials['book_value_per_share'] != 0,
    financials['close_stock_price'] / financials['book_value_per_share'],
    0
)

# EV/EBITDA approximation using Net Debt
# Enterprise Value ≈ Market Cap + Net Debt
financials['market_cap'] = financials['close_stock_price'] * financials['Diluted Average Shares']
financials['enterprise_value'] = financials['market_cap'] + financials['Net Debt']

financials['ev_to_ebitda'] = np.where(
    financials['EBITDA'] != 0,
    financials['enterprise_value'] / financials['EBITDA'],
    0
)

# R&D Intensity (R&D / Total Revenue)
financials['rd_intensity'] = np.where(
    financials['Total Revenue'] != 0,
    financials['Research And Development'] / financials['Total Revenue'],
    0
)

# Capital Intensity (Net PPE / Total Revenue)
financials['capital_intensity'] = np.where(
    financials['Total Revenue'] != 0,
    financials['Net PPE'] / financials['Total Revenue'],
    0
)

print(f"Se han añadido 25 ratios financieros al dataset")
print("\nNuevos ratios creados:")
print("- Profitability: operating_margin, net_profit_margin, ebitda_margin, gross_margin, roa, roe")
print("- Liquidity: current_ratio, quick_ratio, cash_ratio")
print("- Leverage: debt_to_equity, debt_to_assets, equity_ratio, interest_coverage")
print("- Efficiency: asset_turnover, inventory_turnover, receivables_turnover, working_capital_turnover")
print("- Cash Flow: ocf_ratio, fcf_to_revenue, cash_flow_margin")
print("- Valuation: pe_ratio, pb_ratio, ev_to_ebitda, rd_intensity, capital_intensity")

Se han añadido 25 ratios financieros al dataset

Nuevos ratios creados:
- Profitability: operating_margin, net_profit_margin, ebitda_margin, gross_margin, roa, roe
- Liquidity: current_ratio, quick_ratio, cash_ratio
- Leverage: debt_to_equity, debt_to_assets, equity_ratio, interest_coverage
- Efficiency: asset_turnover, inventory_turnover, receivables_turnover, working_capital_turnover
- Cash Flow: ocf_ratio, fcf_to_revenue, cash_flow_margin
- Valuation: pe_ratio, pb_ratio, ev_to_ebitda, rd_intensity, capital_intensity


# We save the data as a pickle in the folder 'data'

In [59]:
#We save the 'financials' DataFrame as a pickle with the name financials.pkl in the 'data' folder
#We save the data folder path or create it if it doesn't exist

project_path = Path('..')          
data_folder = project_path / 'data' 
data_folder.mkdir(exist_ok=True) 

#We save the DataFrame in pikle format
file_path = data_folder / 'financials.pkl'
financials.to_pickle(file_path)

print(f"DataFrame saved successfully in {file_path}")

DataFrame saved successfully in ../data/financials.pkl
