In [1]:
# Merge all compustat into one data frame

import os
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv

load_dotenv()

# --- Configuration -----------------------------------------------------------
table = 'fundq'
start_year = 2010
end_year = 2024

# For notebooks, construct path relative to known project structure.
# The notebook is in Strategies/dividend_cuts/, so go up 2 levels to project root.
# Resolved once, before the loop: the directory does not depend on the year and
# is needed again after the loop to build the output path.
current_dir = Path.cwd()
OUTPUT_DIR = current_dir.parent.parent / "Data" / "compustat_fundamentals_q"

# --- Validate inputs before doing any expensive reads --------------------------
if start_year > end_year:
    # pd.concat([]) would otherwise fail later with "No objects to concatenate".
    raise ValueError(
        f"start_year ({start_year}) must not be greater than end_year ({end_year})"
    )

# One parquet file per year, named "<table>_<year>.parquet".
yearly_files = {
    year: OUTPUT_DIR / f"{table}_{year}.parquet"
    for year in range(start_year, end_year + 1)
}

# Report every missing file at once instead of failing on the first one
# after earlier years have already been loaded.
missing_files = [str(path) for path in yearly_files.values() if not path.exists()]
if missing_files:
    raise FileNotFoundError(
        f"Missing {len(missing_files)} of {len(yearly_files)} '{table}' files under "
        f"{OUTPUT_DIR} (was the notebook started from Strategies/dividend_cuts/?): "
        + ", ".join(missing_files)
    )

# --- Load ------------------------------------------------------------------------
dataframes = []

for year, file_location in yearly_files.items():
    df = pd.read_parquet(file_location)
    dataframes.append(df)

# Concatenate all dataframes together.
# The variable name (including its spelling) is kept as-is because other cells
# may reference it.
# NOTE(review): the recorded cell output echoes this statement the way a Python
# warning echoes its source line; the warning text itself is not visible here —
# re-run and check what pandas is warning about.
computstat_data = pd.concat(dataframes, ignore_index=True)

# print some quick info
print(computstat_data.head())
print(len(computstat_data))

# Save concatenated dataframe next to the yearly inputs.
output_file = OUTPUT_DIR / f"{table}_combined.parquet"
computstat_data.to_parquet(output_file)

  computstat_data = pd.concat(dataframes, ignore_index=True)


    gvkey    datadate  fyearq  fqtr                          conm     tic  \
0  001082  2010-01-31    2009     3                 SERVIDYNE INC  SERV.1   
1  001173  2010-01-31    2009     4                AEROSONIC CORP   AIM.1   
2  001259  2010-01-31    2010     2      TAMIR BIOTHECHNOLOGY INC   ACEL.   
3  001410  2010-01-31    2010     1            ABM INDUSTRIES INC     ABM   
4  001562  2010-01-31    2009     3  LOGILITY SPPLY CHAIN SOL INC    LGTY   

       cusip         cik indfmt consol  ...   xrdy   xsgay  adjex      cshtrq  \
0  81765M106  0000001923   INDL      C  ...   <NA>   7.133    1.0     76970.0   
1  008015307  0000109471   INDL      C  ...  1.423   7.286    1.0   1716019.0   
2  87509Q105  0000708717   INDL      C  ...  0.283   1.041    1.0   4038194.0   
3  000957100  0000771497   INDL      C  ...   <NA>  60.832    1.0  10115204.0   
4  029683109  0000713425   INDL      C  ...  4.979  25.173    1.0   2908206.0   

  dvpspq dvpsxq    mkvaltq   prccq  prchq  prclq  

In [None]:
# Columns we want

# gvkey
# datadate
# fyearq - fiscal year 
# fqtr - fiscal quarter
# datacqtr - calendar year and quarter
# datafqtr - Fiscal quarter by year
# conm
# tic
# cusip
# cik
# fyr - fiscal year end month
# ajexq - adjustment factor
# apdedateq - actual period end date
# rdq - report date on which quarterly EPS are first reported
# fdateq - final date
# pdateq - prelim date, the date the data for the co is updated on a prelim basis for that year
# finalq - Final indicator flag, indicates all annual fundamentals have been delivered 
# updq - Update code
# compstq - comparability status (important: codes that flag comparability issues)
# curncdq - Native Currency Code
# currtrq - currency translation rate
# staltq - status alert (for bankruptcy, litigation etc)

In [None]:
# Maps Compustat quarterly (`fundq`) column mnemonics to short human-readable
# descriptions. Naming convention used below: a trailing `q` marks a
# single-quarter item, a trailing `y` marks a fiscal-year-to-date item.
recommended_fields = {

    # === IDENTIFIERS & DATES ===
    'gvkey': 'Company identifier',
    'datadate': 'Period end date',
    'rdq': 'Report date (for point-in-time)',
    'fyearq': 'Fiscal year',
    'fqtr': 'Fiscal quarter',
    'tic': 'Ticker',
    'cusip': 'CUSIP',
    'conm': 'Company name',
    'updq': 'Update code (2=prelim, 3=final)',
    'ajexq': 'Adjustment factor (for per-share calcs)',

    # === DISTRESS/STATUS INDICATORS ===
    'staltq': 'Status Alert (bankruptcy, litigation, liquidation)',
    'compstq': 'Comparability Status (mergers, restatements)',

    # === INCOME STATEMENT ===
    'revtq': 'Revenue (Total)',
    'cogsq': 'Cost of Goods Sold',
    'xsgaq': 'Selling, General & Admin Expense',
    'xrdq': 'R&D Expense',
    'xintq': 'Interest Expense',
    'niq': 'Net Income',
    'oiadpq': 'Operating Income After Depreciation',
    'dpq': 'Depreciation & Amortization',
    'txtq': 'Income Taxes',
    # NOTE(review): confirm `ebitdaq` actually exists in the fundq extract before
    # selecting on it — TODO verify against the data dictionary.
    'ebitdaq': 'EBITDA',
    'ibq': 'Income Before Extraordinary Items',
    'xoprq': 'Operating Expenses - Total',

    # === BALANCE SHEET ===
    'atq': 'Total Assets',
    'actq': 'Current Assets',
    'cheq': 'Cash and Short Term Investments',
    'chq': 'Cash',
    'rectq': 'Receivables',
    'invtq': 'Inventories',
    'acoq': 'Current Assets - Other',
    'lctq': 'Current Liabilities',
    'ltq': 'Total Liabilities',
    'dlcq': 'Debt in Current Liabilities (short-term)',
    'dlttq': 'Long-Term Debt',
    'ceqq': 'Common Equity',
    'wcapq': 'Working Capital',
    'apq': 'Accounts Payable',
    'lcoq': 'Current Liabilities - Other',
    # NOTE(review): `pstknq` may be the non-redeemable subset rather than total
    # preferred stock — TODO verify against the data dictionary.
    'pstknq': 'Preferred Stock',
    'mibq': 'Minority Interest',
    'txditcq': 'Deferred Taxes - Investment Tax Credit',

    # === CASH FLOW (Quarterly YTD) ===
    'oancfy': 'Operating Activities - Net Cash Flow (YTD)',
    'capxy': 'Capital Expenditures (YTD)',
    'fincfy': 'Financing Activities - Net Cash Flow (YTD)',
    'ivncfy': 'Investing Activities - Net Cash Flow (YTD)',
    'dltisy': 'Long-Term Debt - Issuance (YTD)',
    'dlcchy': 'Current Debt - Changes (YTD)',
    # Was keyed as 'dltrq'; the item is year-to-date like the rest of this
    # group, so it carries the `y` suffix.
    'dltry': 'Long-Term Debt - Reduction (YTD)',

    # === PER SHARE DATA ===
    'cshoq': 'Common Shares Outstanding',
    # NOTE(review): confirm the meaning of `cshopq`; it may be shares repurchased
    # in the quarter rather than prior-period shares outstanding — TODO verify.
    'cshopq': 'Common Shares Outstanding - Prior Period',
    'epspxq': 'EPS Basic - Excluding Extraordinary Items',
    'prccq': 'Price Close - Quarter',

    # === MARKET DATA ===
    'mkvaltq': 'Market Value (Market Cap)',

    # === DIVIDENDS ===
    'dvy': 'Dividends - Year to Date',
}

# DERIVED METRICS:
#
# NOTE: items ending in `y` (oancfy, capxy, fincfy, dltisy, dltry, dvy, ...) are
# fiscal-year-to-date totals, while items ending in `q` are single-quarter values.
# Before combining the two in a ratio, convert the YTD items to quarterly flows
# (difference consecutive quarters within the same gvkey and fyearq; the first
# fiscal quarter is already a single-quarter value), or aggregate the quarterly
# items to the same YTD window.
#
# PROFITABILITY:
#   - Gross Margin = (revtq - cogsq) / revtq
#   - Operating Margin = oiadpq / revtq
#   - Net Margin = niq / revtq
#   - ROA = niq / atq
#   - ROE = niq / ceqq

# LEVERAGE:
#   - Debt/Equity = (dlcq + dlttq) / ceqq
#   - Debt/Assets = (dlcq + dlttq) / atq
#   - Current Ratio = actq / lctq
#   - Interest Coverage = oiadpq / xintq
#   - EBITDA/Interest = ebitdaq / xintq

# CASH FLOW:
#   - Free Cash Flow = oancfy - capxy
#   - FCF/Revenue = (oancfy - capxy) / revtq
#   - Operating CF/Net Income = oancfy / niq
#   - Accruals Ratio = (niq - oancfy) / atq

# LIQUIDITY:
#   - Quick Ratio = (actq - invtq) / lctq
#   - Cash Ratio = cheq / lctq
#   - Payables Days = (apq / cogsq) * 90
#   - Working Capital Ratio = wcapq / atq

# DIVIDEND METRICS:
#   - Payout Ratio = dvy / niq
#   - Cash Flow Payout = dvy / oancfy
#   - Free Cash Flow Coverage = (oancfy - capxy) / dvy
#   - Debt Issuance/Dividends = dltisy / dvy
#   - Financing CF/Dividends = fincfy / dvy
#   - Total Payout = (dvy + dltry) / oancfy

# CAPITAL ALLOCATION:
#   - CapEx/Depreciation = capxy / dpq
#   - Net Borrowing = dltisy - dltry

# SHARE CHANGES:
#   - Dilution Rate = (cshoq - cshopq) / cshopq
#     (depends on the meaning of cshopq — see the review note on that key)

# GROWTH RATES (quarter-over-quarter or YoY):
#   - Revenue growth
#   - Earnings growth
#   - FCF growth
#   - Dividend growth (identifies cuts!)

# VALUATION:
#   - P/E = prccq / epspxq
#   - P/B = mkvaltq / ceqq
#   - EV/EBITDA (approx)

