In [1]:
import os
import warnings

from sqlalchemy import create_engine

warnings.filterwarnings('ignore')

#Data Analysis
import pandas as pd
import numpy as np


# Database connection.
# The SQLAlchemy URL (which embeds the password) is read from the environment
# so that credentials are never stored in the notebook or its version history.
# Export it before starting the kernel, e.g.
#   export SP500_ML_DB_URL="postgresql://<user>:<password>@localhost:5432/SP500_ML"
# NOTE(review): a password that was ever committed in a notebook should be rotated.
ENGINE_URL = os.environ.get("SP500_ML_DB_URL")
if not ENGINE_URL:
    # Fail early with an actionable message instead of a driver-level error later.
    raise RuntimeError(
        "Set the SP500_ML_DB_URL environment variable to the SQLAlchemy "
        "connection URL of the SP500_ML database."
    )
engine = create_engine(ENGINE_URL)

# Feature-assembly query.
#
# Driver rows come from sp500_weekly_rollups (week_end after 2014-12-31 with a
# non-null target_gt_median). Every other source is attached with a
# LEFT JOIN LATERAL that yields at most one row per driver row:
#   * f          - most recent v_sp500_daily_features row dated on or before
#                  week_end and after week_end - 6 days.
#   * i, cf, bs  - statement rows whose [date_start, date_end) window contains
#                  week_end. These subqueries are ordered by date_start DESC so
#                  the pick is deterministic should two windows ever overlap
#                  (LIMIT without ORDER BY returns an unpredictable row).
#   * mcap, ev, rvol, eb - latest row dated on or before f.date.
#   * vb, trb    - row whose date equals f.date exactly.
# The final plain LEFT JOIN adds sector_clean by ticker.
# NOTE(review): the result has one row per (week_end, ticker_latest) only if
# v_sp500_sector_clean holds at most one row per ticker - TODO confirm.
sql = """
WITH target_features AS (
  SELECT week_end, ticker_latest, target_gt_median
  FROM sp500_weekly_rollups
  WHERE week_end > DATE '2014-12-31' 
    AND target_gt_median IS NOT NULL
)

SELECT
  -- keys
  t.week_end,
  t.ticker_latest,
  t.target_gt_median,

  -- daily/technical snapshot (last trading day in that week)
  f.date,
  f.adj_close, f.ret_30d, f.ret_180d, f.ret_360d,
  f.rsi_14, f.rsi_9, f.rsi_3,
  f.sma_50, f.sma_100, f.sma_200,
  f.bb_lower, f.bb_middle, f.bb_upper, f.bb_bandwidth, f.bb_percent,
  f.beta_12m, f.mkt_1m, f.mkt_6m, f.mkt_12m,

  /* ================= INCOME STATEMENT ================= */
  -- TTM levels (for margins / yields)
  i.revenue_ttm,
  i.operatingincome_ttm,
  i.netincome_ttm,

  -- TTM growth (signed symmetric)
  i.revenue_ttm_growth           AS sales_ttm_growth_signed,
  i.netincome_ttm_growth         AS netincome_ttm_growth_signed,
  i.operatingincome_ttm_growth   AS opincome_ttm_growth_signed,

  -- Quarterly YoY (same quarter one year ago) — if present in your table
  i.revenue_q_yoy,
  i.operatingincome_q_yoy,
  i.netincome_q_yoy,

  -- TTM margins
  (i.operatingincome_ttm::double precision / NULLIF(i.revenue_ttm::double precision, 0.0))
    AS operating_margin_ttm,
  (i.netincome_ttm::double precision / NULLIF(i.revenue_ttm::double precision, 0.0))
    AS profitability_margin_ttm,

  /* ================= SIZE / VALUATION ================= */
  mcap.size_mcap,
  ev.enterprise_value,

  /* ================= CASH FLOW ================= */
  -- TTM levels
  cf.operatingcashflow_ttm,
  cf.freecashflow_ttm,
  cf.investingcashflow_ttm,

  -- TTM growth (signed)
  cf.operatingcashflow_ttm_growth,
  cf.freecashflow_ttm_growth,
  cf.investingcashflow_ttm_growth,

  -- Quarterly YoY (if present)
  cf.operatingcashflow_q_yoy,
  cf.freecashflow_q_yoy,
  cf.investingcashflow_q_yoy,

  /* ================= BALANCE SHEET ================= */
  -- point-in-time ratios (you computed these in balance_sheets_q)
  bs.current_ratio,
  bs.quick_ratio,
  bs.cash_ratio,
  bs.working_capital,
  bs.working_capital_to_assets,
  bs.inventory_to_current,
  bs.debt_to_equity,
  bs.debt_to_assets,
  bs.net_debt_to_equity,
  bs.liabilities_to_assets,
  bs.equity_ratio,
  bs.lt_debt_to_capital,
  bs.total_debt_to_capital,

  -- leverage set (original + smoothed variations if you stored them)
  bs.financialleverage,
  bs.financialleverage_avg4q,
  bs.financialleverage_yoy,
  bs.debttoequity,
  bs.liabilitiestoequity,
  bs.debttoassets,
  bs.netdebttoequity,

  -- key levels + YoY on core balance sheet lines (if present)
  bs.totalassets,
  bs.totalassets_yoy           AS assets_yoy_growth,
  bs.totalLiabilities          AS total_liabilities_level,
  bs.totalLiabilities_yoy      AS total_liabilities_yoy,
  bs.totalstockholdersequity   AS total_equity_level,
  bs.totalstockholdersequity_yoy AS total_equity_yoy,
  bs.totaldebt,
  bs.netdebt,
  bs.totalcurrentassets,
  bs.totalcurrentliabilities,
  bs.cashandshortterminvestments AS cash_and_st_investments,

  /* ================= DERIVED YIELDS ================= */
  (i.netincome_ttm::double precision / NULLIF(mcap.size_mcap::double precision, 0.0))
    AS ni_ttm_to_mcap,
  (i.revenue_ttm::double precision / NULLIF(ev.enterprise_value::double precision, 0.0))
    AS sales_ttm_to_ev,
  (cf.operatingcashflow_ttm::double precision / NULLIF(mcap.size_mcap::double precision, 0.0))
    AS ocf_ttm_to_mcap,
  (cf.freecashflow_ttm::double precision / NULLIF(mcap.size_mcap::double precision, 0.0))
    AS fcf_ttm_to_mcap,
  (cf.freecashflow_ttm::double precision / NULLIF(ev.enterprise_value::double precision, 0.0))
    AS fcf_ttm_to_ev,

  -- Book-to-market (book equity / market cap)
  CASE
    WHEN mcap.size_mcap > 0 THEN
      ( (bs.totalstockholdersequity - COALESCE(bs.preferredstock, 0))::double precision
        / mcap.size_mcap::double precision )
  END AS book_to_market,

  /* ================= RISK / VOL ================= */
  rvol.vol_1m_ann,
  rvol.vol_6m_ann,
  rvol.vol_12m_ann,

  /* ================= TAGS ================= */
  sec.sector_clean,

    /* ================= Earnings Beats ================= */
  eb.beat_flag,
  eb.beat_mag,

   /* ================= Volatility ================= */
  vb.v_1m, 
  vb.v_6m,
  vb.v_12m,


   /* ================= 10 Year Yields ================= */
  trb.t_1m, 
  trb.t_6m, 
  trb.t_12m

FROM target_features t

LEFT JOIN LATERAL (
  SELECT f.*
  FROM v_sp500_daily_features f
  WHERE f.ticker_latest = t.ticker_latest
    AND f.date <= t.week_end
    AND f.date >  t.week_end - INTERVAL '6 days'
  ORDER BY f.date DESC
  LIMIT 1
) f ON TRUE

LEFT JOIN LATERAL (
  SELECT
    i.revenue_ttm,
    i.operatingincome_ttm,
    i.netincome_ttm,
    i.revenue_ttm_growth,
    i.netincome_ttm_growth,
    i.operatingincome_ttm_growth,
    -- include quarterly YoY if present
    i.revenue_q_yoy,
    i.operatingincome_q_yoy,
    i.netincome_q_yoy
  FROM income_statements_q i
  WHERE i.symbol = t.ticker_latest
    AND t.week_end >= i.date_start
    AND t.week_end <  i.date_end
  -- newest period wins if validity windows ever overlap
  ORDER BY i.date_start DESC
  LIMIT 1
) i ON TRUE

LEFT JOIN LATERAL (
  SELECT mc.marketcap AS size_mcap
  FROM market_caps_d mc
  WHERE mc.symbol = t.ticker_latest
    AND mc.date <= f.date
  ORDER BY mc.date DESC
  LIMIT 1
) mcap ON TRUE

LEFT JOIN LATERAL (
  SELECT ev.enterprisevalue AS enterprise_value
  FROM enterprise_values_q ev
  WHERE ev.symbol = t.ticker_latest
    AND ev.date <= f.date
  ORDER BY ev.date DESC
  LIMIT 1
) ev ON TRUE

LEFT JOIN LATERAL (
  SELECT
    cf.operatingcashflow_ttm,
    cf.freecashflow_ttm,
    cf.investingcashflow_ttm,
    cf.operatingcashflow_ttm_growth,
    cf.freecashflow_ttm_growth,
    cf.investingcashflow_ttm_growth,
    -- include quarterly YoY if present
    cf.operatingcashflow_q_yoy,
    cf.freecashflow_q_yoy,
    cf.investingcashflow_q_yoy
  FROM cashflow_statements_q cf
  WHERE cf.symbol = t.ticker_latest
    AND t.week_end >= cf.date_start
    AND t.week_end <  cf.date_end
  -- newest period wins if validity windows ever overlap
  ORDER BY cf.date_start DESC
  LIMIT 1
) cf ON TRUE

LEFT JOIN LATERAL (
  SELECT
    -- ratios/levels + YoY that you ingested into balance_sheets_q
    bs.current_ratio,
    bs.quick_ratio,
    bs.cash_ratio,
    bs.working_capital,
    bs.working_capital_to_assets,
    bs.inventory_to_current,
    bs.debt_to_equity,
    bs.debt_to_assets,
    bs.net_debt_to_equity,
    bs.liabilities_to_assets,
    bs.equity_ratio,
    bs.lt_debt_to_capital,
    bs.total_debt_to_capital,

    bs.financialleverage,
    bs.financialleverage_avg4q,
    bs.financialleverage_yoy,
    bs.debttoequity,
    bs.liabilitiestoequity,
    bs.debttoassets,
    bs.netdebttoequity,

    bs.totalassets,
    bs.totalassets_yoy,
    bs.totalLiabilities,
    bs.totalLiabilities_yoy,
    bs.totalstockholdersequity,
    bs.totalstockholdersequity_yoy,
    bs.totaldebt,
    bs.netdebt,
    bs.totalcurrentassets,
    bs.totalcurrentliabilities,
    bs.cashandshortterminvestments,
    bs.preferredstock
  FROM balance_sheets_q bs
  WHERE bs.symbol = t.ticker_latest
    AND t.week_end >= bs.date_start
    AND t.week_end <  bs.date_end
  -- newest period wins if validity windows ever overlap
  ORDER BY bs.date_start DESC
  LIMIT 1
) bs ON TRUE

LEFT JOIN LATERAL (
  SELECT rv.vol_1m_ann, rv.vol_6m_ann, rv.vol_12m_ann
  FROM realized_vol_d rv
  WHERE rv.symbol = t.ticker_latest
    AND rv.date <= f.date
  ORDER BY rv.date DESC
  LIMIT 1
) rvol ON TRUE


LEFT JOIN LATERAL (


  SELECT symbol, date_start, date_end, beat_flag,
  CASE WHEN surprise_pct >= 0.1 then 1 else 0 END AS "beat_mag"
  FROM earnings_surprises_q esq
  WHERE esq.symbol = t.ticker_latest
    AND esq.date <= f.date
  ORDER BY esq.date DESC
  LIMIT 1
) eb ON TRUE


LEFT JOIN LATERAL (


  SELECT date, v_1m, v_6m, v_12m
  FROM vix_momentum_daily vix
  WHERE vix.date = f.date
  ORDER BY vix.date DESC
  LIMIT 1
) vb ON TRUE


LEFT JOIN LATERAL (


  SELECT date, t_1m, t_6m, t_12m
  FROM rates_momentum_daily yld
  WHERE yld.date = f.date
  ORDER BY yld.date DESC
  LIMIT 1
) trb ON TRUE




LEFT JOIN public.v_sp500_sector_clean sec
  ON sec.ticker = t.ticker_latest

ORDER BY t.week_end, t.ticker_latest;



"""

# Run the feature query against the database and load the result into a
# DataFrame; the `date` and `week_end` columns are parsed as datetimes.
df = pd.read_sql(
    sql=sql,
    con=engine,
    parse_dates=["date", "week_end"],
)


In [2]:
# Preview the first rows of the loaded frame as a quick sanity check.
df.head()

Unnamed: 0,week_end,ticker_latest,target_gt_median,date,adj_close,ret_30d,ret_180d,ret_360d,rsi_14,rsi_9,...,vol_12m_ann,sector_clean,beat_flag,beat_mag,v_1m,v_6m,v_12m,t_1m,t_6m,t_12m
0,2015-01-02,A,1,2015-01-02,37.195492,-0.03146,-0.02542,0.018369,47.245745,44.904368,...,0.222133,Healthcare,1.0,1.0,0.384436,0.723837,0.250176,-0.070897,-0.198263,-0.288777
1,2015-01-02,AA,1,2015-01-02,35.668671,-0.082081,0.063946,0.520973,48.376589,51.330977,...,0.299963,Basic Materials,,,0.384436,0.723837,0.250176,-0.070897,-0.198263,-0.288777
2,2015-01-02,AAPL,1,2015-01-02,24.261047,-0.046236,0.173564,0.412716,42.667411,37.879188,...,0.216399,Technology,1.0,0.0,0.384436,0.723837,0.250176,-0.070897,-0.198263,-0.288777
3,2015-01-02,ABBV,1,2015-01-02,42.394867,-0.048794,0.148833,0.308545,48.341617,45.572814,...,0.246712,Healthcare,1.0,1.0,0.384436,0.723837,0.250176,-0.070897,-0.198263,-0.288777
4,2015-01-02,ABT,1,2015-01-02,36.580616,-0.007077,0.083329,0.200769,49.998736,45.475581,...,0.153934,Healthcare,1.0,0.0,0.384436,0.723837,0.250176,-0.070897,-0.198263,-0.288777


In [3]:
# Summarise the frame: row count plus per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 262243 entries, 0 to 262242
Data columns (total 94 columns):
 #   Column                        Non-Null Count   Dtype         
---  ------                        --------------   -----         
 0   week_end                      262243 non-null  datetime64[ns]
 1   ticker_latest                 262243 non-null  object        
 2   target_gt_median              262243 non-null  int64         
 3   date                          262243 non-null  datetime64[ns]
 4   adj_close                     262243 non-null  float64       
 5   ret_30d                       262182 non-null  float64       
 6   ret_180d                      261723 non-null  float64       
 7   ret_360d                      261155 non-null  float64       
 8   rsi_14                        262162 non-null  float64       
 9   rsi_9                         262178 non-null  float64       
 10  rsi_3                         262192 non-null  float64       
 11  sma_50       