In [2]:
import os
import warnings

warnings.filterwarnings('ignore')

from sqlalchemy import create_engine

#Data Analysis
import numpy as np
import pandas as pd


# Database connection.
# The SQLAlchemy URL (which embeds the password) is read from the environment
# so that credentials are never stored in the notebook or its git history.
# Expected form: postgresql://<user>:<password>@<host>:<port>/<database>
ENGINE_URL = os.environ.get("SP500_DB_URL")
if not ENGINE_URL:
    # Fail early with an actionable message instead of a driver-level error.
    raise RuntimeError(
        "Environment variable SP500_DB_URL is not set. Export the SQLAlchemy URL "
        "of the SP500_ML database, e.g. "
        "postgresql://<user>:<password>@localhost:5432/SP500_ML"
    )
engine = create_engine(ENGINE_URL)

# Feature-extraction query: one row per (week_end, ticker_latest) taken from
# sp500_weekly_rollups (weeks after 2014-12-31 with a non-null target), enriched
# through LEFT JOIN LATERAL lookups with:
#   f     - latest daily feature row within the 6 days up to week_end
#   i     - income-statement TTM row whose [date_start, date_end) covers week_end
#   mcap  - latest market cap on or before f.date
#   ev    - latest enterprise value on or before f.date
#   cf    - cash-flow TTM row whose [date_start, date_end) covers week_end
#   bs    - balance-sheet row whose [date_start, date_end) covers week_end
#   rvol  - latest realized-volatility row on or before f.date
# plus the sector label from v_sp500_sector_clean.
#
# Every "LIMIT 1" lateral carries an explicit ORDER BY: without one PostgreSQL
# may return any of the matching rows, so overlapping statement periods would
# make the extracted features non-reproducible between runs.
# NOTE(review): when periods overlap, the most recently started period wins -
# confirm this is the intended point-in-time rule.
sql = """
WITH target_features AS (
  SELECT week_end, ticker_latest, target_gt_median
  FROM sp500_weekly_rollups
  WHERE week_end > DATE '2014-12-31'
    AND target_gt_median IS NOT NULL
)
SELECT
  t.week_end,
  t.ticker_latest,
  t.target_gt_median,
  f.date,  -- last trading day in that week
  f.adj_close, f.ret_30d, f.ret_180d, f.ret_360d,
  f.rsi_14, f.rsi_9, f.rsi_3,
  f.sma_50, f.sma_100, f.sma_200,
  f.bb_lower, f.bb_middle, f.bb_upper, f.bb_bandwidth, f.bb_percent,
  f.beta_12m, f.mkt_1m, f.mkt_6m, f.mkt_12m,

  -- Income statement TTM features
  i.revenue_ttm_growth AS sales_ttm_growth_signed,
  (i.operatingincome_ttm::double precision / NULLIF(i.revenue_ttm::double precision, 0.0))
    AS operating_margin_ttm,
  (i.netincome_ttm::double precision / NULLIF(i.revenue_ttm::double precision, 0.0))
    AS profitability_margin_ttm,
  i.netincome_ttm,

  -- Size / valuation levels
  mcap.size_mcap,
  ev.enterprise_value,

  -- Cashflow TTM levels
  cf.operatingcashflow_ttm,
  cf.freecashflow_ttm,
  cf.investingcashflow_ttm,

  -- Balance sheet features
  bs.financialleverage,
  bs.totalassets_yoy AS assets_yoy_growth,

  -- Book-to-market (book equity / market cap)
  CASE
    WHEN mcap.size_mcap > 0 THEN
      ( (bs.totalstockholdersequity - COALESCE(bs.preferredstock, 0))::double precision
        / mcap.size_mcap::double precision )
  END AS book_to_market,

  -- Yield-style ratios
  (i.netincome_ttm::double precision / NULLIF(mcap.size_mcap::double precision, 0.0))
    AS ni_ttm_to_mcap,
  (i.revenue_ttm::double precision / NULLIF(ev.enterprise_value::double precision, 0.0))
    AS sales_ttm_to_ev,
  (cf.operatingcashflow_ttm::double precision / NULLIF(mcap.size_mcap::double precision, 0.0))
    AS ocf_ttm_to_mcap,
  (cf.freecashflow_ttm::double precision / NULLIF(mcap.size_mcap::double precision, 0.0))
    AS fcf_ttm_to_mcap,
  (cf.freecashflow_ttm::double precision / NULLIF(ev.enterprise_value::double precision, 0.0))
    AS fcf_ttm_to_ev,

  -- Realized volatility (as-of f.date)
  rvol.vol_1m_ann,
  rvol.vol_6m_ann,
  rvol.vol_12m_ann,

  sec.sector_clean
FROM target_features t

LEFT JOIN LATERAL (
  SELECT f.*
  FROM v_sp500_daily_features f
  WHERE f.ticker_latest = t.ticker_latest
    AND f.date <= t.week_end
    AND f.date >  t.week_end - INTERVAL '6 days'
  ORDER BY f.date DESC
  LIMIT 1
) f ON TRUE

LEFT JOIN LATERAL (
  SELECT
    i.revenue_ttm_growth,
    i.revenue_ttm,
    i.operatingincome_ttm,
    i.netincome_ttm
  FROM income_statements_q i
  WHERE i.symbol = t.ticker_latest
    AND t.week_end >= i.date_start
    AND t.week_end <  i.date_end
  -- deterministic pick if several periods cover week_end
  ORDER BY i.date_start DESC, i.date_end DESC
  LIMIT 1
) i ON TRUE

LEFT JOIN LATERAL (
  SELECT mc.marketcap AS size_mcap
  FROM market_caps_d mc
  WHERE mc.symbol = t.ticker_latest
    AND mc.date <= f.date
  ORDER BY mc.date DESC
  LIMIT 1
) mcap ON TRUE

LEFT JOIN LATERAL (
  SELECT ev.enterprisevalue AS enterprise_value
  FROM enterprise_values_q ev
  WHERE ev.symbol = t.ticker_latest
    AND ev.date <= f.date
  ORDER BY ev.date DESC
  LIMIT 1
) ev ON TRUE

LEFT JOIN LATERAL (
  SELECT
    cf.operatingcashflow_ttm,
    cf.freecashflow_ttm,
    cf.investingcashflow_ttm
  FROM cashflow_statements_q cf
  WHERE cf.symbol = t.ticker_latest
    AND t.week_end >= cf.date_start
    AND t.week_end <  cf.date_end
  -- deterministic pick if several periods cover week_end
  ORDER BY cf.date_start DESC, cf.date_end DESC
  LIMIT 1
) cf ON TRUE

LEFT JOIN LATERAL (
  SELECT
    bs.financialleverage,
    bs.totalassets_yoy,
    bs.totalstockholdersequity,
    bs.preferredstock
  FROM balance_sheets_q bs
  WHERE bs.symbol = t.ticker_latest
    AND t.week_end >= bs.date_start
    AND t.week_end <  bs.date_end
  -- deterministic pick if several periods cover week_end
  ORDER BY bs.date_start DESC, bs.date_end DESC
  LIMIT 1
) bs ON TRUE

LEFT JOIN LATERAL (
  SELECT rv.vol_1m_ann, rv.vol_6m_ann, rv.vol_12m_ann
  FROM realized_vol_d rv
  WHERE rv.symbol = t.ticker_latest
    AND rv.date <= f.date
  ORDER BY rv.date DESC
  LIMIT 1
) rvol ON TRUE

LEFT JOIN public.v_sp500_sector_clean sec
  ON sec.ticker = t.ticker_latest

ORDER BY t.week_end, t.ticker_latest;
"""

# Execute the feature query against the engine and load the result set into a
# DataFrame; the two date columns are parsed into pandas datetimes on load.
df = pd.read_sql(
    sql=sql,
    con=engine,
    parse_dates=["date", "week_end"],
)


In [3]:
# Preview the first five rows of the loaded feature table.
df.head(n=5)

Unnamed: 0,week_end,ticker_latest,target_gt_median,date,adj_close,ret_30d,ret_180d,ret_360d,rsi_14,rsi_9,...,book_to_market,ni_ttm_to_mcap,sales_ttm_to_ev,ocf_ttm_to_mcap,fcf_ttm_to_mcap,fcf_ttm_to_ev,vol_1m_ann,vol_6m_ann,vol_12m_ann,sector_clean
0,2015-01-02,A,1,2015-01-02,37.195492,-0.03146,-0.02542,0.018369,47.245745,44.904368,...,0.388974,0.040284,0.32005,0.052318,0.037129,0.040006,0.244356,0.215118,0.222133,Healthcare
1,2015-01-02,AA,1,2015-01-02,35.668671,-0.082081,0.063946,0.520973,48.376589,51.330977,...,,,,,,,0.341931,0.298772,0.299963,Basic Materials
2,2015-01-02,AAPL,1,2015-01-02,24.261047,-0.046236,0.173564,0.412716,42.667411,37.879188,...,0.194715,0.070198,0.292517,0.111726,0.094305,0.087449,0.23631,0.204893,0.216399,Technology
3,2015-01-02,ABBV,1,2015-01-02,42.394867,-0.048794,0.148833,0.308545,48.341617,45.572814,...,0.016576,0.01688,0.180194,0.03377,0.027946,0.026515,0.263114,0.249898,0.246712,Healthcare
4,2015-01-02,ABT,1,2015-01-02,36.580616,-0.007077,0.083329,0.200769,49.998736,45.475581,...,0.322028,0.034154,0.296552,0.054978,0.038866,0.036248,0.201521,0.165674,0.153934,Healthcare


In [4]:
# Summarise the loaded frame: row count, column names, non-null counts and
# dtypes. Used here to see which features have missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 262243 entries, 0 to 262242
Data columns (total 44 columns):
 #   Column                    Non-Null Count   Dtype         
---  ------                    --------------   -----         
 0   week_end                  262243 non-null  datetime64[ns]
 1   ticker_latest             262243 non-null  object        
 2   target_gt_median          262243 non-null  int64         
 3   date                      262243 non-null  datetime64[ns]
 4   adj_close                 262243 non-null  float64       
 5   ret_30d                   262182 non-null  float64       
 6   ret_180d                  261723 non-null  float64       
 7   ret_360d                  261155 non-null  float64       
 8   rsi_14                    262162 non-null  float64       
 9   rsi_9                     262178 non-null  float64       
 10  rsi_3                     262192 non-null  float64       
 11  sma_50                    262058 non-null  float64       
 12  sm