# Hull Tactical - Submission v4

**Uses artifacts_v4** with:
- More aggressive position sizing (scale_factor 120, bounds [0.0, 2.0])
- Lower risk aversion (35 vs 50)
- PROMETHEUS features trained into models
- Deeper trees, more iterations

In [None]:
import os
import pickle
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl
import lightgbm as lgb
import xgboost as xgb
from sklearn.preprocessing import RobustScaler

warnings.filterwarnings('ignore')

# Resolve data/artifact locations: Kaggle layout when the competition input
# directory is present, the local checkout otherwise.
_KAGGLE_DATA = '/kaggle/input/hull-tactical-market-prediction'

if not os.path.exists(_KAGGLE_DATA):
    DATA_DIR = Path('/home/user/aimo3/hull/hull-tactical-market-prediction')
    ARTIFACTS_DIR = Path('/home/user/aimo3/hull/artifacts_v4')
else:
    DATA_DIR = Path(_KAGGLE_DATA)
    # Candidate artifact datasets, v4 first; evaluated lazily so probing
    # stops at the first directory that exists.
    _artifact_candidates = (
        Path(f'/kaggle/input/{name}')
        for name in ('hull-artifacts-v4', 'hull-artifacts', 'hull-submission-artifacts')
    )
    # Fall back to the working directory when no artifact dataset is attached.
    ARTIFACTS_DIR = next(
        (candidate for candidate in _artifact_candidates if candidate.exists()),
        Path('/kaggle/working'),
    )

print(f"Data: {DATA_DIR}")
print(f"Artifacts: {ARTIFACTS_DIR}")

In [None]:
# Global inference state
class InferenceState:
    """Shared holder for loaded artifacts and the rolling inference history.

    The attributes are read and written directly on the class in this file,
    so they behave as one shared set of globals.
    """

    # Flipped to True by initialize() after every artifact has been loaded.
    initialized = False

    # Artifact slots, all empty until initialize() fills them.
    config = None
    scaler = None
    feature_cols = None
    lgb_models = None
    xgb_models = None
    recent_data = None

    # Frames received by predict(), oldest first; a single shared list.
    history = []


def initialize():
    """Load the V4 artifacts from ``ARTIFACTS_DIR`` into ``InferenceState``.

    Returns immediately when ``InferenceState.initialized`` is already set.
    Otherwise loads, in order: the scaler, the feature column list, the
    LightGBM models, every consecutively numbered ``xgb_model_{i}.json``
    booster, the config and ``recent_data.parquet``; then marks the state as
    initialized and prints a short summary.
    """
    state = InferenceState
    if state.initialized:
        return

    def _unpickle(filename):
        # Every pickled artifact lives directly under ARTIFACTS_DIR.
        with open(ARTIFACTS_DIR / filename, 'rb') as handle:
            return pickle.load(handle)

    print("Loading V4 artifacts...")

    state.scaler = _unpickle('scaler.pkl')
    state.feature_cols = _unpickle('feature_cols.pkl')
    state.lgb_models = _unpickle('lgb_models.pkl')

    # XGBoost boosters are stored one JSON file per model, numbered from 0;
    # loading stops at the first index whose file is missing.
    boosters = []
    state.xgb_models = boosters
    while True:
        model_path = ARTIFACTS_DIR / f'xgb_model_{len(boosters)}.json'
        if not model_path.exists():
            break
        booster = xgb.Booster()
        booster.load_model(str(model_path))
        boosters.append(booster)

    state.config = _unpickle('config.pkl')
    state.recent_data = pd.read_parquet(ARTIFACTS_DIR / 'recent_data.parquet')

    state.initialized = True
    print(f"Loaded {len(InferenceState.lgb_models)} LGB + {len(InferenceState.xgb_models)} XGB")
    print(f"Config: {InferenceState.config}")
    print(f"Features: {len(InferenceState.feature_cols)}")

In [None]:
# ============================================================================
# PROMETHEUS FEATURE ENGINEERING (same as training)
# ============================================================================

def add_prometheus_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with the PROMETHEUS feature columns appended.

    The feature definitions must match the training pipeline, so every
    window size, ``min_periods`` value, epsilon and fallback constant below
    is intentionally left exactly as is.

    Args:
        df: Input frame. Rolling windows follow row order, so rows are
            presumably chronological (oldest first). Raw columns are used
            only when present: the fixed ``key_cols`` list, every column
            named ``V<digits>``, ``M<digits>``, ``S<digits>``, ``E<digits>``
            or ``I<digits>``, and ``lagged_forward_returns`` (or, failing
            that, ``forward_returns``).

    Returns:
        A new DataFrame; the input is not modified. The per-column rolling
        features and the ``ret_*`` / ``sharpe_*`` features are created only
        for windows that fit in ``len(df)``; they are simply absent
        otherwise. All other feature groups always exist and fall back to
        constants when their inputs are missing or the frame is too short.
    """
    df = df.copy()

    # === BASIC ROLLING FEATURES ===
    key_cols = ['V1', 'V2', 'V3', 'M1', 'M2', 'S1', 'S2', 'E1', 'P1', 'I1']
    key_cols = [c for c in key_cols if c in df.columns]

    for col in key_cols:
        for window in [5, 21, 63]:
            # Windows longer than the frame are skipped entirely (no column).
            if len(df) >= window:
                df[f'{col}_ma{window}'] = df[col].rolling(window, min_periods=1).mean()
                df[f'{col}_std{window}'] = df[col].rolling(window, min_periods=1).std().fillna(0)

    # === LAGGED RETURNS ===
    # Prefer the lagged column when present; otherwise lag forward_returns
    # by one row; otherwise use a constant 0.
    if 'lagged_forward_returns' in df.columns:
        df['lagged_ret'] = df['lagged_forward_returns']
    elif 'forward_returns' in df.columns:
        df['lagged_ret'] = df['forward_returns'].shift(1).fillna(0)
    else:
        df['lagged_ret'] = 0

    for w in [5, 10, 21, 63]:
        if len(df) >= w:
            df[f'ret_cumsum_{w}'] = df['lagged_ret'].rolling(w, min_periods=1).sum()
            df[f'ret_vol_{w}'] = df['lagged_ret'].rolling(w, min_periods=1).std().fillna(0)
            # Epsilon keeps the ratio finite when the rolling volatility is 0.
            df[f'sharpe_{w}'] = df[f'ret_cumsum_{w}'] / (df[f'ret_vol_{w}'] * np.sqrt(w) + 1e-8)

    # === TIER 1: PHASE TRANSITION ===
    # Short-window variance relative to long-window variance; a ratio below
    # 0.5 is flagged as compression.
    if len(df) >= 63:
        df['var_21'] = df['lagged_ret'].rolling(21, min_periods=5).var().fillna(0)
        df['var_63'] = df['lagged_ret'].rolling(63, min_periods=10).var().fillna(0)
        df['var_ratio'] = df['var_21'] / (df['var_63'] + 1e-8)
        df['var_compression'] = (df['var_ratio'] < 0.5).astype(float)
    else:
        df['var_21'] = df['var_63'] = df['var_ratio'] = 0
        df['var_compression'] = 0

    # Critical slowing down
    if len(df) >= 63:
        def calc_ac1(x):
            # Lag-1 autocorrelation of one rolling window (x is a raw ndarray).
            if len(x) < 10:
                return 0
            try:
                return np.corrcoef(x[:-1], x[1:])[0, 1]
            except Exception:
                # Best-effort fallback for numerical failures only; unlike a
                # bare ``except`` this lets KeyboardInterrupt/SystemExit
                # propagate.
                return 0
        # NaN results (e.g. a constant window) are replaced with 0 by fillna.
        df['ac1'] = df['lagged_ret'].rolling(63, min_periods=10).apply(calc_ac1, raw=True).fillna(0)
        df['ac1_ma21'] = df['ac1'].rolling(21, min_periods=1).mean()
        df['ac1_rising'] = (df['ac1'] > df['ac1_ma21']).astype(float)
    else:
        df['ac1'] = df['ac1_ma21'] = df['ac1_rising'] = 0

    # === TIER 2: TEMPERATURE & COHERENCE ===
    # Column families are matched by a one-letter prefix followed by digits
    # only, so the derived columns added above are never picked up here.
    v_cols = [c for c in df.columns if c.startswith('V') and c[1:].isdigit()]
    m_cols = [c for c in df.columns if c.startswith('M') and c[1:].isdigit()]
    s_cols = [c for c in df.columns if c.startswith('S') and c[1:].isdigit()]
    e_cols = [c for c in df.columns if c.startswith('E') and c[1:].isdigit()]
    i_cols = [c for c in df.columns if c.startswith('I') and c[1:].isdigit()]

    if v_cols:
        # Row-wise dispersion of the V columns relative to their mean level.
        df['v_mean'] = df[v_cols].mean(axis=1)
        df['v_std'] = df[v_cols].std(axis=1)
        df['temperature'] = df['v_std'] / (df['v_mean'].abs() + 1e-8)
    else:
        df['v_mean'] = df['v_std'] = df['temperature'] = 0

    for prefix, cols in [('V', v_cols), ('M', m_cols), ('S', s_cols)]:
        if len(cols) >= 2:
            # Z-scores use each column's mean/std over the whole frame; the
            # order parameter is 1 minus their row-wise variance.
            zscores = (df[cols] - df[cols].mean()) / (df[cols].std() + 1e-8)
            df[f'{prefix}_order'] = 1 - zscores.var(axis=1).fillna(1)
        else:
            df[f'{prefix}_order'] = 0

    if v_cols:
        df['vol_regime'] = df[v_cols].mean(axis=1)
        df['vol_regime_ma21'] = df['vol_regime'].rolling(21, min_periods=1).mean()
        df['vol_expanding'] = (df['vol_regime'] > df['vol_regime_ma21']).astype(float)
    else:
        df['vol_regime'] = df['vol_regime_ma21'] = df['vol_expanding'] = 0

    # === TIER 3: INTERACTIONS ===
    if s_cols:
        df['sent_mean'] = df[s_cols].mean(axis=1)
        # vol_regime always exists at this point (0 when there are no V columns).
        df['sent_vol_interact'] = df['sent_mean'] / (df['vol_regime'] + 1e-8)
    else:
        df['sent_mean'] = df['sent_vol_interact'] = 0

    # ret_cumsum_21 exists only when the frame has at least 21 rows.
    if 'ret_cumsum_21' in df.columns:
        df['cum_ret_std'] = df['ret_cumsum_21'].rolling(63, min_periods=1).std().fillna(0)
        df['momentum_strong'] = (df['ret_cumsum_21'].abs() > df['cum_ret_std']).astype(float)
    else:
        df['cum_ret_std'] = df['momentum_strong'] = 0

    if e_cols:
        df['econ_mean'] = df[e_cols].mean(axis=1)
        df['econ_momentum'] = df['econ_mean'].diff(5).fillna(0)
        df['econ_surprise'] = df['econ_momentum'] - df['econ_momentum'].rolling(63, min_periods=1).mean()
    else:
        df['econ_mean'] = df['econ_momentum'] = df['econ_surprise'] = 0

    if len(i_cols) >= 3 and 'I3' in df.columns and 'I1' in df.columns:
        df['rate_slope'] = df['I3'] - df['I1']
        # Percentile rank of the slope within its trailing 63-row window.
        df['rate_slope_pct'] = df['rate_slope'].rolling(63, min_periods=1).rank(pct=True).fillna(0.5)
        df['rate_inverting'] = (df['rate_slope_pct'] < 0.1).astype(float)
    else:
        df['rate_slope'] = 0
        df['rate_slope_pct'] = 0.5
        df['rate_inverting'] = 0

    # === TIER 4: CROSS-DOMAIN ===
    if v_cols and m_cols and s_cols and len(df) >= 21:
        vm = df[v_cols].mean(axis=1)
        mm = df[m_cols].mean(axis=1)
        sm = df[s_cols].mean(axis=1)

        # Pairwise rolling correlations between the three family means.
        corr_vm = vm.rolling(21, min_periods=5).corr(mm).fillna(0)
        corr_vs = vm.rolling(21, min_periods=5).corr(sm).fillna(0)
        corr_ms = mm.rolling(21, min_periods=5).corr(sm).fillna(0)

        avg_corr = (abs(corr_vm) + abs(corr_vs) + abs(corr_ms)) / 3
        df['cross_domain_corr'] = avg_corr
        df['correlation_surge'] = (avg_corr > 0.7).astype(float)
    else:
        df['cross_domain_corr'] = 0
        df['correlation_surge'] = 0

    return df

In [None]:
def predict(test: pl.DataFrame) -> float:
    """Return the V4 position for the newest frame in ``test``.

    The frame is appended to ``InferenceState.history`` (capped at the 300
    most recent frames), PROMETHEUS features are recomputed over the
    combined window, and the last row is scaled and scored by every loaded
    LightGBM and XGBoost model. The ensemble mean and standard deviation
    drive a Kelly-style position that is clipped to the configured bounds.

    Args:
        test: Polars frame for the current step; converted with
            ``to_pandas()``. Only the last row of the combined window is
            scored.

    Returns:
        The position as a float within
        ``[config['min_position'], config['max_position']]``. When the
        ensemble produces no prediction or a non-finite one, the clipped
        ``config['base_position']`` is returned instead of NaN.
    """
    if not InferenceState.initialized:
        initialize()

    test_pd = test.to_pandas()

    # Update history
    InferenceState.history.append(test_pd)
    if len(InferenceState.history) > 300:
        InferenceState.history = InferenceState.history[-300:]

    # Combine with historical data
    # NOTE(review): padding from recent_data stops as soon as 63 live frames
    # exist, so the window changes abruptly from the padded set to the live
    # frames only. Confirm this is intended; it is left unchanged here.
    if len(InferenceState.history) < 63:
        n_needed = 300 - len(InferenceState.history)
        combined = pd.concat(
            [InferenceState.recent_data.tail(n_needed)] + InferenceState.history,
            ignore_index=True
        )
    else:
        combined = pd.concat(InferenceState.history, ignore_index=True)

    # Add PROMETHEUS features
    combined = add_prometheus_features(combined)

    # Ensure all feature columns exist (features that could not be computed
    # are fed to the models as 0).
    for col in InferenceState.feature_cols:
        if col not in combined.columns:
            combined[col] = 0

    # Get features and scale: only the newest row is scored.
    X = combined[InferenceState.feature_cols].iloc[[-1]].fillna(0)
    X_scaled = pd.DataFrame(
        InferenceState.scaler.transform(X),
        columns=InferenceState.feature_cols
    )

    # Ensemble predictions
    predictions = []

    for model in InferenceState.lgb_models:
        pred = model.predict(X_scaled)[0]
        predictions.append(pred)

    dtest = xgb.DMatrix(X_scaled)
    for model in InferenceState.xgb_models:
        pred = model.predict(dtest)[0]
        predictions.append(pred)

    predictions = np.array(predictions)

    # Position sizing with V4 config (more aggressive)
    cfg = InferenceState.config
    position = cfg['base_position']
    if predictions.size:
        mean_pred = predictions.mean()
        std_pred = predictions.std()
        # Ensemble disagreement is the uncertainty proxy; the floor avoids
        # dividing by ~0 when all models agree.
        uncertainty = max(std_pred, 1e-5)
        kelly = mean_pred / (cfg['risk_aversion'] * uncertainty**2 + 1e-8)
        position = cfg['base_position'] + cfg['scale_factor'] * kelly

    # np.clip passes NaN through unchanged, so a non-finite result would be
    # returned as NaN; fall back to the base position instead.
    if not np.isfinite(position):
        position = cfg['base_position']
    position = np.clip(position, cfg['min_position'], cfg['max_position'])

    return float(position)

In [None]:
# Inference server setup: the server is built only during an official
# competition rerun, signalled by the KAGGLE_IS_COMPETITION_RERUN variable.
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    print("Skipping inference server setup (not in competition rerun)")
else:
    # Imported only on the rerun path, where the server is actually built.
    import kaggle_evaluation.default_inference_server
    inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)
    print("V4 inference server ready.")

In [None]:
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Local validation: load the artifacts and print what was loaded.
    print("Local validation mode...")
    initialize()
    print("\nV4 Config:")
    for key, value in InferenceState.config.items():
        print(f"  {key}: {value}")
    print(f"\nFeatures: {len(InferenceState.feature_cols)}")
    print("\nV4 validation passed!")
else:
    # Competition rerun: hand control to the inference server built earlier.
    inference_server.serve()