# Hull Tactical - SUBMISSION (Live Phase Ready)

**This notebook submits predictions using pre-trained artifacts.**

Before running, attach the artifacts dataset `ryancardwell/hull-artifacts-v3` (or your latest artifacts dataset) to this notebook.

In [None]:
import os
import pickle
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl
import lightgbm as lgb
import xgboost as xgb

# Silence every warning category for the rest of the notebook.
warnings.filterwarnings('ignore')

# Seed NumPy's legacy global RNG so any stochastic step is repeatable.
SEED = 42
np.random.seed(SEED)

print("Hull Tactical Submission - LIVE PHASE READY")

In [None]:
# Locate the attached artifacts dataset.
# Candidate mount points are probed in the order listed; the first one that
# exists on disk is used.
ARTIFACTS_DIRS = [
    '/kaggle/input/hull-artifacts-v3',
    '/kaggle/input/hull-artifacts-v4',
    '/kaggle/input/hull-artifacts-v5',
    '/kaggle/input/hull-artifacts',
]

_existing_artifact_dirs = (
    Path(candidate) for candidate in ARTIFACTS_DIRS if os.path.exists(candidate)
)
ARTIFACTS_DIR = next(_existing_artifact_dirs, None)

# Fail fast with an actionable message when nothing is attached.
if ARTIFACTS_DIR is None:
    raise FileNotFoundError("No artifacts found! Attach hull-artifacts dataset.")

print(f"Using artifacts: {ARTIFACTS_DIR}")
print(f"Contents: {list(ARTIFACTS_DIR.iterdir())}")

In [None]:
# Load the pre-trained artifacts from ARTIFACTS_DIR.
# NOTE: pickle.load can execute arbitrary code; only attach artifact datasets you trust.
def _load_pickle(filename):
    """Unpickle and return the object stored at ARTIFACTS_DIR / filename."""
    with open(ARTIFACTS_DIR / filename, 'rb') as handle:
        return pickle.load(handle)


scaler = _load_pickle('scaler.pkl')
feature_cols = _load_pickle('feature_cols.pkl')
lgb_models = _load_pickle('lgb_models.pkl')
CONFIG = _load_pickle('config.pkl')

# One XGBoost booster file is looked up per LightGBM model index;
# indices whose JSON file is absent are skipped.
xgb_models = []
for idx in range(len(lgb_models)):
    booster_file = ARTIFACTS_DIR / f'xgb_model_{idx}.json'
    if not booster_file.exists():
        continue
    booster = xgb.Booster()
    booster.load_model(str(booster_file))
    xgb_models.append(booster)

# Recent historical rows, used as look-back context for rolling features.
recent_data = pd.read_parquet(ARTIFACTS_DIR / 'recent_data.parquet')

print(f"Loaded {len(lgb_models)} LGB + {len(xgb_models)} XGB models")
print(f"Features: {len(feature_cols)}")
print(f"Config: {CONFIG}")

In [None]:
# Feature engineering (simplified for inference)
def add_features_for_inference(df: pd.DataFrame, history: pd.DataFrame) -> pd.DataFrame:
    """Compute rolling/cross-sectional features for new rows using historical context.

    ``history`` and ``df`` are stacked (history first) so rolling windows for the
    new rows can look back into the history. The following columns are derived:

    * for each key column present (V1-V5, M1-M3, S1-S3, E1, E2, P1, I1, I2) and
      each window in 5/10/21/63: ``{col}_ma{w}``, ``{col}_std{w}``, ``{col}_z{w}``;
    * ``lagged_ret`` (copy of ``lagged_forward_returns``, or 0 when that column
      is absent) and, for windows 5/10/21/63/126, ``ret_cumsum_{w}``,
      ``ret_vol_{w}``, ``sharpe_{w}``;
    * when any column named ``V<digits>`` exists: ``v_mean``, ``v_std``,
      ``temperature``, ``vol_regime``, ``vol_regime_ma21``.

    Parameters
    ----------
    df : pd.DataFrame
        New rows to featurise. Not modified.
    history : pd.DataFrame
        Earlier rows providing look-back context. Not modified.

    Returns
    -------
    pd.DataFrame
        The last ``len(df)`` rows of the stacked frame with a fresh 0..n-1 index,
        containing the stacked input columns followed by the engineered columns.
        An engineered column replaces any input column of the same name.
    """
    n_new = len(df)

    # Stack history under the new rows so rolling windows see past values.
    combined = pd.concat([history, df], ignore_index=True)

    # Features are gathered here and attached with a single concat at the end;
    # inserting hundreds of columns one by one fragments the frame on every call.
    engineered = {}

    key_cols = ['V1', 'V2', 'V3', 'V4', 'V5', 'M1', 'M2', 'M3', 'S1', 'S2', 'S3', 'E1', 'E2', 'P1', 'I1', 'I2']
    key_cols = [c for c in key_cols if c in combined.columns]

    for col in key_cols:
        series = combined[col]
        for window in (5, 10, 21, 63):
            roller = series.rolling(window, min_periods=1)
            moving_avg = roller.mean()
            # std of a single observation is NaN; treat it as zero dispersion.
            moving_std = roller.std().fillna(0)
            engineered[f'{col}_ma{window}'] = moving_avg
            engineered[f'{col}_std{window}'] = moving_std
            # 1e-8 keeps the z-score finite when the window has zero dispersion.
            engineered[f'{col}_z{window}'] = (series - moving_avg) / (moving_std + 1e-8)

    # Lagged returns
    if 'lagged_forward_returns' in combined.columns:
        lagged_ret = combined['lagged_forward_returns']
    else:
        lagged_ret = pd.Series(0, index=combined.index)
    engineered['lagged_ret'] = lagged_ret

    for w in (5, 10, 21, 63, 126):
        roller = lagged_ret.rolling(w, min_periods=1)
        cum_ret = roller.sum()
        vol = roller.std().fillna(0)
        engineered[f'ret_cumsum_{w}'] = cum_ret
        engineered[f'ret_vol_{w}'] = vol
        engineered[f'sharpe_{w}'] = cum_ret / (vol * np.sqrt(w) + 1e-8)

    # Volatility features: cross-sectional statistics over the raw V<digits> columns.
    v_cols = [c for c in combined.columns if c.startswith('V') and c[1:].isdigit()]
    if v_cols:
        v_block = combined[v_cols]
        v_mean = v_block.mean(axis=1)
        v_std = v_block.std(axis=1)
        engineered['v_mean'] = v_mean
        engineered['v_std'] = v_std
        engineered['temperature'] = v_std / (v_mean.abs() + 1e-8)
        # vol_regime is the same row-wise mean as v_mean.
        engineered['vol_regime'] = v_mean
        engineered['vol_regime_ma21'] = v_mean.rolling(21, min_periods=1).mean()

    # Drop input columns that an engineered column supersedes so names stay unique.
    superseded = [name for name in engineered if name in combined.columns]
    if superseded:
        combined = combined.drop(columns=superseded)

    featured = pd.concat(
        [combined, pd.DataFrame(engineered, index=combined.index)],
        axis=1,
    )

    # Return only the new rows
    return featured.tail(n_new).reset_index(drop=True)

print("Feature engineering ready.")

In [None]:
# Ensemble prediction functions
def dempster_shafer_fusion(predictions, reliabilities=None):
    """Fuse model predictions into one value and score their sign disagreement.

    Parameters
    ----------
    predictions : array-like of float, 1-D
        One prediction per model.
    reliabilities : array-like of float, 1-D, optional
        One non-normalised weight per model. Defaults to all ones. Lists and
        tuples are accepted as well as NumPy arrays.

    Returns
    -------
    tuple
        ``(fused, confidence, conflict)`` where ``fused`` is the
        reliability-weighted mean of the predictions, ``conflict`` is the sum of
        ``reliabilities[i] * reliabilities[j]`` over all pairs ``i < j`` whose
        predictions have different ``np.sign``, divided by the number of pairs
        (at least 1), and ``confidence`` is ``1 - conflict``.

    Raises
    ------
    ValueError
        If ``predictions`` is not 1-D or ``reliabilities`` has a different shape.
    """
    preds = np.asarray(predictions, dtype=float)
    if preds.ndim != 1:
        raise ValueError("predictions must be one-dimensional")
    n = preds.size

    if reliabilities is None:
        rel = np.ones(n)
    else:
        # Coerce so plain lists work; a list has no .sum() method.
        rel = np.asarray(reliabilities, dtype=float)
    if rel.shape != preds.shape:
        raise ValueError("reliabilities must have the same shape as predictions")

    total = rel.sum()
    if total != 0:
        weights = rel / total
    else:
        # No usable reliability mass: fall back to a plain average instead of NaN.
        weights = np.full(n, 1.0 / max(n, 1))
    fused = np.sum(preds * weights)

    # Conflict
    if n < 2:
        conflict = 0.0
    else:
        signs = np.sign(preds)
        disagree = signs[:, None] != signs[None, :]
        # np.where (not multiplication) so agreeing pairs contribute exactly 0.
        pair_mass = np.where(disagree, np.outer(rel, rel), 0.0)
        # Strict upper triangle counts each unordered pair once.
        conflict = np.triu(pair_mass, k=1).sum()
    conflict /= max(1, n * (n - 1) / 2)

    return fused, 1 - conflict, conflict

print("Ensemble functions ready.")

In [None]:
# Main prediction function
# Rolling store of recent rows; gives feature engineering its look-back context
# and is extended by every predict() call.
history_buffer = recent_data.copy()

def predict(test: pl.DataFrame) -> pl.DataFrame:
    """Produce a position for the incoming batch and append it as ``prediction``.

    Steps: featurise ``test`` against ``history_buffer``, scale with the loaded
    ``scaler``, collect one prediction per LightGBM / XGBoost model (the mean
    over rows when the batch has several rows), fuse them with
    ``dempster_shafer_fusion``, and convert the fused value into a position
    clipped to ``[CONFIG['min_position'], CONFIG['max_position']]``.

    Side effect: the raw batch is appended to the module-level
    ``history_buffer``, which is then trimmed to its last 500 rows.

    Parameters
    ----------
    test : pl.DataFrame
        Incoming batch of rows.

    Returns
    -------
    pl.DataFrame
        ``test`` with an added ``prediction`` column holding the same position
        value on every row.
    """
    global history_buffer

    # Convert to pandas
    test_pd = test.to_pandas()

    # Add features using history
    test_features = add_features_for_inference(test_pd, history_buffer)

    # Prepare feature matrix.
    # reindex selects feature_cols in order and creates any absent column as NaN,
    # which fillna then zeroes. Plain test_features[feature_cols] would raise
    # KeyError on the first missing feature.
    X = test_features.reindex(columns=feature_cols).fillna(0)

    # Scale
    X_scaled = pd.DataFrame(scaler.transform(X), columns=feature_cols)

    # Get predictions from all models
    predictions = []
    reliabilities = []

    for model in lgb_models:
        pred = model.predict(X_scaled)
        predictions.append(pred[0] if len(pred) == 1 else pred.mean())
        reliabilities.append(0.9)

    dmatrix = xgb.DMatrix(X_scaled)
    for model in xgb_models:
        pred = model.predict(dmatrix)
        predictions.append(pred[0] if len(pred) == 1 else pred.mean())
        reliabilities.append(0.85)

    predictions = np.array(predictions)
    reliabilities = np.array(reliabilities)

    # Dempster-Shafer fusion
    fused_pred, confidence, conflict = dempster_shafer_fusion(predictions, reliabilities)

    # Position sizing: fused prediction divided by risk-aversion-scaled variance
    # of the model predictions, floored so identical predictions do not divide by ~0.
    std_pred = np.std(predictions)
    uncertainty = max(std_pred, 1e-5)
    kelly = fused_pred / (CONFIG['risk_aversion'] * uncertainty**2 + 1e-8)
    base_pos = CONFIG['base_position'] + CONFIG['scale_factor'] * kelly

    # Risk adjustment based on confidence
    risk_factor = confidence
    if conflict > 0.5:
        risk_factor *= CONFIG.get('high_conflict_factor', 0.6)

    # Shrink the deviation from the base position by the risk factor, then clip.
    position = CONFIG['base_position'] + (base_pos - CONFIG['base_position']) * risk_factor
    position = np.clip(position, CONFIG['min_position'], CONFIG['max_position'])

    # np.clip passes NaN through; never emit a NaN position, use the clipped base instead.
    if not np.isfinite(position):
        position = np.clip(CONFIG['base_position'], CONFIG['min_position'], CONFIG['max_position'])

    # Update history buffer
    history_buffer = pd.concat([history_buffer, test_pd], ignore_index=True).tail(500)

    return test.with_columns(pl.lit(float(position)).alias('prediction'))

print("Prediction function ready.")

In [None]:
# Inference server
import kaggle_evaluation.default_inference_server as kes

print("Starting inference server...")
inference_server = kes.DefaultInferenceServer(predict)

# A non-empty KAGGLE_IS_COMPETITION_RERUN environment variable selects serving mode;
# otherwise the local gateway is run against the competition input directory.
is_competition_rerun = bool(os.getenv('KAGGLE_IS_COMPETITION_RERUN'))

if not is_competition_rerun:
    print("Local mode - running validation")
    local_data_paths = ('/kaggle/input/hull-tactical-market-prediction/',)
    inference_server.run_local_gateway(local_data_paths)
else:
    print("Competition mode - serving predictions")
    inference_server.serve()