# 8. Daily Inference Pipeline
Generate QQQ return and direction predictions for the next trading day.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from utils.hopsworks_helpers import get_feature_store, get_model_registry, create_feature_group
import joblib
import yaml

# Load the pipeline configuration from the shared YAML file.
# The encoding is given explicitly so parsing does not depend on the
# platform's default locale encoding.
with open('../config/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

## Load Models from Registry

In [None]:
from pathlib import Path


def _load_model_artifact(download_path):
    """Load a joblib-serialized model from a downloaded registry path.

    The registry's ``download()`` may hand back a directory holding the
    model files rather than the serialized file itself; ``joblib.load``
    cannot open a directory. When ``download_path`` is a directory, the
    single ``.pkl``/``.joblib`` file found beneath it is loaded instead.
    Any other path is passed to ``joblib.load`` unchanged.

    Raises:
        FileNotFoundError: the directory contains no ``.pkl``/``.joblib`` file.
        ValueError: the directory contains more than one candidate file, so
            the model artifact cannot be chosen unambiguously.
    """
    root = Path(download_path)
    if not root.is_dir():
        # Plain file path (or a missing path): defer to joblib as before.
        return joblib.load(download_path)

    artifacts = sorted(
        item for item in root.rglob('*')
        if item.is_file() and item.suffix in ('.pkl', '.joblib')
    )
    if not artifacts:
        raise FileNotFoundError(
            f"No .pkl/.joblib model artifact found under {root}"
        )
    if len(artifacts) > 1:
        found = ', '.join(str(item) for item in artifacts)
        raise ValueError(
            f"Multiple model artifacts found under {root}: {found}"
        )
    return joblib.load(str(artifacts[0]))


mr = get_model_registry()

# Fetch the pinned model versions (version 1 of each model)
regressor_model = mr.get_model("qqq_return_regressor", version=1)
classifier_model = mr.get_model("qqq_direction_classifier", version=1)

# Download the model files to local storage
regressor_path = regressor_model.download()
classifier_path = classifier_model.download()

# Deserialize the estimators, whether the download is a file or a folder
regressor = _load_model_artifact(regressor_path)
classifier = _load_model_artifact(classifier_path)

print("Models loaded from Hopsworks Model Registry")

## Get Latest Features

In [None]:
# Connect to the feature store and open the feature view used for inference
fs = get_feature_store()
feature_view = fs.get_feature_view('qqq_prediction_fv', version=1)

# Pull the batch of feature rows served by the view
batch_data = feature_view.get_batch_data()

# Keep only the final row of the batch.
# NOTE(review): treating the last row as the latest trading day assumes the
# batch comes back in chronological order — confirm.
latest_features = batch_data.iloc[-1:]

# Show the row's index label only when it looks date-like (has a `.date` attribute)
first_label = latest_features.index[0]
date_text = first_label if hasattr(first_label, 'date') else 'N/A'
print(f"Latest feature date: {date_text}")
print(f"Features shape: {latest_features.shape}")
latest_features.head()

## Generate Predictions

In [None]:
# Every column except the target_* ones is fed to the models
feature_cols = [
    name for name in latest_features.columns
    if not name.startswith('target_')
]
X_latest = latest_features[feature_cols]

# Regressor output for the single latest row (the return)
return_preds = regressor.predict(X_latest)
predicted_return = return_preds[0]

# Classifier output for the same row: the class label, then the
# probability in column 1 (reported below as the probability of UP)
direction_preds = classifier.predict(X_latest)
predicted_direction = direction_preds[0]
proba_matrix = classifier.predict_proba(X_latest)
predicted_proba = proba_matrix[0, 1]

# Derived values used only for the report below
direction_label = 'UP' if predicted_direction == 1 else 'DOWN'
proba_down = 1 - predicted_proba

print("\n===== PREDICTIONS FOR NEXT TRADING DAY =====")
print(f"Predicted Return: {predicted_return:.4f} ({predicted_return*100:.2f}%)")
print(f"Predicted Direction: {direction_label}")
print(f"Probability of UP: {predicted_proba:.4f} ({predicted_proba*100:.1f}%)")
print(f"Probability of DOWN: {proba_down:.4f} ({proba_down*100:.1f}%)")

## Feature Importance for This Prediction

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pair each feature with the regressor's importance score and the value it
# took in the latest row, then rank by importance (highest first)
importance_table = pd.DataFrame({
    'feature': feature_cols,
    'importance': regressor.feature_importances_,
    'value': X_latest.values[0]
})
feature_importance = importance_table.sort_values('importance', ascending=False)

# The ten highest-ranked features are both printed and plotted
top_ten = feature_importance.head(10)

print("\nTop 10 Features Driving This Prediction:")
print(top_ten.to_string(index=False))

# Horizontal bar chart of the same ten features
plt.figure(figsize=(10, 6))
sns.barplot(data=top_ten, x='importance', y='feature')
plt.title('Top 10 Features for Current Prediction')
plt.xlabel('Importance')
plt.tight_layout()
plt.show()

## Save Predictions

In [None]:
# Create prediction record, stamped with today's date (local clock)
prediction_date = datetime.now().strftime('%Y-%m-%d')

prediction_record = pd.DataFrame([{
    'prediction_date': prediction_date,
    'predicted_return': predicted_return,
    'predicted_direction': predicted_direction,
    'predicted_proba_up': predicted_proba,
    # Record the versions of the models actually loaded from the registry,
    # so the record cannot drift from the version requested at load time.
    'model_version_regressor': regressor_model.version,
    'model_version_classifier': classifier_model.version
}])

# Add the values of the 5 most important features as extra columns,
# one `feature_<name>` column per feature
top_five = feature_importance.head(5)
for feature_name, feature_value in zip(top_five['feature'], top_five['value']):
    prediction_record[f'feature_{feature_name}'] = feature_value

prediction_record

In [None]:
# Persist the prediction record as a Hopsworks feature group (for dashboard),
# keyed by prediction date
fg_settings = {
    'name': 'qqq_predictions',
    'df': prediction_record,
    'primary_key': ['prediction_date'],
    'description': 'Daily QQQ predictions with model outputs and key features',
}
predictions_fg = create_feature_group(fs, **fg_settings)

print(f"\nPrediction saved to Hopsworks for {prediction_date}")

## Historical Prediction Performance (Optional)

In [None]:
# Generate predictions for the last 30 rows of the batch to visualize performance
recent_data = batch_data.tail(30)
X_recent = recent_data[[col for col in recent_data.columns if not col.startswith('target_')]]

# Actual targets are optional: each is picked up only if its column exists
y_recent_return = recent_data['target_return'] if 'target_return' in recent_data.columns else None
y_recent_direction = recent_data['target_direction'] if 'target_direction' in recent_data.columns else None

# Predictions
pred_returns = regressor.predict(X_recent)
pred_directions = classifier.predict(X_recent)
pred_probas = classifier.predict_proba(X_recent)[:, 1]

# Create visualization DataFrame
viz_df = pd.DataFrame({
    'predicted_return': pred_returns,
    'predicted_direction': pred_directions,
    'predicted_proba': pred_probas
})

# Attach each actual independently, so that having only one of the two
# target columns no longer fails on the missing one
if y_recent_return is not None:
    viz_df['actual_return'] = y_recent_return.values
if y_recent_direction is not None:
    viz_df['actual_direction'] = y_recent_direction.values

viz_df.index = recent_data.index

# Plot only when both actuals are available: the first panel needs the
# actual returns and the second is coloured by the actual direction
if {'actual_return', 'actual_direction'}.issubset(viz_df.columns):
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

    # Returns
    ax1.plot(viz_df.index, viz_df['actual_return'], label='Actual Return', marker='o')
    ax1.plot(viz_df.index, viz_df['predicted_return'], label='Predicted Return', marker='x')
    ax1.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
    ax1.set_title('Predicted vs Actual Returns (Last 30 Days)')
    ax1.set_ylabel('Return')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Direction probabilities, coloured by the actual outcome:
    # red = 0, green = any other known value, gray = actual not available
    colors = [
        'gray' if pd.isna(d) else ('red' if d == 0 else 'green')
        for d in viz_df['actual_direction']
    ]
    ax2.bar(viz_df.index, viz_df['predicted_proba'], color=colors, alpha=0.6)
    ax2.axhline(y=0.5, color='black', linestyle='--', alpha=0.5)
    ax2.set_title('Predicted Probability of UP Movement (Last 30 Days)')
    ax2.set_ylabel('Probability')
    ax2.set_ylim(0, 1)
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Calculate accuracy over rows whose actual direction is known; rows with a
# missing actual would otherwise be counted as wrong predictions
if 'actual_direction' in viz_df.columns:
    scored = viz_df.dropna(subset=['actual_direction'])
    if not scored.empty:
        accuracy = (scored['predicted_direction'] == scored['actual_direction']).mean()
        print(f"\nDirectional Accuracy (Last 30 Days): {accuracy:.2%}")