# 6. Create Feature View
Join all feature groups and create targets for model training

In [None]:
import sys
sys.path.append('..')

import pandas as pd
from utils.hopsworks_helpers import get_feature_store
import yaml

# Load the project configuration from the YAML file one directory up.
# The encoding is given explicitly so the file is decoded the same way on
# every platform instead of relying on the locale's default codec.
with open('../config/config.yaml', 'r', encoding='utf-8') as f:
    # safe_load only builds plain Python objects (no arbitrary object construction).
    config = yaml.safe_load(f)

## Load All Feature Groups

In [None]:
# Connect to the feature store through the project helper.
fs = get_feature_store()

# Fetch version 1 of every feature group used below. The names are looked up
# in the order listed; the last one (raw QQQ data) is only needed to build
# the prediction targets.
(
    qqq_tech_fg,
    xlk_sector_fg,
    vix_vol_fg,
    macro_fg,
    sentiment_fg,
    qqq_raw_fg,
) = (
    fs.get_feature_group(fg_name, version=1)
    for fg_name in (
        'qqq_technical_features',
        'xlk_sector_features',
        'vix_volatility_features',
        'macro_features',
        'sentiment_features',
        'qqq_raw',
    )
)

print("All feature groups loaded")

## Join Feature Groups

In [None]:
# Materialise each feature group as a DataFrame (read in the same order as
# the handles were obtained).
qqq_tech, xlk_sector, vix_vol, macro, sentiment, qqq_raw = (
    fg.read()
    for fg in (qqq_tech_fg, xlk_sector_fg, vix_vol_fg, macro_fg, sentiment_fg, qqq_raw_fg)
)

# Left-join everything onto the QQQ technical features by 'date', so the
# result keeps exactly the rows of qqq_tech. The raw QQQ close is attached
# last because it is needed to compute the targets. Finally order the rows
# chronologically and renumber the index.
features = (
    qqq_tech
    .merge(xlk_sector, on='date', how='left')
    .merge(vix_vol, on='date', how='left')
    .merge(macro, on='date', how='left')
    .merge(sentiment, on='date', how='left')
    .merge(qqq_raw[['date', 'qqq_close']], on='date', how='left')
    .sort_values('date')
    .reset_index(drop=True)
)

print(f"Combined features shape: {features.shape}")
print(f"Columns: {features.columns.tolist()}")
features.head()

## Create Target Variables

In [None]:
# Target 1: next-day return (regression).
# pct_change() is the return from the previous row to the current row;
# shift(-1) moves it up one row, so every row is labelled with the return
# realised on the FOLLOWING row (rows are already sorted by date).
# fill_method=None stops pandas from forward-filling a missing close before
# computing the change, which would otherwise fabricate a 0% return (and a
# "down" label) around any date whose qqq_close is NaN after the left join.
features['target_return'] = (
    features['qqq_close'].pct_change(fill_method=None).shift(-1)
)

# Target 2: binary up/down (classification): 1 when the next-day return is
# strictly positive, 0 otherwise. NaN returns compare as False here, but
# those rows are removed just below.
features['target_direction'] = (features['target_return'] > 0).astype(int)

# Drop rows without a target. After shift(-1) that is the last row (no next
# day to look at) plus any row adjacent to a missing close.
# .copy() makes the result an independent frame so that later column
# assignments on it do not trigger SettingWithCopyWarning.
features_with_targets = features.dropna(subset=['target_return']).copy()

print(f"\nFeatures with targets shape: {features_with_targets.shape}")
print(f"Target distribution:\n{features_with_targets['target_direction'].value_counts()}")
features_with_targets.head()

## Handle Missing Values

In [None]:
# Report how many values are missing in each column (only columns that
# actually have gaps are printed).
missing = features_with_targets.isnull().sum()
print("Missing values per column:")
print(missing[missing > 0])

# Work on an explicit copy: features_with_targets was derived from another
# frame via dropna, and assigning columns on such a result can raise
# SettingWithCopyWarning or silently write to a temporary.
features_with_targets = features_with_targets.copy()

# For sentiment features, fill missing with 0 (no news that day).
# Columns that are not present in the joined frame are skipped.
sentiment_cols = ['sentiment_mean', 'sentiment_std', 'article_count', 'positive_mean', 'negative_mean', 'neutral_mean']
for col in sentiment_cols:
    if col in features_with_targets.columns:
        features_with_targets[col] = features_with_targets[col].fillna(0)

# Drop every row that still has a missing value in any column
# (presumably the warm-up period of rolling-window features at the start).
features_clean = features_with_targets.dropna()

print(f"\nClean features shape: {features_clean.shape}")
print(f"Date range: {features_clean['date'].min()} to {features_clean['date'].max()}")

## Create Feature View in Hopsworks

In [None]:
# Persist the cleaned, joined frame (features plus both targets) as its own
# feature group, keyed by 'date', using the project helper.
from utils.hopsworks_helpers import create_feature_group

combined_fg_spec = dict(
    name='qqq_combined_features',
    df=features_clean,
    primary_key=['date'],
    description='Combined feature set with all features and targets',
)
combined_fg = create_feature_group(fs, **combined_fg_spec)

print("Combined feature group created!")

In [None]:
# Create the feature view used for training.
# The label columns are declared once and reused both for the feature view
# and for the summary below, so the printed feature count always matches
# what was actually registered as labels.
label_cols = ['target_return', 'target_direction']

# Select every column except the join key and the raw price; the label
# columns stay in the query because the feature view's labels must be part of it.
feature_cols = [col for col in features_clean.columns
                if col not in ['date', 'qqq_close']]

query = combined_fg.select(feature_cols)

# NOTE(review): create_feature_view presumably fails if version 1 of this
# feature view already exists (e.g. when re-running the notebook) - confirm,
# and consider get_or_create_feature_view if re-runs should be idempotent.
feature_view = fs.create_feature_view(
    name='qqq_prediction_fv',
    version=1,
    query=query,
    labels=label_cols
)

# Number of model inputs = selected columns that are not labels.
n_features = sum(1 for col in feature_cols if col not in label_cols)

print("Feature view created successfully!")
print(f"Features: {n_features}")
print("Targets: target_return (regression), target_direction (classification)")