In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.inspection import permutation_importance
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from tqdm import tqdm

def train_svr(features, target, window_size=48, test_size=1, n_iter=50,
              cv=3, random_state=42):
    """Evaluate an RBF-kernel SVR with a moving (sliding) training window.

    For each step the model is tuned and fitted on ``window_size`` consecutive
    rows and then used to predict the ``test_size`` rows that immediately
    follow. The window then advances by ``test_size`` rows. Rows left over at
    the end that do not fill a complete test block are never predicted.

    Parameters
    ----------
    features : pandas object supporting ``.iloc`` row slicing
        Predictor matrix; rows are consumed in their existing order.
    target : pandas object supporting ``.iloc`` and ``.values``
        Values to predict; must have the same number of rows as ``features``.
    window_size : int, default 48
        Number of rows in every training window.
    test_size : int, default 1
        Number of rows predicted after every training window, and the stride
        by which the window advances.
    n_iter : int, default 50
        Number of parameter settings sampled by ``BayesSearchCV`` per window.
    cv : int or cross-validation splitter, default 3
        Passed unchanged to ``BayesSearchCV``.
        NOTE(review): an integer is expected to select plain K-fold for a
        regressor, which ignores temporal order; for time-ordered data pass a
        splitter such as ``TimeSeriesSplit`` instead -- confirm.
    random_state : int or None, default 42
        Seed used for both the Bayesian search and the permutation importance
        so repeated runs on the same data are reproducible. ``None`` leaves
        both unseeded.

    Returns
    -------
    tuple
        ``(all_preds, all_actuals, models, scalers, importance)`` where
        ``all_preds`` / ``all_actuals`` are flat lists of out-of-window
        predictions and the matching target values, ``models`` / ``scalers``
        hold the fitted best estimator and ``RobustScaler`` of every step, and
        ``importance`` is the ``permutation_importance`` result for the last
        model evaluated on the final ``window_size`` rows. Those rows largely
        overlap the last model's training window when ``test_size`` is small
        relative to ``window_size``, so it is not a purely out-of-sample
        importance estimate.

    Raises
    ------
    ValueError
        If ``window_size`` or ``test_size`` is smaller than 1, if ``features``
        and ``target`` differ in length, or if there are not enough rows for
        at least one train/test step.
    """
    if window_size < 1 or test_size < 1:
        raise ValueError(
            f"window_size and test_size must be >= 1 "
            f"(got window_size={window_size}, test_size={test_size})"
        )

    total_size = len(features)
    if len(target) != total_size:
        raise ValueError(
            f"features and target must have the same number of rows "
            f"(got {total_size} and {len(target)})"
        )

    n_steps = (total_size - window_size) // test_size
    if n_steps < 1:
        # Without this guard the loop would be skipped and models[-1] below
        # would fail with an uninformative IndexError.
        raise ValueError(
            f"Not enough rows for a single step: need at least "
            f"{window_size + test_size}, got {total_size}"
        )

    all_preds = []
    all_actuals = []
    models = []
    scalers = []

    for i in tqdm(range(n_steps), desc="Moving Window Validation"):
        train_start = i * test_size
        train_end = train_start + window_size
        test_end = train_end + test_size

        X_train = features.iloc[train_start:train_end]
        y_train = target.iloc[train_start:train_end]
        X_test = features.iloc[train_end:test_end]
        y_test = target.iloc[train_end:test_end]

        # Fit the scaler on the training window only, then apply it to the
        # held-out rows, so no test-row statistics reach the model.
        scaler = RobustScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # The search space is rebuilt for every window so each search starts
        # from fresh dimension objects.
        opt = BayesSearchCV(
            SVR(kernel='rbf'),
            {
                'C': Real(1e-3, 1e3, prior='log-uniform'),
                'gamma': Real(1e-4, 1e1, prior='log-uniform'),
                'epsilon': Real(0.01, 1.0)
            },
            n_iter=n_iter,
            cv=cv,
            n_jobs=-1,
            random_state=random_state
        )
        opt.fit(X_train_scaled, y_train)
        best_model = opt.best_estimator_

        preds = best_model.predict(X_test_scaled)
        all_preds.extend(preds)
        all_actuals.extend(y_test.values)
        models.append(best_model)
        scalers.append(scaler)

    # Importance of each feature for the most recent model, measured on the
    # last window_size rows after scaling them with that model's own scaler.
    importance = permutation_importance(
        models[-1], scalers[-1].transform(features.iloc[-window_size:]),
        target.iloc[-window_size:],
        n_repeats=10, random_state=random_state, n_jobs=-1
    )

    return all_preds, all_actuals, models, scalers, importance

def calculate_metrics(actuals, preds):
    """Summarise prediction quality with error and direction metrics.

    Parameters
    ----------
    actuals : array-like
        Observed target values, in prediction order.
    preds : array-like
        Predicted values aligned element-wise with ``actuals``.

    Returns
    -------
    dict
        ``'RMSE'``, ``'MAE'`` and ``'R2'`` as computed by the scikit-learn
        metric functions, plus ``'Directional_Accuracy'``: the percentage of
        consecutive steps where the change in ``preds`` and the change in
        ``actuals`` have the same non-zero sign. Steps where either change is
        zero count as misses. With fewer than two observations there are no
        changes to compare and ``'Directional_Accuracy'`` is NaN.
    """
    rmse = np.sqrt(mean_squared_error(actuals, preds))
    mae = mean_absolute_error(actuals, preds)
    r2 = r2_score(actuals, preds)

    # Flatten first so column-shaped inputs (n, 1) are differenced along the
    # observation axis instead of across a length-1 axis.
    actual_changes = np.diff(np.ravel(actuals))
    pred_changes = np.diff(np.ravel(preds))

    if actual_changes.size == 0:
        # Taking the mean of an empty array would also give NaN, but with a
        # RuntimeWarning; return NaN explicitly instead.
        direction_acc = float('nan')
    else:
        # A positive product means both series moved in the same direction.
        direction_acc = np.mean((actual_changes * pred_changes) > 0) * 100

    return {
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2,
        'Directional_Accuracy': direction_acc
    }

In [8]:
import os

# Read the AAPL CSV from the local "data" directory into a DataFrame.
aapl_csv_path = os.path.join("data", "AAPL.csv")
df = pd.read_csv(aapl_csv_path)

In [None]:
from utils.preprocess import prepare_data

# Build model inputs from the raw frame. prepare_data lives in
# utils.preprocess and is not shown in this notebook.
features, target = prepare_data(df, horizon=1)

# Moving-window SVR evaluation: 48-row training window, one row predicted
# per step, 20 Bayesian-search iterations per window.
predictions, actuals, models, scalers, importance = train_svr(
    features, target, window_size=48, test_size=1, n_iter=20
)

# Aggregate error metrics over every out-of-window prediction.
metrics = calculate_metrics(actuals, predictions)
print("\nModel Performance:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")

# Tabulate permutation importances, most important feature first.
feature_names = features.columns
importance_df = pd.DataFrame(
    {
        'Feature': feature_names,
        'Mean_Importance': importance.importances_mean,
        'Std_Dev': importance.importances_std,
    }
)
importance_df = importance_df.sort_values(by='Mean_Importance', ascending=False)

print("\nTop 10 Important Features:")
print(importance_df.iloc[:10].to_string(index=False))

Moving Window Validation:   0%|          | 5/8202 [02:39<54:36:33, 23.98s/it] 