In [48]:
import pandas as pd
import plotly.graph_objs as go
import plotly.subplots as sp
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Input, LSTM, Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import TimeSeriesSplit
from skopt import BayesSearchCV

In [50]:
signal_source = pd.read_csv('/Users/evelynli/Documents/Work/ASL/ETF Hedging Data/ETF/processed data/signal_source.csv', index_col= 'Date', parse_dates=True)
signal = pd.read_csv('/Users/evelynli/Documents/Work/ASL/ETF Hedging Data/ETF/processed data/signals.csv', index_col= 'Date', parse_dates=True)
yield_df = pd.read_csv('/Users/evelynli/Documents/Work/ASL/ETF Hedging Data/ETF/processed data/daily-treasury-rates.csv', index_col= 'Date', parse_dates=True)


In [95]:
results = {}
temp = signal_source.loc['2021-01-01':]
idx = signal_source.index.get_loc(temp.index[0])
df = signal_source.iloc[idx-800:]
columns = df.columns
df

Unnamed: 0_level_0,3,84,240,120-60,360-120,24-3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-10-19,0.01,-0.01,-0.02,2.220446e-16,-1.000000e-02,-0.02
2017-10-20,0.01,0.06,0.07,1.000000e-02,0.000000e+00,0.01
2017-10-23,-0.02,-0.02,-0.01,1.000000e-02,1.000000e-02,0.00
2017-10-24,0.03,0.04,0.04,0.000000e+00,-1.000000e-02,-0.01
2017-10-25,0.00,0.02,0.02,1.000000e-02,1.000000e-02,0.01
...,...,...,...,...,...,...
2024-08-06,-0.01,0.13,0.12,1.000000e-02,0.000000e+00,0.11
2024-08-07,0.00,0.06,0.07,0.000000e+00,2.000000e-02,0.01
2024-08-08,0.00,0.04,0.03,-1.000000e-02,-1.000000e-02,0.04
2024-08-09,-0.01,-0.04,-0.05,-2.000000e-02,4.440892e-16,0.02


In [96]:
def calculate_technical_indicators(data, column):
    # Technical indicators calculation, placeholder for the actual implementation
    data = data.copy()

    # Simple Moving Average
    data[f'{column}_SMA_10'] = data[column].rolling(window=10).mean()
    data[f'{column}_SMA_20'] = data[column].rolling(window=20).mean()

    # Exponential Moving Average
    data[f'{column}_EMA_10'] = data[column].ewm(span=10, adjust=False).mean()

    # Relative Strength Index
    delta = data[column].diff(1)
    gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    loss = -delta.where(delta < 0, 0).rolling(window=14).mean()
    data[f'{column}_RSI'] = 100 - (100 / (1 + gain / (loss + 1e-10)))  # Avoid division by zero

    # Bollinger Bands
    data[f'{column}_BB_upper'] = data[f'{column}_SMA_20'] + 2 * data[column].rolling(window=20).std()
    data[f'{column}_BB_lower'] = data[f'{column}_SMA_20'] - 2 * data[column].rolling(window=20).std()

    # Momentum Indicator (10 days)
    data[f'{column}_Momentum'] = data[column].diff(10)

    return data

In [97]:
def rolling_window_forecast_with_indicators(df, window_size=3 * 252):
    results = {}
    predicted_results = {}  # 用于存储每列的预测结果

    # 遍历每一列进行独立预测
    for column in df.columns:
        print(f"Predicting trend for {column}...")

        # 计算当前列的技术指标
        df_with_indicators = calculate_technical_indicators(df, column)
        df_with_indicators.dropna(inplace=True)

        accuracies = []
        predictions = []  # 用于存储预测结果

        # 根据当前列未来变化生成目标标签
        df_with_indicators['Target'] = np.where(df_with_indicators[column].shift(-1) > df_with_indicators[column], 1, np.where(df_with_indicators[column].shift(-1) < df_with_indicators[column], -1, 0))
        df_with_indicators.dropna(inplace=True)

        # 定义贝叶斯优化目标函数
        def optimize_rf(n_estimators, max_depth, min_samples_split, min_samples_leaf):
            n_estimators = int(n_estimators)
            max_depth = int(max_depth)
            min_samples_split = int(min_samples_split)
            min_samples_leaf = int(min_samples_leaf)

            model = RandomForestClassifier(
                n_estimators=n_estimators,
                max_depth=max_depth,
                min_samples_split=min_samples_split,
                min_samples_leaf=min_samples_leaf,
                random_state=42
            )

            # 训练模型并返回测试集准确率
            model.fit(X_train, y_train)
            predictions = model.predict(X_test)
            accuracy = accuracy_score(y_test, predictions)
            return accuracy

        # 定义贝叶斯优化参数空间
        param_bounds = {
            'n_estimators': (50, 200),
            'max_depth': (3, 15),
            'min_samples_split': (2, 10),
            'min_samples_leaf': (1, 5)
        }

        # 滚动窗口预测
        for i in range(len(df_with_indicators) - window_size):
            # 定义滚动窗口的训练集和测试集
            train_data = df_with_indicators.iloc[i:i+window_size]
            test_data = df_with_indicators.iloc[i+window_size:i+window_size+1]

            # 特征和标签
            X_train = train_data.drop('Target', axis=1)
            y_train = train_data['Target']
            X_test = test_data.drop('Target', axis=1)
            y_test = test_data['Target']

            # 数据标准化
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

            # 贝叶斯优化
            optimizer = BayesianOptimization(
                f=optimize_rf,
                pbounds=param_bounds,
                random_state=42,
                verbose=0
            )
            optimizer.maximize(init_points=5, n_iter=15)

            # 使用最优参数进行预测
            best_params = optimizer.max['params']
            model = RandomForestClassifier(
                n_estimators=int(best_params['n_estimators']),
                max_depth=int(best_params['max_depth']),
                min_samples_split=int(best_params['min_samples_split']),
                min_samples_leaf=int(best_params['min_samples_leaf']),
                random_state=42
            )

            model.fit(X_train, y_train)
            prediction = model.predict(X_test)

            # 保存预测值
            predictions.append(prediction[0])

            # 计算准确率
            accuracy = accuracy_score(y_test, prediction)
            accuracies.append(accuracy)
            print(f"Window {i+1}/{len(df_with_indicators) - window_size} - {column} Accuracy: {accuracy:.4f}")

        # 将预测结果保存为与原始列名相同的 DataFrame
        predicted_results[column] = pd.Series(predictions, name=column, index=df_with_indicators.index[-len(predictions):])

        # 保存每列的准确率
        results[column] = accuracies

    # 合并所有列的预测结果
    predicted_df = pd.concat(predicted_results.values(), axis=1)

    return results, predicted_df

# 执行滚动窗口预测并获取预测结果
accuracies, predicted_df = rolling_window_forecast_with_indicators(df)

# 打印各列的平均准确率
for column, acc in accuracies.items():
    print(f"{column} Average Accuracy: {np.mean(acc):.4f}")

# 显示最终的预测结果
print(predicted_df.head())  # 显示前几行预测结果

Predicting trend for 3...
Window 1/929 - 3 Accuracy: 0.0000
Window 2/929 - 3 Accuracy: 1.0000
Window 3/929 - 3 Accuracy: 1.0000
Window 4/929 - 3 Accuracy: 1.0000
Window 5/929 - 3 Accuracy: 0.0000
Window 6/929 - 3 Accuracy: 1.0000
Window 7/929 - 3 Accuracy: 1.0000
Window 8/929 - 3 Accuracy: 1.0000
Window 9/929 - 3 Accuracy: 1.0000
Window 10/929 - 3 Accuracy: 1.0000
Window 11/929 - 3 Accuracy: 1.0000
Window 12/929 - 3 Accuracy: 1.0000
Window 13/929 - 3 Accuracy: 1.0000
Window 14/929 - 3 Accuracy: 1.0000
Window 15/929 - 3 Accuracy: 1.0000
Window 16/929 - 3 Accuracy: 1.0000
Window 17/929 - 3 Accuracy: 1.0000
Window 18/929 - 3 Accuracy: 1.0000
Window 19/929 - 3 Accuracy: 0.0000
Window 20/929 - 3 Accuracy: 1.0000
Window 21/929 - 3 Accuracy: 1.0000
Window 22/929 - 3 Accuracy: 1.0000
Window 23/929 - 3 Accuracy: 1.0000
Window 24/929 - 3 Accuracy: 1.0000
Window 25/929 - 3 Accuracy: 1.0000
Window 26/929 - 3 Accuracy: 0.0000
Window 27/929 - 3 Accuracy: 1.0000
Window 28/929 - 3 Accuracy: 0.0000
Win

In [98]:
for column, acc in accuracies.items():
    print(f"{column} Average Accuracy: {np.mean(acc):.4f}")

3 Average Accuracy: 0.8256
84 Average Accuracy: 0.8224
240 Average Accuracy: 0.8256
120-60 Average Accuracy: 0.8288
360-120 Average Accuracy: 0.7879
24-3 Average Accuracy: 0.8181


In [99]:
predicted_df.to_csv('/Users/evelynli/Documents/Work/ASL/ETF Hedging Data/ETF/processed data/predicted_signals1.csv')