In [None]:
import pandas as pd
# Load the CSV at this relative path (resolved against the kernel's working directory) into df_futures.
df_futures = pd.read_csv('dataset/test/test.csv')

In [None]:
# Show the loaded DataFrame as this cell's output.
df_futures

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PyEMD import EMD

# Decompose each selected column of df_futures with EMD and plot the result.
# Columns to decompose; every name listed here must exist in df_futures.
columns_of_interest = ['corn_low', 'corn_high', 'corn_open', 'WTI_close', 
                    'dollar', 'starch_close', 'wheat_close', 
                    'bean2_close', 'beanmeal_close','seed_close']

# Keep only the selected columns
df_selected = df_futures[columns_of_interest]

# One EMD object is reused for every column
emd = EMD()

# Bookkeeping for the decomposition results
num_variables = len(columns_of_interest)
num_time_points = df_selected.shape[0]  # length of the time series
imfs_list = []

# Decompose each variable independently; each result has one row per component.
# NOTE(review): missing values are passed to EMD unchanged — confirm the CSV has no NaNs.
for column in df_selected.columns:
    signal = df_selected[column].values
    imfs = emd.emd(signal)
    imfs_list.append(imfs)

# Different variables can yield different numbers of components
max_imfs_count = max(imf.shape[0] for imf in imfs_list)

# Stack everything into one array of shape (IMF count, variable count, time points).
# Variables with fewer components than the maximum keep zeros in their unused rows.
aligned_imfs = np.zeros((max_imfs_count, num_variables, num_time_points))

for var_idx, imfs in enumerate(imfs_list):
    aligned_imfs[:imfs.shape[0], var_idx, :] = imfs

# One subplot per IMF index; figure height grows with the number of IMFs
fig, axes = plt.subplots(max_imfs_count, 1, figsize=(12, 6 * max_imfs_count))

# plt.subplots returns a bare Axes (not an array) when there is a single row
if max_imfs_count == 1:
    axes = [axes]

# Draw every variable that actually has a component at this IMF index
for imf_idx in range(max_imfs_count):
    ax = axes[imf_idx]
    for var_idx in range(num_variables):
        if imf_idx >= imfs_list[var_idx].shape[0]:
            # This row is zero padding, not a real component: plotting it would
            # show a flat line labelled as if it were an IMF of this variable.
            continue
        ax.plot(df_selected.index, aligned_imfs[imf_idx, var_idx, :], label=columns_of_interest[var_idx])
    ax.set_title(f"IMF-{imf_idx + 1} (Frequency Scale {imf_idx + 1})")
    ax.set_xlabel("Time")
    ax.set_ylabel("IMF Value")
    ax.legend()
    ax.grid(True)

# Avoid overlapping titles and labels between subplots
plt.tight_layout()
plt.show()



In [None]:
# Print the shape of the aligned decomposition: (IMF count, variable count, time points)
print("分解结果维度：", aligned_imfs.shape)

# Flatten to 2-D with one row per time point.
# The time axis is last in aligned_imfs, so it must be moved to the front before
# reshaping; a bare reshape would only re-chunk the buffer and scramble the values.
# The (time, variable, IMF) order matches the column names built below
# (variable in the outer loop, IMF in the inner loop).
flattened_data = aligned_imfs.transpose(2, 1, 0).reshape(num_time_points, -1)

# One column name per (variable, IMF) pair
columns = [f"{columns_of_interest[var_idx]}_IMF_{imf_idx+1}" for var_idx in range(num_variables) for imf_idx in range(max_imfs_count)]

# Rows are time points, columns are the individual IMF components
imf_df = pd.DataFrame(flattened_data, columns=columns)

# Show the decomposition table
print(imf_df)

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# aligned_imfs is the 3-D decomposition result produced earlier in the notebook,
# indexed as (IMF, variable, time point).
# The original note records a shape of (9, 10, 1683) for the author's data:
# 9 IMF components, 10 variables, 1683 time points — this depends on the input file.

# High-frequency group: the first 6 IMFs
high_freq_imfs = aligned_imfs[:6]

# Trend group: every IMF from index 6 onward (3 IMFs when there are 9 in total)
trend_imfs = aligned_imfs[6:]

# 创建时间窗口的特征数据集
def create_lstm_dataset(imfs, window=30):
    """Slice a 3-D array into sliding-window samples and next-step targets.

    Args:
        imfs: array indexed as (component, variable, time).
        window: number of consecutive time steps in each input sample.

    Returns:
        A pair ``(X, y)`` of NumPy arrays. Each sample in ``X`` has shape
        ``(window, components * variables)`` and holds the ``window`` steps
        preceding a target step; the matching row of ``y`` is the flattened
        ``(component, variable)`` slice at that target step. When the time axis
        is not longer than ``window`` both arrays are empty.
    """
    n_features = imfs.shape[0] * imfs.shape[1]
    target_steps = range(window, imfs.shape[2])

    samples = [
        imfs[:, :, step - window:step].reshape(n_features, window).T
        for step in target_steps
    ]
    targets = [imfs[:, :, step].flatten() for step in target_steps]
    return np.array(samples), np.array(targets)

# Build the windowed inputs and next-step targets for both IMF groups
X_high_freq, y_high_freq = create_lstm_dataset(high_freq_imfs)
X_trend, y_trend = create_lstm_dataset(trend_imfs)

# Chronological split sizes: first 70% of the samples for training, the rest for testing.
# These are computed before scaling so the scalers can be fitted on training data only.
train_size = int(X_high_freq.shape[0] * 0.7)
test_size = X_high_freq.shape[0] - train_size

# Feature-wise min-max scaling of the inputs (targets are left unscaled)
scaler_high_freq = MinMaxScaler()
scaler_trend = MinMaxScaler()


def _fit_on_train_then_scale(scaler, windows, n_train):
    """Fit ``scaler`` on the first ``n_train`` samples only, then scale every sample.

    ``windows`` is (samples, window, features); it is flattened to
    (samples * window, features) for the scaler and restored afterwards.
    Fitting on the training part only keeps test-period min/max values out of
    the training inputs, so scaled test values may fall outside [0, 1].
    """
    n_features = windows.shape[-1]
    scaler.fit(windows[:n_train].reshape(-1, n_features))
    return scaler.transform(windows.reshape(-1, n_features)).reshape(windows.shape)


X_high_freq = _fit_on_train_then_scale(scaler_high_freq, X_high_freq, train_size)
X_trend = _fit_on_train_then_scale(scaler_trend, X_trend, train_size)

# Split inputs and targets at the same position, preserving time order
X_train_high, X_test_high = X_high_freq[:train_size], X_high_freq[train_size:]
y_train_high, y_test_high = y_high_freq[:train_size], y_high_freq[train_size:]

X_train_trend, X_test_trend = X_trend[:train_size], X_trend[train_size:]
y_train_trend, y_test_trend = y_trend[:train_size], y_trend[train_size:]


In [None]:
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import LSTM, Dense, Attention, Input, Dropout
# 
# # LSTM + Attention 模型
# def create_lstm_attention_model(input_shape):
#     inputs = Input(shape=input_shape)
#     lstm_out = LSTM(64, return_sequences=True)(inputs)
#     attention_out = Attention()([lstm_out, lstm_out])
#     attention_out = Dropout(0.2)(attention_out)
#     attention_out = LSTM(64)(attention_out)
#     outputs = Dense(1)(attention_out)
#     
#     model = Model(inputs, outputs)
#     model.compile(optimizer='adam', loss='mse')
#     return model
# 
# # 为每个高频IMF分量训练LSTM + Attention模型
# models_high_freq = []
# for i in range(high_freq_imfs.shape[0]):
#     model = create_lstm_attention_model(X_train_high.shape[1:])
#     model.fit(X_train_high, y_train_high[:, i], epochs=20, batch_size=32, validation_data=(X_test_high, y_test_high[:, i]))
#     models_high_freq.append(model)

# 改用PyTorch实现
import torch
import torch.nn as nn
import torch.optim as optim

class LSTMAttentionModel(nn.Module):
    """LSTM encoder followed by single-head self-attention and a linear head.

    Input is batch-first: ``(batch, seq_len, input_size)``.
    Output is ``(batch, output_size)``, computed from the last time step of the
    attention output.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout_rate=0.2):
        """Build the layers.

        Args:
            input_size: number of features per time step.
            hidden_size: LSTM hidden size, also used as the attention embedding size.
            output_size: number of values predicted per sample.
            dropout_rate: dropout probability applied to the attention output.
        """
        super(LSTMAttentionModel, self).__init__()

        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        # batch_first=True is required here: MultiheadAttention defaults to
        # (seq, batch, embed), which would make it attend across the samples of
        # a batch instead of across the time steps of each sample.
        self.attention = nn.MultiheadAttention(hidden_size, num_heads=1, batch_first=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return predictions of shape ``(batch, output_size)`` for batch-first ``x``."""
        # Per-step hidden states: (batch, seq_len, hidden_size)
        lstm_out, (h_n, c_n) = self.lstm(x)

        # Self-attention over the time steps of each sample
        attn_output, _ = self.attention(lstm_out, lstm_out, lstm_out)
        attn_output = self.dropout(attn_output)

        # Collapse the sequence by keeping the last time step only
        output = attn_output[:, -1, :]

        # Linear head producing the prediction
        output = self.fc(output)

        return output

# 训练函数
def train_lstm_attention_model(X_train, y_train, X_test, y_test, input_size, hidden_size, output_size, num_epochs=20, batch_size=32):
    """Train an LSTMAttentionModel with Adam and MSE loss, printing losses per epoch.

    Reads the module-level ``device`` at call time, so ``device`` must be defined
    before this function is called.

    Args:
        X_train, X_test: inputs of shape (samples, seq_len, input_size), given as
            NumPy arrays or torch tensors.
        y_train, y_test: targets with one row per sample, either (samples,) or
            (samples, output_size), given as NumPy arrays or torch tensors.
        input_size, hidden_size, output_size: forwarded to LSTMAttentionModel.
        num_epochs: number of passes over the training data.
        batch_size: number of consecutive samples per optimisation step.

    Returns:
        The trained model, left on ``device``.

    Raises:
        ValueError: if the number of target rows differs from the number of samples.
    """
    def _prepare(inputs, targets):
        # Accept NumPy input and match the model's float32 parameters.
        inputs = torch.as_tensor(inputs, dtype=torch.float32)
        # Give targets the same (samples, output_size) layout as the model output;
        # a 1-D target would otherwise broadcast against (batch, 1) inside MSELoss.
        targets = torch.as_tensor(targets, dtype=torch.float32).reshape(-1, output_size)
        if targets.shape[0] != inputs.shape[0]:
            raise ValueError(
                f"got {inputs.shape[0]} samples but {targets.shape[0]} target rows"
            )
        return inputs, targets

    X_train, y_train = _prepare(X_train, y_train)
    X_test, y_test = _prepare(X_test, y_test)

    model = LSTMAttentionModel(input_size, hidden_size, output_size)
    model.to(device)  # use the GPU when the module-level device points at one

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        # Batches are consecutive slices; the data is not shuffled
        for i in range(0, len(X_train), batch_size):
            batch_X = X_train[i:i+batch_size].to(device)
            batch_y = y_train[i:i+batch_size].to(device)

            optimizer.zero_grad()
            output = model(batch_X)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()

        # Validation on the whole test set in a single forward pass
        model.eval()
        with torch.no_grad():
            val_X = X_test.to(device)
            val_y = y_test.to(device)
            val_output = model(val_X)
            val_loss = criterion(val_output, val_y)

        # "Loss" is the loss of the last training batch of this epoch
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")

    return model

# Train one model per high-frequency IMF index, using the prepared train/test arrays.
# Use the GPU when one is available; the training function reads this module-level name.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The prepared arrays are NumPy float64; the training loop calls .to(device) and the
# model weights are float32, so convert once to float32 tensors up front.
X_train_high_t = torch.as_tensor(X_train_high, dtype=torch.float32)
X_test_high_t = torch.as_tensor(X_test_high, dtype=torch.float32)
y_train_high_t = torch.as_tensor(y_train_high, dtype=torch.float32)
y_test_high_t = torch.as_tensor(y_test_high, dtype=torch.float32)

models_high_freq = []
for i in range(high_freq_imfs.shape[0]):
    # Slicing with i:i+1 keeps the target 2-D, (samples, 1), so it lines up with the
    # model output instead of broadcasting inside the loss.
    # NOTE(review): each target row is the flattened (IMF, variable) slice, so column i
    # is IMF 0 / variable i, not "IMF i" — confirm which column each model should predict.
    model = train_lstm_attention_model(X_train_high_t, y_train_high_t[:, i:i+1], X_test_high_t, y_test_high_t[:, i:i+1], 
                                      input_size=X_train_high_t.shape[2], hidden_size=64, output_size=1)
    models_high_freq.append(model)


In [None]:
# 假设Autoformer模型的代码已实现并可用
from autoformer import Autoformer

# Create one Autoformer model per trend IMF component and keep them in models_trend.
# NOTE(review): the train()/predict() calls below are placeholders without arguments;
# their real signatures depend on the Autoformer implementation being imported.
models_trend = []
for i in range(trend_imfs.shape[0]):
    model = Autoformer() # here you can specify the model's hyperparameters
    model.train() # placeholder for pandas DataFrame with Date and Value columns (A corn_close column is acquired)
    model.predict() # placeholder for the number of days to predict
    # Keep the model; without this the list created above stays empty
    models_trend.append(model)
