# RNN 时间序列预测演示

本Notebook展示了使用RNN进行时间序列预测的完整流程：
1. 数据集的梳理和预处理
2. 模型构建和训练
3. 模型预测、评估和可视化
4. 未来预测演示

**注意**: 运行前请确保已安装所有依赖包：`pip install -r requirements.txt`

In [None]:
# 导入必要的库
import numpy as np
import matplotlib.pyplot as plt
import torch
import warnings
warnings.filterwarnings('ignore')

# 导入自定义模块
from data_processor import DataProcessor
from rnn_model import SimpleRNN, LSTMModel, RNNTrainer

# Configure fonts for the Chinese plot labels: SimHei first, DejaVu Sans as
# the second entry, and disable the Unicode minus glyph on the axes.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

# Fix the PyTorch and NumPy random seeds.
torch.manual_seed(42)
np.random.seed(42)

print("环境设置完成！")

## 第一步：数据集的梳理和预处理

In [None]:
# Create the data processor with a sequence length of 20.
sequence_length = 20
processor = DataProcessor(sequence_length=sequence_length)

# Generate the example time series (1000 samples requested).
print("1.1 生成示例数据...")
data = processor.generate_sample_data(n_samples=1000)

# Fetch the processor's summary of the data and print it one field per line.
info = processor.get_data_info()
print("\n数据集基本信息:")
for field, field_value in info.items():
    print(f"  {field}: {field_value}")

In [None]:
# Normalize the data; the result of normalize_data() is kept in scaled_data.
print("1.2 数据归一化...")
scaled_data = processor.normalize_data()

# Plot the data through the processor, passing n_points=300.
processor.visualize_data(n_points=300)

In [None]:
# Build the (X, y) sequence data set from the processor.
print("1.3 创建时间序列数据集...")
X, y = processor.create_sequences()

# Split in two stages: hold out 20% as the test set, then take 25% of the
# remainder as the validation set (0.25 * 0.8 = 20% of all samples), which
# leaves 60% for training.
# NOTE(review): split_data accepts a random_state, which suggests a shuffled
# split. Later cells treat X_test[-1:] and the tail of y_test as the end of
# the series -- confirm that split_data preserves chronological order.
print("1.4 分割数据集...")
X_temp, X_test, y_temp, y_test = processor.split_data(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = processor.split_data(X_temp, y_temp, test_size=0.25, random_state=42)

# Report the number of samples (first dimension) in each split.
print(f"  训练集: {X_train.shape[0]} 样本")
print(f"  验证集: {X_val.shape[0]} 样本")
print(f"  测试集: {X_test.shape[0]} 样本")

## 第二步：模型构建和训练

In [None]:
# Settings read by the model-construction and training cells below.
config = dict(
    input_size=1,
    hidden_size=64,
    num_layers=2,
    output_size=1,
    dropout=0.2,
    epochs=50,  # a small number of epochs, chosen for the notebook
    batch_size=32,
    learning_rate=0.001,
    patience=10,
)

# Echo every setting, one per line.
print("模型配置:")
for option, setting in config.items():
    print(f"  {option}: {setting}")

In [None]:
# Build the Simple RNN from the shared configuration and wrap it in a trainer.
print("训练 Simple RNN 模型...")

rnn_model = SimpleRNN(**{
    option: config[option]
    for option in ('input_size', 'hidden_size', 'num_layers', 'output_size', 'dropout')
})
rnn_trainer = RNNTrainer(rnn_model)

# Sum the element counts of all model parameters and print the total with
# thousands separators.
total_params = sum(weights.numel() for weights in rnn_model.parameters())
print(f"模型参数数量: {total_params:,}")

In [None]:
# Train the Simple RNN: the training and validation splits are passed
# positionally, the schedule comes from `config`, and the trainer is given
# 'best_rnn_model.pth' as its save path.
rnn_trainer.train(
    X_train, y_train, X_val, y_val,
    epochs=config['epochs'],
    batch_size=config['batch_size'],
    learning_rate=config['learning_rate'],
    patience=config['patience'],
    save_path='best_rnn_model.pth'
)

In [None]:
# Plot the training history recorded by the Simple RNN trainer.
rnn_trainer.plot_training_history()

In [None]:
# Build the LSTM from the same configuration as the Simple RNN and wrap it in
# its own trainer.
print("训练 LSTM 模型...")

lstm_model = LSTMModel(**{
    option: config[option]
    for option in ('input_size', 'hidden_size', 'num_layers', 'output_size', 'dropout')
})
lstm_trainer = RNNTrainer(lstm_model)

# Train the LSTM with the shared schedule; the trainer is given
# 'best_lstm_model.pth' as its save path.
lstm_trainer.train(
    X_train, y_train, X_val, y_val,
    **{
        option: config[option]
        for option in ('epochs', 'batch_size', 'learning_rate', 'patience')
    },
    save_path='best_lstm_model.pth',
)

In [None]:
# Plot the training history recorded by the LSTM trainer.
lstm_trainer.plot_training_history()

## 第三步：模型预测和评估

In [None]:
# Evaluate the Simple RNN on the test split. evaluate() returns a pair: the
# metrics (indexed by metric name in later cells) and the predictions.
print("评估 Simple RNN 模型性能...")
rnn_metrics, rnn_pred = rnn_trainer.evaluate(X_test, y_test)

# Evaluate the LSTM on the same test split.
print("\n评估 LSTM 模型性能...")
lstm_metrics, lstm_pred = lstm_trainer.evaluate(X_test, y_test)

In [None]:
# Side-by-side comparison of the two models' test metrics.
import pandas as pd

metric_names = ['MSE', 'RMSE', 'MAE', 'MAPE']

# One column per model, one row per metric.
comparison_df = pd.DataFrame(
    {
        'Simple RNN': [rnn_metrics[metric] for metric in metric_names],
        'LSTM': [lstm_metrics[metric] for metric in metric_names],
    },
    index=metric_names,
)

print("模型性能对比:")
print(comparison_df)

In [None]:
# Map the test targets and the Simple RNN predictions back through the
# processor's inverse_transform before plotting.
# y_test_original is reused by the later LSTM and future-forecast cells.
y_test_original = processor.inverse_transform(y_test)
rnn_pred_original = processor.inverse_transform(rnn_pred)

# Plot flattened targets against flattened predictions, passing n_points=150.
print("RNN 预测结果可视化:")
rnn_trainer.plot_predictions(y_test_original.flatten(), rnn_pred_original.flatten(), n_points=150)

In [None]:
# Map the LSTM predictions back through the processor's inverse_transform.
# This cell reads y_test_original, which the Simple RNN visualization cell
# assigns, so that cell must have been run first.
lstm_pred_original = processor.inverse_transform(lstm_pred)

# Plot flattened targets against flattened predictions, passing n_points=150.
print("LSTM 预测结果可视化:")
lstm_trainer.plot_predictions(y_test_original.flatten(), lstm_pred_original.flatten(), n_points=150)

## 第四步：未来预测演示

In [None]:
def predict_future(trainer, last_sequence, steps):
    """Forecast ``steps`` future values by feeding predictions back as input.

    Each iteration asks ``trainer.predict`` for the value that follows the
    current window, then slides the window forward by one time step: the
    oldest step is dropped and the new prediction is written into feature 0
    of the last step. Any other features of that new last step stay at zero.

    Args:
        trainer: Object exposing ``predict(sequence)``. The first entry of
            its result is taken as the prediction for the next step.
        last_sequence: Array indexed as ``[0, time, feature]``. Only batch
            entry 0 is carried forward. The array is copied, never modified.
        steps: Number of future steps to predict.

    Returns:
        ``np.array`` built from the first entry of each ``predict`` result,
        in prediction order (empty when ``steps`` is 0).
    """
    predictions = []
    current_sequence = last_sequence.copy()

    for _ in range(steps):
        # Predict the next value from the current window.
        next_pred = trainer.predict(current_sequence)
        predictions.append(next_pred[0])

        # predict() may return a size-1 array (e.g. shape (1, 1)) rather than
        # a scalar. Writing such an array into a single element relies on
        # NumPy's deprecated size-1-array-to-scalar conversion, so pull the
        # scalar out explicitly before storing it.
        next_value = np.asarray(next_pred[0]).reshape(-1)[0]

        # Slide the window: shift every step left by one and append the
        # prediction as the newest step.
        new_sequence = np.zeros_like(current_sequence)
        new_sequence[0, :-1, :] = current_sequence[0, 1:, :]
        new_sequence[0, -1, 0] = next_value
        current_sequence = new_sequence

    return np.array(predictions)

# Use the model with the lower RMSE from the evaluation cell; the Simple RNN
# is chosen when the two are tied.
if lstm_metrics['RMSE'] < rnn_metrics['RMSE']:
    best_model_name, best_trainer = 'LSTM', lstm_trainer
else:
    best_model_name, best_trainer = 'Simple RNN', rnn_trainer

print(f"使用最佳模型 ({best_model_name}) 进行未来预测...")

# Forecast 30 steps ahead, seeded with the last window in X_test.
future_steps = 30
future_predictions = predict_future(best_trainer, X_test[-1:], future_steps)

print(f"完成未来 {future_steps} 步预测")

In [None]:
# Visualize the future forecast: map the predictions back through the
# processor's inverse_transform before plotting.
future_original = processor.inverse_transform(future_predictions)

plt.figure(figsize=(15, 8))

# Show at most the last 100 test points followed by the forecast.
n_show = min(100, len(y_test_original))

# Test-set data: x positions are the indices of the last n_show test targets.
test_x = range(len(y_test_original) - n_show, len(y_test_original))
test_y = y_test_original[-n_show:].flatten()

# Forecast data: x positions continue directly after the last test index.
future_x = range(len(y_test_original), len(y_test_original) + future_steps)
future_y = future_original.flatten()

plt.plot(test_x, test_y, label='历史数据', color='blue', linewidth=2)
plt.plot(future_x, future_y, label=f'未来预测 ({future_steps}步)', 
         color='red', linewidth=2, linestyle='--')

# Vertical divider at the last test index, where the forecast begins.
plt.axvline(x=len(y_test_original)-1, color='green', linestyle=':', 
            linewidth=2, label='预测起点')

plt.title('时间序列未来预测', fontsize=16)
plt.xlabel('时间步')
plt.ylabel('数值')
plt.legend()
plt.grid(True, alpha=0.3)

# Shade the forecast region. No second curve is passed, so fill_between
# shades between the forecast and y=0 (its default); this is a visual cue,
# not an uncertainty band.
plt.fill_between(future_x, future_y, alpha=0.3, color='red')

plt.tight_layout()
plt.show()

# Summary statistics of the forecast on the inverse-transformed values.
print(f"\n未来 {future_steps} 步预测统计:")
print(f"  预测均值: {np.mean(future_original):.4f}")
print(f"  预测标准差: {np.std(future_original):.4f}")
print(f"  预测范围: [{np.min(future_original):.4f}, {np.max(future_original):.4f}]")

## 总结

通过本演示，我们完成了：

1. **数据处理**: 生成时间序列数据，进行归一化和序列化处理
2. **模型训练**: 训练了Simple RNN和LSTM两种模型
3. **模型评估**: 使用多种指标评估模型性能
4. **预测可视化**: 展示了预测结果和未来预测

### 实验建议

你可以尝试：
- 调整模型参数（隐藏层大小、层数等）
- 修改序列长度
- 使用不同的数据集
- 添加更多的特征工程
- 尝试其他RNN变体（如GRU）