In [None]:
"""
数据预处理
使用数据工程方法，将比特币价格数据归一化为近似周期性的数据。
"""

In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from datetime import timedelta

# -------------------------------
# 1. Load data
# The CSV file 'btc_prices.csv' must provide an 'open_date' column (parsed as
# dates and used as the index) and an 'open_price' column.
df = pd.read_csv('btc_prices.csv', parse_dates=['open_date'], index_col='open_date')
df = df.sort_index()  # make sure rows are in chronological order

# -------------------------------
# 2. Log-transform the price (dampens heteroscedasticity)
# np.log would quietly turn zero/negative prices into -inf/NaN, and those values
# would then leak into the moving average and both scalers. Fail early instead.
if (df['open_price'] <= 0).any():
    raise ValueError("open_price must be strictly positive to take its logarithm")
# NOTE: the column keeps the name 'Log_Close' because later cells read it,
# but it holds the logarithm of 'open_price'.
df['Log_Close'] = np.log(df['open_price'])

# -------------------------------
# 3. Detrend
# The trend estimate is a moving average over the last `window` rows of the log
# price (30 rows = 30 days only if the data is daily -- TODO confirm sampling).
# min_periods=1 keeps the first rows defined by averaging over a shorter window.
window = 30
df['Moving_Avg'] = df['Log_Close'].rolling(window=window, min_periods=1).mean()
df['Detrended'] = df['Log_Close'] - df['Moving_Avg']

# -------------------------------
# 4. Normalize the detrended series
# fit_transform returns an (n, 1) array; ravel() flattens it to one value per
# row so each result is stored as an ordinary 1-D column.
# Method 1: Z-score standardization
scaler_z = StandardScaler()
df['Zscore'] = scaler_z.fit_transform(df[['Detrended']]).ravel()

# Method 2: Min-Max scaling to [0, 1]
# scaler_mm is reused later to map normalized forecasts back to this scale.
scaler_mm = MinMaxScaler(feature_range=(0, 1))
df['MinMax'] = scaler_mm.fit_transform(df[['Detrended']]).ravel()

# -------------------------------
# 5. Plot with Plotly
# Two stacked subplots sharing the x axis:
#   row 1 - log price together with its moving-average trend
#   row 2 - detrended series and both normalized versions
# Labels are derived from `window` so they stay correct if the window changes.
fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                    subplot_titles=[f"Log Price and {window}d Moving Average",
                                    "Detrended & Normalized Data"],
                    vertical_spacing=0.1)

# Row 1: log price and trend.
# The 'Log_Close' column holds the log of 'open_price', hence the trace label.
fig.add_trace(go.Scatter(x=df.index, y=df['Log_Close'], mode='lines',
                         name='Log(Open Price)', line=dict(color='black')), row=1, col=1)
fig.add_trace(go.Scatter(x=df.index, y=df['Moving_Avg'], mode='lines',
                         name=f'{window}d Moving Avg', line=dict(dash='dash', color='blue')), row=1, col=1)

# Row 2: detrended series and its normalized versions
fig.add_trace(go.Scatter(x=df.index, y=df['Detrended'], mode='lines',
                         name='Detrended Log Price', line=dict(color='purple')), row=2, col=1)
fig.add_trace(go.Scatter(x=df.index, y=df['Zscore'], mode='lines',
                         name='Z-score Normalized', line=dict(color='green')), row=2, col=1)
fig.add_trace(go.Scatter(x=df.index, y=df['MinMax'], mode='lines',
                         name='Min-Max Normalized', line=dict(color='red')), row=2, col=1)

# yaxis_title only labels the first subplot; the second needs yaxis2_title.
fig.update_layout(title_text="BTC Price Data: Normalization & Detrending",
                  height=800, xaxis2_title="Date",
                  yaxis_title="Log Price", yaxis2_title="Value")
fig.show()

In [4]:

# -------------------------------
# 6. Inverse-normalization example
# Stand-in for the output of a later forecasting step: future values expressed
# on the Min-Max-normalized scale (sample data).
forecast_norm = np.array([0.45, 0.47, 0.50, 0.52, 0.55])

# Step 1 - undo Min-Max scaling to get back to the detrended scale.
# The scaler is given a single-column 2-D array, so add a trailing axis first.
forecast_detrended = scaler_mm.inverse_transform(forecast_norm[:, np.newaxis]).flatten()

# Step 2 - undo detrending by adding back the most recent moving-average value
# (simplifying assumption: the future trend stays at its last observed level).
last_mavg = df['Moving_Avg'].iat[-1]
forecast_log = last_mavg + forecast_detrended

# Step 3 - undo the log transform to obtain prices.
forecast_price = np.exp(forecast_log)
print("预测的未来价格:", forecast_price)

# Overlay the forecast on the original price series in a single figure.
last_date = df.index[-1]
# One timestamp per forecast value, 1, 2, ... days after the last observation.
future_dates = [last_date + timedelta(days=step + 1) for step in range(len(forecast_price))]
fig2 = go.Figure(data=[
    go.Scatter(x=df.index, y=df['open_price'], mode='lines', name='Original Price'),
    go.Scatter(x=future_dates, y=forecast_price, mode='lines', name='Forecast Price'),
])
fig2.update_layout(
    title="BTC Price Forecast (Inverse Normalization)",
    xaxis_title="Date",
    yaxis_title="Price (USD)",
    template="plotly_white",
)
fig2.show()



预测的未来价格: [ 91834.42654456  93590.92063032  96288.85463114  98130.54744775
 100959.34471458]
