In [1]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import matplotlib.pyplot as plt

# Configuration: look-back window length, forecast horizon (in steps), and the
# column of `avg_values` to model.
SEQ_LEN = 30
FUTURE_PERIOD_PREDICT = 1
RATIO_TO_PREDICT = "2330.TW"

# Pull the selected column out as a single-column 2-D array.
all_data = avg_values[RATIO_TO_PREDICT].values.reshape(-1, 1)

# Min-max normalise the whole series in one pass.
# NOTE(review): the scaler is fit on the entire series, including the part
# that is later sliced off as validation/test data — confirm this look-ahead
# is acceptable, or fit on the training portion only.
scaler = MinMaxScaler()
scaler.fit(all_data)
all_data_scaled = scaler.transform(all_data)

# Exponential-moving-average smoothing applied along the first axis
def apply_ema(data, gamma=0.1):
    """Return an exponentially smoothed copy of *data*.

    The recurrence is ``ema[i] = gamma * data[i] + (1 - gamma) * ema[i - 1]``,
    evaluated along the first axis. The running average is seeded with the
    first sample, so the output starts at ``data[0]`` instead of ramping up
    from zero.

    Args:
        data: Array-like of samples; the first axis is the time axis.
        gamma: Weight given to the newest sample at each step.

    Returns:
        A floating-point ndarray with the same shape as *data*. Integer input
        is promoted to float so the averages are not truncated. Empty input
        yields an empty array.
    """
    values = np.asarray(data)
    if not np.issubdtype(values.dtype, np.floating):
        # An integer output buffer would silently truncate every average.
        values = values.astype(float)

    smoothed_data = np.empty_like(values)
    if len(values) == 0:
        return smoothed_data

    # Seed with the first observation to avoid a start-up bias toward zero.
    ema = values[0]
    for i, sample in enumerate(values):
        ema = gamma * sample + (1 - gamma) * ema
        smoothed_data[i] = ema
    return smoothed_data

# Smooth the scaled series with the exponential moving average.
all_data_smoothed = apply_ema(all_data_scaled)

# Split sizes: 80% for training, 10% for validation, the remainder for test.
total_len = len(all_data_smoothed)
train_len, valid_len = int(0.8 * total_len), int(0.1 * total_len)
test_len = total_len - (train_len + valid_len)

# Sliding-window builder for (input window, target) pairs
def create_sequences(data, seq_length, future_period):
    """Slice *data* into overlapping windows and their look-ahead targets.

    Window ``k`` is ``data[k : k + seq_length]``; its target is the element
    ``future_period`` steps after the window's last element, i.e.
    ``data[k + seq_length + future_period - 1]``.

    Args:
        data: Indexable sequence of samples.
        seq_length: Number of consecutive samples per input window.
        future_period: How many steps past the window end the target lies.

    Returns:
        A tuple ``(X, y)`` of ndarrays holding the windows and the targets.
        Both are empty when *data* is too short to form a single pair.
    """
    n_windows = len(data) - seq_length - future_period + 1
    target_offset = seq_length + future_period - 1

    windows = [data[start:start + seq_length] for start in range(n_windows)]
    targets = [data[start + target_offset] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Build every (window, target) pair from the smoothed series.
X_all, y_all = create_sequences(all_data_smoothed, SEQ_LEN, FUTURE_PERIOD_PREDICT)

# Split the pairs by where their *target* falls in the original series.
# Pair i has its target at series index i + SEQ_LEN + FUTURE_PERIOD_PREDICT - 1,
# so the full offset (not just SEQ_LEN) must be subtracted; otherwise a horizon
# greater than 1 would place validation-range targets in the training set.
# Clamping at 0 keeps a too-short series from producing negative indices,
# which would silently slice from the end of the arrays.
_target_offset = SEQ_LEN + FUTURE_PERIOD_PREDICT - 1
train_end = max(0, train_len - _target_offset)
val_end = max(train_end, train_len + valid_len - _target_offset)

X_train = X_all[:train_end]
y_train = y_all[:train_end]

X_valid = X_all[train_end:val_end]
y_valid = y_all[train_end:val_end]

X_test = X_all[val_end:]
y_test = y_all[val_end:]

# Reshape each non-empty input set to (samples, window length, 1).
# Empty sets are skipped because they have no second axis to read.
if len(X_train) > 0:
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
if len(X_valid) > 0:
    X_valid = X_valid.reshape((X_valid.shape[0], X_valid.shape[1], 1))
if len(X_test) > 0:
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Plot the train / validation / test targets end-to-end on one time axis.
plt.figure(figsize=(15, 5))

# Each non-empty split is drawn starting where the previous split ended.
_offset = 0
for _series, _color, _label in (
    (y_train, 'blue', 'Train'),
    (y_valid, 'gray', 'Valid'),
    (y_test, 'black', 'Test'),
):
    _count = len(_series)
    if _count > 0:
        plt.plot(range(_offset, _offset + _count), _series,
                 color=_color, label=_label, alpha=0.7)
    _offset += _count

plt.title('Data')
plt.xlabel('Time(days)')
plt.ylabel('price')
plt.legend(loc='best')
plt.grid(True)
plt.show()

# Report the size of the full series and of each split.
print(f"總數據量: {len(all_data_smoothed)}")
print(f"訓練集大小: {len(y_train)}")
print(f"驗證集大小: {len(y_valid)}")
print(f"測試集大小: {len(y_test)}")

NameError: name 'avg_values' is not defined