In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense

# 生成示例时间序列数据
np.random.seed(0)
num_samples = 100
num_features = 5
sequence_length = 10

data = np.random.randn(num_samples, num_features)
df = pd.DataFrame(data, columns=[f'feature_{i}' for i in range(num_features)])

df.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4
0,1.764052,0.400157,0.978738,2.240893,1.867558
1,-0.977278,0.950088,-0.151357,-0.103219,0.410599
2,0.144044,1.454274,0.761038,0.121675,0.443863
3,0.333674,1.494079,-0.205158,0.313068,-0.854096
4,-2.55299,0.653619,0.864436,-0.742165,2.269755


In [12]:
# 归一化数据
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df)

# 构建训练集和测试集
train_size = int(0.8 * num_samples)
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]
train_data

array([[0.87206156, 0.67474814, 0.73725507, 0.99595074, 0.84526522],
       [0.33115965, 0.79170191, 0.51318961, 0.48052667, 0.57321075],
       [0.55241169, 0.89892685, 0.69409135, 0.5299764 , 0.57942219],
       [0.58982843, 0.9073923 , 0.5025224 , 0.57205989, 0.3370575 ],
       [0.02025016, 0.72865172, 0.71459231, 0.340035  , 0.92036641],
       [0.23702371, 0.59937824, 0.50608621, 0.84025042, 0.77091046],
       [0.55456317, 0.67007053, 0.36717695, 0.06768434, 0.43157592],
       [0.55483972, 0.85129247, 0.78159684, 0.41805693, 0.44009245],
       [0.3170961 , 0.28765183, 0.20489496, 0.93215955, 0.4013746 ],
       [0.43755188, 0.32321503, 0.69735344, 0.14835812, 0.45681623],
       [0.34730214, 0.67192926, 0.44192139, 0.2436247 , 0.4912783 ],
       [0.60850566, 0.60379299, 0.6031709 , 0.36374753, 0.42880694],
       [0.3913043 , 0.51318068, 0.38197582, 0.12364693, 0.52967105],
       [0.44471306, 0.2429528 , 0.63495585, 0.30372543, 0.50624034],
       [0.66784946, 0.61707755, 0.

In [13]:
def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i+sequence_length])
        y.append(data[i+sequence_length])
    return np.array(X), np.array(y)
train_X, train_y = create_sequences(train_data, sequence_length)
test_X, test_y = create_sequences(test_data, sequence_length)

In [14]:
train_X[0]

array([[0.87206156, 0.67474814, 0.73725507, 0.99595074, 0.84526522],
       [0.33115965, 0.79170191, 0.51318961, 0.48052667, 0.57321075],
       [0.55241169, 0.89892685, 0.69409135, 0.5299764 , 0.57942219],
       [0.58982843, 0.9073923 , 0.5025224 , 0.57205989, 0.3370575 ],
       [0.02025016, 0.72865172, 0.71459231, 0.340035  , 0.92036641],
       [0.23702371, 0.59937824, 0.50608621, 0.84025042, 0.77091046],
       [0.55456317, 0.67007053, 0.36717695, 0.06768434, 0.43157592],
       [0.55483972, 0.85129247, 0.78159684, 0.41805693, 0.44009245],
       [0.3170961 , 0.28765183, 0.20489496, 0.93215955, 0.4013746 ],
       [0.43755188, 0.32321503, 0.69735344, 0.14835812, 0.45681623]])

In [17]:
train_y[0]

array([0.34730214, 0.67192926, 0.44192139, 0.2436247 , 0.4912783 ])

In [None]:


# 创建GRU模型
model = Sequential([
    GRU(64, activation='relu', input_shape=(sequence_length, num_features), return_sequences=True),
    GRU(64, activation='relu', return_sequences=False),
    Dense(num_features)  # 输出层
])

# 编译模型
model.compile(loss='mean_squared_error', optimizer='adam')

# 训练模型
epochs = 50
batch_size = 32
model.fit(train_X, train_y, epochs=epochs, batch_size=batch_size, validation_split=0.1)

# 连续预测未来一个月的值
num_days_to_predict = 30
recent_features = test_X[-1]  # 最近一个已知的特征序列作为初始输入

predicted_values = []
for _ in range(num_days_to_predict):
    predicted_value = model.predict(np.array([recent_features]))[0]
    predicted_values.append(predicted_value)
    recent_features = np.concatenate((recent_features[1:], [predicted_value]))

# 反归一化预测结果
predicted_values_original = scaler.inverse_transform(predicted_values)

# 打印连续预测的未来一个月的值
print("Continuous Predictions for the Next Month:")
print(predicted_values_original)

In [26]:
import pandas as pd
import numpy as np

# 创建一个示例时间序列，你需要用你的实际数据替代这里的示例数据
data = {
    'Date': pd.date_range(start='2023-08-01', end='2023-08-30'),
    'Value': [10,10, 12, 13, 14, 15, 16, 20, 22, 23, 24, 30, 35, 38,
              20, 55, 60, 70, 75, 80, 90, 95, 100, 120, 125, 130, 140, 145, 150, 200]
}

df = pd.DataFrame(data)

# 确定15号的索引位置
index_15th = df[df['Date'] == '2023-08-15'].index[0]

# 计算15线和85线
values_before_15th = df.loc[index_15th - 6:index_15th - 1, 'Value']
percentile_15 = np.percentile(values_before_15th, 15)
percentile_85 = np.percentile(values_before_15th, 85)

# 更新15号以后的值
for i in range(index_15th, len(df)):
    if df.at[i, 'Value'] < percentile_15:
        df.at[i, 'Value'] = percentile_15
    elif df.at[i, 'Value'] > percentile_85:
        df.at[i, 'Value'] = percentile_85

print(df)


         Date  Value
0  2023-08-01  10.00
1  2023-08-02  10.00
2  2023-08-03  12.00
3  2023-08-04  13.00
4  2023-08-05  14.00
5  2023-08-06  15.00
6  2023-08-07  16.00
7  2023-08-08  20.00
8  2023-08-09  22.00
9  2023-08-10  23.00
10 2023-08-11  24.00
11 2023-08-12  30.00
12 2023-08-13  35.00
13 2023-08-14  38.00
14 2023-08-15  22.75
15 2023-08-16  35.75
16 2023-08-17  35.75
17 2023-08-18  35.75
18 2023-08-19  35.75
19 2023-08-20  35.75
20 2023-08-21  35.75
21 2023-08-22  35.75
22 2023-08-23  35.75
23 2023-08-24  35.75
24 2023-08-25  35.75
25 2023-08-26  35.75
26 2023-08-27  35.75
27 2023-08-28  35.75
28 2023-08-29  35.75
29 2023-08-30  35.75


In [23]:
values_before_15th

8     22
9     23
10    24
11    30
12    35
13    38
Name: Value, dtype: int64

In [24]:
percentile_15

22.75

In [25]:
percentile_85

35.75

In [17]:
print(lower_bound,upper_bound)

-18.466185312619388 18.466185312619388
