## 获取数据

In [10]:
import pandas_datareader.data as web
import datetime
start = datetime.datetime(2020, 1, 1)
end = datetime.datetime(2024, 1, 1)
df = web.DataReader('GOOGL', 'stooq', start, end)
df.dropna(inplace=True)
per_days = 10
df['label'] = df['Close'].shift(-per_days)
print(df)

                Open      High       Low     Close    Volume   label
Date                                                                
2023-12-29  139.6300  140.3600  138.7800  139.6900  18733017  131.94
2023-12-28  140.7800  141.1400  139.7500  140.2300  16045712  132.57
2023-12-27  141.5900  142.0800  139.8860  140.3700  19628618  132.52
2023-12-26  141.5900  142.6800  141.1900  141.5200  16780333  133.29
2023-12-22  140.7700  141.9900  140.7100  141.4900  26532199  134.99
...              ...       ...       ...       ...       ...     ...
2020-01-08   69.7410   70.5925   69.6315   70.2520  35325480     NaN
2020-01-07   70.0230   70.1750   69.5780   69.7555  34529120     NaN
2020-01-06   67.5815   69.9160   67.5500   69.8905  46786860     NaN
2020-01-03   67.4000   68.6875   67.3660   68.0760  23412580     NaN
2020-01-02   67.4205   68.4340   67.3245   68.4340  27285300     NaN

[1006 rows x 6 columns]


## 标准化
fit_transform方法首先计算选定列的均值和标准差（即拟合数据），然后将数据转换（缩放）为均值为0，标准差为1的分布。转换后的数据被赋值给变量X。

In [11]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
sca_X = scaler.fit_transform(df.iloc[:,:-1])
print(sca_X)

[[ 1.26518345  1.23787354  1.28886469  1.2676006  -1.05082779]
 [ 1.31089744  1.26874918  1.32770914  1.28911254 -1.22989906]
 [ 1.34309598  1.30595829  1.33315537  1.29468971 -0.99114851]
 ...
 [-1.59883732 -1.55059286 -1.56359964 -1.51299633  0.81856825]
 [-1.60605218 -1.599222   -1.57096807 -1.58528042 -0.7390004 ]
 [-1.60523727 -1.60925658 -1.57262997 -1.5710188  -0.48093781]]


## 输入输出

In [12]:
mem_his_days = 5
from collections import deque
deq = deque(maxlen=mem_his_days)

X = []
for i in sca_X:
    deq.append(list(i))
    if len(deq)==mem_his_days:
        X.append(list(deq))

X_lately = X[-per_days:]
X = X[:-per_days]
print(len(X))
print(len(X_lately))

992
10


In [13]:
y = df['label'].values[mem_his_days-1:-per_days]
print(len(y))

992


In [14]:
import numpy as np
X = np.array(X)
y = np.array(y)
print(X.shape)
print(y.shape)

(992, 5, 5)
(992,)


## 构建网络

In [15]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.1)

In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Dropout
model = Sequential()
model.add(LSTM(10,input_shape=X.shape[1:],activation='relu',return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(10,activation='relu',return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(10,activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(10,activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1))
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 5, 10)             640       
_________________________________________________________________
dropout_8 (Dropout)          (None, 5, 10)             0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 5, 10)             840       
_________________________________________________________________
dropout_9 (Dropout)          (None, 5, 10)             0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 10)                840       
_________________________________________________________________
dropout_10 (Dropout)         (None, 10)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 10)               

In [17]:
model.compile(optimizer='adam',loss='mse',metrics=['mape'])
model.fit(X_train,y_train,batch_size=32,epochs=50,validation_data=(X_test,y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1be8ea31160>