In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os 
import pickle as pkl
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

import tensorflow as tf
from tensorflow.keras import Sequential,layers,utils,losses
from tensorflow.keras.callbacks import ModelCheckpoint,TensorBoard

import warnings
warnings.filterwarnings("ignore")

In [63]:
def _read_pickle(path):
    """Deserialize and return the object stored in the pickle file at `path`."""
    # NOTE(review): pickle can execute arbitrary code on load; only use trusted files.
    with open(path, "rb") as handle:
        return pkl.load(handle)


# Features and labels are stored as two separate pickle files.
x = _read_pickle("features.pkl")
y = _read_pickle("labels.pkl")
x.shape, y.shape

((4168, 5, 34), (4168,))

In [64]:
# Normalization preprocessing: min-max scale every (sample, timestep) feature
# vector to [0, 1] independently, i.e. along the last axis of x.
# Vectorized replacement for the per-row Python loop; the unused MinMaxScaler
# instance that was created on every iteration has been dropped.
# NOTE(review): assumes x is a floating-point array without NaNs - confirm.
x_back = np.copy(x)
row_min = x_back.min(axis=-1, keepdims=True)
row_max = x_back.max(axis=-1, keepdims=True)
# Constant rows (max == min) are skipped to avoid dividing by zero: `where`
# masks them out and `out=x_back` leaves their original values in place.
np.divide(
    x_back - row_min,
    row_max - row_min,
    out=x_back,
    where=row_max != row_min,
)
x = x_back

In [65]:
# Hold out 15% of the samples as the test split; the fixed random_state keeps
# the shuffled partition reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.15,
    shuffle=True,
    random_state=2,
)

# Display the shapes of the four resulting arrays.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((3542, 5, 34), (626, 5, 34), (3542,), (626,))

In [66]:
# Build batched datasets

def create_batch_dataset(x,y,train=True,buffer_size=1000,batch_size=128):
    """Wrap features and labels in a batched ``tf.data.Dataset``.

    Args:
        x: Feature array; sliced along its first axis into individual samples.
        y: Label array aligned with ``x`` along the first axis.
        train: When True, samples are shuffled before batching. When False,
            the original sample order is preserved (needed so predictions
            line up with the labels during evaluation).
        buffer_size: Size of the shuffle buffer used when ``train`` is True.
        batch_size: Number of samples per batch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((tf.constant(x), tf.constant(y)))
    if train:
        # Previously both branches were identical and `buffer_size` was unused,
        # so training batches were never shuffled. Dataset.shuffle reshuffles
        # on every iteration (i.e. every epoch) by default.
        dataset = dataset.shuffle(buffer_size)
    return dataset.batch(batch_size)

In [67]:
# Create the training and test datasets
train_batch_dataset = create_batch_dataset(x_train, y_train)
test_batch_dataset = create_batch_dataset(x_test, y_test, train=False)
# Peek at the first test batch only. The previous `list(...)[0]` materialized
# every batch of the dataset in memory just to display the first one.
next(iter(test_batch_dataset.as_numpy_iterator()))

(array([[[0.        , 0.        , 0.        , ..., 1.        ,
          0.71897473, 0.97838324],
         [0.        , 0.        , 0.        , ..., 1.        ,
          0.7129626 , 0.97606822],
         [0.        , 0.        , 0.        , ..., 1.        ,
          0.7042306 , 0.97254341],
         [0.        , 0.        , 0.        , ..., 1.        ,
          0.70694403, 0.96163612],
         [0.        , 0.        , 0.        , ..., 1.        ,
          0.7099712 , 0.98045957]],
 
        [[0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.89644371, 0.33480047, 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.90073929, 0.33106197, 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ]],
 
        [[0.88

In [68]:
# Model construction: four stacked LSTM layers (with dropout after the first
# two) followed by a single-unit dense output.
model = Sequential([
    # Declare the input explicitly instead of passing `input_shape=` to the
    # first layer; the per-sample shape is taken from the training data so the
    # model follows the features automatically.
    layers.Input(shape=x_train.shape[1:]),
    layers.LSTM(units=256, return_sequences=True),
    layers.Dropout(0.2),
    layers.LSTM(units=256, return_sequences=True),
    layers.Dropout(0.3),
    layers.LSTM(units=128, return_sequences=True),
    # Last recurrent layer returns only its final state for the dense head.
    layers.LSTM(units=32),
    layers.Dense(1)
])

# Model compilation: Adam optimizer, mean-squared-error loss.
model.compile(optimizer="adam", loss='mse')

# Weight checkpointing: keep only the best weights seen so far.
checkpoint_file = 'best_model_drop.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_file,
    # Select on validation loss rather than training loss: training loss keeps
    # falling as the model overfits, so monitoring it tends to save the most
    # overfit weights.
    # NOTE(review): the fit cell passes the test split as validation data, so
    # this selection still sees test data - consider a separate validation split.
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True
)

In [69]:
# Model training: 100 epochs over the batched training data, reporting loss on
# the batched test data after each epoch and checkpointing via the callback.
history = model.fit(
    x=train_batch_dataset,
    validation_data=test_batch_dataset,
    epochs=100,
    callbacks=[checkpoint_callback],
)

Epoch 1/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 81ms/step - loss: 0.2195 - val_loss: 0.1603
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 0.1515 - val_loss: 0.1279
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - loss: 0.1284 - val_loss: 0.0990
Epoch 4/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - loss: 0.1067 - val_loss: 0.0894
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 57ms/step - loss: 0.0836 - val_loss: 0.0815
Epoch 6/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - loss: 0.0826 - val_loss: 0.0764
Epoch 7/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - loss: 0.0672 - val_loss: 0.0669
Epoch 8/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - loss: 0.0662 - val_loss: 0.0698
Epoch 9/100
[1m28/28[0m [32m━━━━━━━━━

[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - loss: 0.0135 - val_loss: 0.0286
Epoch 69/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 57ms/step - loss: 0.0110 - val_loss: 0.0230
Epoch 70/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - loss: 0.0073 - val_loss: 0.0221
Epoch 71/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - loss: 0.0075 - val_loss: 0.0180
Epoch 72/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 57ms/step - loss: 0.0055 - val_loss: 0.0237
Epoch 73/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - loss: 0.0063 - val_loss: 0.0234
Epoch 74/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - loss: 0.0104 - val_loss: 0.0254
Epoch 75/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 58ms/step - loss: 0.0084 - val_loss: 0.0169
Epoch 76/100
[1m28/28[0m [32m━━━━━━━━━━━━━

In [70]:
# Predict on the held-out features and keep column 0 of the model output,
# giving one scalar prediction per sample.
test_pred = model.predict(x_test)[:, 0]
# Coefficient of determination between the true labels and the raw predictions.
score = r2_score(y_test, test_pred)
score

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step


0.8980221964238331

In [104]:
# Binarize the model output with an assumed decision threshold.
# The original cell referenced `test_copy`, which is never defined in this
# notebook (it only worked through leftover kernel state); the thresholding
# is meant to apply to the predictions in `test_pred`.
threshold = 0.5
condition = test_pred > threshold
# 1 where the prediction exceeds the threshold, 0 otherwise (dtype of test_pred kept).
pred_copy = condition.astype(test_pred.dtype)

# Treat "fall" (label 1) as the positive class.
y_true = np.asarray(y_test)
tp_count = int(np.count_nonzero(condition & (y_true == 1)))
tn_count = int(np.count_nonzero(~condition & (y_true == 0)))
fp_count = int(np.count_nonzero(condition & (y_true == 0)))
fn_count = int(np.count_nonzero(~condition & (y_true == 1)))

# Express each confusion-matrix cell as a fraction of all test samples.
zong = pred_copy.size
true_count = int(np.count_nonzero(pred_copy == y_true))
tp = tp_count / zong
tn = tn_count / zong
fn = fn_count / zong
fp = fp_count / zong
accuracy = true_count / zong
accuracy,tp,tn,fp,fn

(0.9712460063897763,
 0.29233226837060705,
 0.6789137380191693,
 0.01437699680511182,
 0.01437699680511182)