In [1]:
from os import listdir

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import Sequential
from keras.models import load_model
from keras.optimizers import Adam
from keras.preprocessing import sequence
from sklearn.metrics import accuracy_score

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


#  Example代码分析
## (1)由几个部分构成：三部分(读取和理解数据、数据预处理、构建时间序列分类模型)
## (2)功能是什么
## (3)输入和输出都是什么(类型和格式)
## (4)输入数据可以用什么工具生成

## 第一部分:读取和理解数据
### 功能：读取数据，初步理解要处理的数据
### 输入：.csv文件，pandas读取csv文件得到dataframe类型的数据
### 输出：list，将pandas读取的数据存储在list中，输出为list

In [2]:
# Read the first two RSS recordings so the raw data can be inspected below.
df1, df2 = (
    pd.read_csv('../data/MovementAAL/dataset/MovementAAL_RSS_1.csv'),
    pd.read_csv('../data/MovementAAL/dataset/MovementAAL_RSS_2.csv'),
)

In [3]:
# Preview the first rows of the first recording. `head` has to be called:
# referencing the bare method (as before) evaluates nothing useful.
df1.head()

In [4]:
# Preview the first rows of the second recording. `head` has to be called:
# referencing the bare method (as before) evaluates nothing useful.
df2.head()

In [5]:
# Compare the shapes of the two recordings; the trailing ';' suppresses the cell output.
df1.shape, df2.shape;

In [6]:
# The recordings have different lengths, which has to be dealt with before a
# model can be built. For now, read the sensor values of every file
# (numbered 1 through 314) and keep them in a list, one array per file.
path = '../data/MovementAAL/dataset/MovementAAL_RSS_'
sequences = [
    pd.read_csv(path + str(i) + '.csv', header=0).values
    for i in range(1, 315)
]

# Keep only the second column of the target file as the label array.
targets = pd.read_csv('../data/MovementAAL/dataset/MovementAAL_target.csv').values[:, 1]

In [7]:
# We now have a list `sequences` holding the motion-sensor data and `targets`
# holding the labels read from the target csv file. Evaluating sequences[0]
# gives the sensor values read from the first csv file:
sequences[0];

In [8]:
# As mentioned earlier, the dataset was collected in three different pairs of
# rooms, hence three groups. That information is used to split the data into
# training, validation and test sets. Load the DatasetGroup csv file and keep
# only its second column as the group-label array.
groups = pd.read_csv(
    '../data/MovementAAL/groups/MovementAAL_DatasetGroup.csv',
    header=0,
).values[:, 1]

## 第二部分: 数据预处理
### 功能：划分数据集，并处理不同长度的数据
### (1)我们将第2组的数据用于训练，第1组用于验证，第3组用于测试。
### (2)处理不同长度的数据：
#### 由于时间序列数据的长度不同，我们无法直接在此数据集上构建模型。那么怎样才能决定一个系列的理想长度呢？我们可以通过多种方式处理它，这里有一些想法：
#### · 用零填充较短的序列，使所有序列的长度相等。在这种情况下，我们将向模型提供不正确的数据。
#### · 查找序列的最大长度，并使用最后一行中的数据填充序列。
#### · 确定数据集中序列的最小长度，并将所有其他序列截断为该长度。但是，这将导致数据的巨大损失。
#### · 取所有长度的平均值，截断较长的系列，并填充比平均长度短的序列。

### 输入：list，每个元素为一个序列的传感器数值
### 输出：numpy数组，划分好的训练集、验证集、测试集及其对应的标签

In [9]:
# Find the minimum, maximum and mean sequence length.
len_sequences = [len(one_seq) for one_seq in sequences]
pd.Series(len_sequences).describe();

In [10]:
# Pad every sequence up to a common length by repeating the values of its last row.
to_pad = 129  # target length; the original note describes this as the maximum sequence length
new_seq = list()
for one_seq in sequences:
    n = to_pad - len(one_seq)
    # Slicing with [-1:] keeps the last row two-dimensional, so repeating it
    # along axis 0 yields n copies of that row without hard-coding the number
    # of columns.
    to_concat = np.repeat(one_seq[-1:], n, axis=0)
    new_seq.append(np.concatenate([one_seq, to_concat]))
final_seq = np.stack(new_seq)

# Truncate the (now equal-length) sequences to length 60.
# `sequence` is keras.preprocessing.sequence, imported in the first cell.
seq_len = 60
final_seq = sequence.pad_sequences(final_seq, maxlen=seq_len, padding='post', dtype='float', truncating='post')

In [11]:
# With the sequences prepared, split them by group into training, validation
# and test sets.
def _pick_group(items, group_id):
    """Return, as an array, the entries of ``items`` whose group label equals ``group_id``."""
    return np.array([items[i] for i in range(len(groups)) if groups[i] == group_id])


# Group 2 is used for training, group 1 for validation and group 3 for testing.
train = _pick_group(final_seq, 2)
validation = _pick_group(final_seq, 1)
test = _pick_group(final_seq, 3)

# Rescale the labels with (label + 1) / 2.
# NOTE(review): presumably this maps -1/+1 labels onto 0/1 for the sigmoid
# output -- confirm against the target file.
train_target = (_pick_group(targets, 2) + 1) / 2
validation_target = (_pick_group(targets, 1) + 1) / 2
test_target = (_pick_group(targets, 3) + 1) / 2

## 第三部分: 构建模型
### 功能：构建时间序列分类模型
### 输入：numpy数组，预处理后的训练集、验证集和测试集及其对应的标签
### 输出：训练得到的最佳模型(保存为best_model.pkl)以及模型在测试集上的准确率

In [12]:
# Build the time-series classification model.
# The data has been prepared for an LSTM (long short-term memory) model: the
# variable-length sequences were handled and training, validation and test
# sets were created. Build a single-layer LSTM network followed by one
# sigmoid output unit.
model = Sequential([
    LSTM(256, input_shape=(seq_len, 4)),
    Dense(1, activation='sigmoid'),
])
model.summary();

W0802 11:37:19.029883 140342617003840 deprecation_wrapper.py:119] From /home/niuyuxia/Software/anaconda3/envs/py37/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0802 11:37:19.041346 140342617003840 deprecation_wrapper.py:119] From /home/niuyuxia/Software/anaconda3/envs/py37/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0802 11:37:19.043571 140342617003840 deprecation_wrapper.py:119] From /home/niuyuxia/Software/anaconda3/envs/py37/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 256)               267264    
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 257       
Total params: 267,521
Trainable params: 267,521
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Optimizer and loss for the binary classifier.
adam = Adam(lr=0.001)
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

# Keep only the weights of the epoch with the highest validation accuracy.
# NOTE(review): the checkpoint is written by Keras, so the '.pkl' suffix is
# only a file name; it is kept because the evaluation cell loads this path.
chk = ModelCheckpoint(
    'best_model.pkl',
    monitor='val_acc',
    save_best_only=True,
    mode='max',
    verbose=1,
)

model.fit(
    train,
    train_target,
    epochs=200,
    batch_size=128,
    callbacks=[chk],
    validation_data=(validation, validation_target),
)

W0802 11:37:19.424056 140342617003840 deprecation_wrapper.py:119] From /home/niuyuxia/Software/anaconda3/envs/py37/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0802 11:37:19.430152 140342617003840 deprecation_wrapper.py:119] From /home/niuyuxia/Software/anaconda3/envs/py37/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3376: The name tf.log is deprecated. Please use tf.math.log instead.

W0802 11:37:19.435863 140342617003840 deprecation.py:323] From /home/niuyuxia/Software/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0802 11:37:19.917254 140342617003840 deprecation_wrapper.py:119] From /home/niuyuxia/Softw

Train on 106 samples, validate on 104 samples
Epoch 1/200

Epoch 00001: val_acc improved from -inf to 0.61538, saving model to best_model.pkl
Epoch 2/200

Epoch 00002: val_acc did not improve from 0.61538
Epoch 3/200

Epoch 00003: val_acc did not improve from 0.61538
Epoch 4/200

Epoch 00004: val_acc did not improve from 0.61538
Epoch 5/200

Epoch 00005: val_acc did not improve from 0.61538
Epoch 6/200

Epoch 00006: val_acc did not improve from 0.61538
Epoch 7/200

Epoch 00007: val_acc did not improve from 0.61538
Epoch 8/200

Epoch 00008: val_acc did not improve from 0.61538
Epoch 9/200

Epoch 00009: val_acc did not improve from 0.61538
Epoch 10/200

Epoch 00010: val_acc did not improve from 0.61538
Epoch 11/200

Epoch 00011: val_acc did not improve from 0.61538
Epoch 12/200

Epoch 00012: val_acc did not improve from 0.61538
Epoch 13/200

Epoch 00013: val_acc did not improve from 0.61538
Epoch 14/200

Epoch 00014: val_acc did not improve from 0.61538
Epoch 15/200

Epoch 00015: val_acc


Epoch 00045: val_acc did not improve from 0.61538
Epoch 46/200

Epoch 00046: val_acc did not improve from 0.61538
Epoch 47/200

Epoch 00047: val_acc did not improve from 0.61538
Epoch 48/200

Epoch 00048: val_acc did not improve from 0.61538
Epoch 49/200

Epoch 00049: val_acc did not improve from 0.61538
Epoch 50/200

Epoch 00050: val_acc did not improve from 0.61538
Epoch 51/200

Epoch 00051: val_acc did not improve from 0.61538
Epoch 52/200

Epoch 00052: val_acc did not improve from 0.61538
Epoch 53/200

Epoch 00053: val_acc did not improve from 0.61538
Epoch 54/200

Epoch 00054: val_acc did not improve from 0.61538
Epoch 55/200

Epoch 00055: val_acc did not improve from 0.61538
Epoch 56/200

Epoch 00056: val_acc did not improve from 0.61538
Epoch 57/200

Epoch 00057: val_acc did not improve from 0.61538
Epoch 58/200

Epoch 00058: val_acc did not improve from 0.61538
Epoch 59/200

Epoch 00059: val_acc did not improve from 0.61538
Epoch 60/200

Epoch 00060: val_acc did not improve fr


Epoch 00088: val_acc did not improve from 0.73077
Epoch 89/200

Epoch 00089: val_acc did not improve from 0.73077
Epoch 90/200

Epoch 00090: val_acc did not improve from 0.73077
Epoch 91/200

Epoch 00091: val_acc did not improve from 0.73077
Epoch 92/200

Epoch 00092: val_acc did not improve from 0.73077
Epoch 93/200

Epoch 00093: val_acc did not improve from 0.73077
Epoch 94/200

Epoch 00094: val_acc did not improve from 0.73077
Epoch 95/200

Epoch 00095: val_acc did not improve from 0.73077
Epoch 96/200

Epoch 00096: val_acc did not improve from 0.73077
Epoch 97/200

Epoch 00097: val_acc did not improve from 0.73077
Epoch 98/200

Epoch 00098: val_acc did not improve from 0.73077
Epoch 99/200

Epoch 00099: val_acc did not improve from 0.73077
Epoch 100/200

Epoch 00100: val_acc did not improve from 0.73077
Epoch 101/200

Epoch 00101: val_acc did not improve from 0.73077
Epoch 102/200

Epoch 00102: val_acc did not improve from 0.73077
Epoch 103/200

Epoch 00103: val_acc did not improv


Epoch 00133: val_acc did not improve from 0.73077
Epoch 134/200

Epoch 00134: val_acc did not improve from 0.73077
Epoch 135/200

Epoch 00135: val_acc did not improve from 0.73077
Epoch 136/200

Epoch 00136: val_acc did not improve from 0.73077
Epoch 137/200

Epoch 00137: val_acc did not improve from 0.73077
Epoch 138/200

Epoch 00138: val_acc did not improve from 0.73077
Epoch 139/200

Epoch 00139: val_acc did not improve from 0.73077
Epoch 140/200

Epoch 00140: val_acc did not improve from 0.73077
Epoch 141/200

Epoch 00141: val_acc did not improve from 0.73077
Epoch 142/200

Epoch 00142: val_acc did not improve from 0.73077
Epoch 143/200

Epoch 00143: val_acc did not improve from 0.73077
Epoch 144/200

Epoch 00144: val_acc did not improve from 0.73077
Epoch 145/200

Epoch 00145: val_acc did not improve from 0.73077
Epoch 146/200

Epoch 00146: val_acc did not improve from 0.73077
Epoch 147/200

Epoch 00147: val_acc did not improve from 0.73077
Epoch 148/200

Epoch 00148: val_acc did


Epoch 00177: val_acc did not improve from 0.73077
Epoch 178/200

Epoch 00178: val_acc did not improve from 0.73077
Epoch 179/200

Epoch 00179: val_acc did not improve from 0.73077
Epoch 180/200

Epoch 00180: val_acc did not improve from 0.73077
Epoch 181/200

Epoch 00181: val_acc did not improve from 0.73077
Epoch 182/200

Epoch 00182: val_acc did not improve from 0.73077
Epoch 183/200

Epoch 00183: val_acc did not improve from 0.73077
Epoch 184/200

Epoch 00184: val_acc did not improve from 0.73077
Epoch 185/200

Epoch 00185: val_acc did not improve from 0.73077
Epoch 186/200

Epoch 00186: val_acc did not improve from 0.73077
Epoch 187/200

Epoch 00187: val_acc did not improve from 0.73077
Epoch 188/200

Epoch 00188: val_acc did not improve from 0.73077
Epoch 189/200

Epoch 00189: val_acc did not improve from 0.73077
Epoch 190/200

Epoch 00190: val_acc did not improve from 0.73077
Epoch 191/200

Epoch 00191: val_acc did not improve from 0.73077
Epoch 192/200

Epoch 00192: val_acc did

<keras.callbacks.History at 0x7fa4084e3e10>

In [14]:
# Load the checkpointed model and check its accuracy on the test data.
model = load_model('best_model.pkl')

# Threshold the sigmoid output at 0.5 to get class labels. This is what
# Sequential.predict_classes does for a single-unit output, but it does not
# depend on that method, which later Keras releases removed.
test_preds = (model.predict(test) > 0.5).astype('int32')

# accuracy_score is imported with the other dependencies in the first cell.
accuracy_score(test_target, test_preds)

0.6826923076923077