In [8]:
# 所需库包
import tensorflow as tf
import numpy as np
import time
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
#定义LSTM类
class LSTMcell(object):
    def __init__(self, incoming, D_input, D_cell, initializer, f_bias=1.0):       
        # 变量
        # the shape of incoming is [n_samples, n_steps, D_cell] 输入数据
        self.incoming = incoming   # incoming是用来接收输入数据的，其形状为三维数组
        self.D_input = D_input    # 输入的维度
        self.D_cell = D_cell     # LSTM的hidden state的维度，同时也是memory cell的维度
        # 初始化参数
          # igate = W_xi.* x + W_hi.* h + b_i  输入门的 三个参数
        self.W_xi = initializer([self.D_input, self.D_cell])       
        self.W_hi = initializer([self.D_cell, self.D_cell])
        self.b_i  = tf.Variable(tf.zeros([self.D_cell]))  
          # fgate = W_xf.* x + W_hf.* h + b_f  遗忘门的 三个参数
        self.W_xf = initializer([self.D_input, self.D_cell])
        self.W_hf = initializer([self.D_cell, self.D_cell])
        self.b_f  = tf.Variable(tf.constant(f_bias, shape=[self.D_cell]))   #一维数组[1.0,1.0,....,1.0]
          # ogate = W_xo.* x + W_ho.* h + b_o  输出门的 三个参数 
        self.W_xo = initializer([self.D_input, self.D_cell]) 
        self.W_ho = initializer([self.D_cell, self.D_cell])
        self.b_o  = tf.Variable(tf.zeros([self.D_cell])) 
          # cell = W_xc.* x + W_hc.* h + b_c  计算新信息候选值（暂态）的三个参数
        self.W_xc = initializer([self.D_input, self.D_cell])
        self.W_hc = initializer([self.D_cell, self.D_cell])
        self.b_c  = tf.Variable(tf.zeros([self.D_cell]))  
        # init cell and hidden state whose shapes are [n_samples, D_cell]
        # 最初时的hidden state和memory cell的值，二者的形状都是[n_samples, D_cell]
        # 如果没有特殊指定，这里直接设成全部为0
        init_for_both = tf.matmul(self.incoming[:,0,:], tf.zeros([self.D_input, self.D_cell]))
        self.hid_init = init_for_both
        self.cell_init = init_for_both
        # because tf.scan only takes two arguments, the hidden state and cell are needed to merge  
        #所以要将hidden state和memory并在一起。 矩阵拼接
        self.previous_h_c_tuple = tf.stack([self.hid_init, self.cell_init])
        # transpose the tensor so that the first dim is time_step 
        #需要将数据由[n_samples, n_steps, D_cell]的形状变成[n_steps, n_samples, D_cell]的形状
        self.incoming = tf.transpose(self.incoming, perm=[1,0,2])
           
        #将hidden state和memory并在一起，以及将输入的形状变成[n_steps, n_samples, D_cell]是为了满足tensorflow中的scan的特点  
        
    def one_step(self, previous_h_c_tuple, current_x):   
        # current_x是当前的输入，
        # to split hidden state and cell
        # 再将hidden state和memory cell拆分开
        prev_h, prev_c = tf.unstack(previous_h_c_tuple)
       # prev_h是上一个时刻的hidden state
       # prev_c是上一个时刻的memory cell
        
        # 计算输入门
        # input gate    it=σ(Wi*[ht-1,Xt]+bi)
        i = tf.sigmoid(
            tf.matmul(current_x, self.W_xi) + 
            tf.matmul(prev_h, self.W_hi) + 
            self.b_i)
        # 计算遗忘门    ft=σ(Wf*[ht-1,Xt]+bf)
        f = tf.sigmoid(
            tf.matmul(current_x, self.W_xf) + 
            tf.matmul(prev_h, self.W_hf) + 
            self.b_f)
        # 计算输出门   ot=σ(Wo*[ht-1,Xt]+bo)
        o = tf.sigmoid(
            tf.matmul(current_x, self.W_xo) + 
            tf.matmul(prev_h, self.W_ho) + 
            self.b_o)
        # 计算新的数据来源  Ct = tanh(Wc*[ht-1,Xt]+bc)
        c = tf.tanh(
            tf.matmul(current_x, self.W_xc) + 
            tf.matmul(prev_h, self.W_hc) + 
            self.b_c)
        
        # current cell  计算当前时刻的memory cell   Ct = ft * Ct-1 + it * Ct
        current_c = f*prev_c + i*c
        # current hidden state 计算当前时刻的hidden state  ht = Ot * tanh(Ct)
        current_h = o*tf.tanh(current_c) 
        
        #再次将当前的hidden state和memory cell并在一起返回
        return tf.stack([current_h, current_c])    
    
    #将上一时刻的hidden state和memory拆开，用于计算后，所出现的新的当前时刻的hidden state和memory会再次并在一起作为该function的返回值，
    
    def all_steps(self):
        # inputs shape : [n_sample, n_steps, D_input]
        # outputs shape : [n_steps, n_sample, D_output]
      #同样是为了满足scan的特点。定义该function后，LSTM就已经完成了。
      #one_step方法会使用LSTM类中所定义的parameters与当前时刻的输入和上一时刻的hidden state与memory cell计算当前时刻的hidden state和memory cell。
    
        hstates = tf.scan(fn = self.one_step,
                          elems = self.incoming,
                          initializer = self.previous_h_c_tuple,
                          name = 'hstates')[:,0,:,:]
        return hstates
       #scan接受的fn, elems, initializer有以下要求：
       #fn：第一个输入是上一时刻的输出（需要与fn的返回值保持一致），第二个输入是当前时刻的输入。
       #elems：scan方法每一步都会沿着所要处理的tensor的第一个维进行一次一次取值，
       #所以要将数据由[n_samples, n_steps, D_cell]的形状变成[n_steps, n_samples, D_cell]的形状。
       #initializer：初始值，需要与fn的第一个输入和返回值保持一致。
       #scan的返回值在上例中是[n_steps, 2, n_samples, D_cell]，其中第二个维度的2是由hidden state和memory cell组成的。

In [10]:
#权重初始化方法
#输出层的权重参数
def weight_init(shape):
    initial = tf.random_uniform(shape,minval=-np.sqrt(5)*np.sqrt(1.0/shape[0]), maxval=np.sqrt(5)*np.sqrt(1.0/shape[0]))
    return tf.Variable(initial,trainable=True)
# 正交矩阵初始化
def orthogonal_initializer(shape,scale = 1.0):
    #https://github.com/Lasagne/Lasagne/blob/master/lasagne/init.py
    scale = 1.0
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape) #this needs to be corrected to float32
    return tf.Variable(scale * q[:shape[0], :shape[1]],trainable=True, dtype=tf.float32)
#输出层的偏置
def bias_init(shape):
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial)
#随机打乱列表
def shufflelists(data):
    ri=np.random.permutation(len(data))
    data=[data[i] for i in ri]
    return data

def Standardize(seq):
    #subtract mean 减去平均值 一维数组
    centerized=seq-np.mean(seq, axis = 0)
    #divide standard deviation  np.std计算每一列的标准差 
    normalized=centerized/np.std(centerized, axis = 0)
    return normalized
# 读取数据
mfc=np.load('X.npy')
art=np.load('Y.npy')
totalsamples=len(mfc)
# 20%的数据作为validation se
vali_size=0.2
# 将每个样本的输入和输出数据合成list，再将所有的样本合成list
# 其中输入数据的形状是[n_samples, n_steps, D_input]
# 其中输出数据的形状是[n_samples, D_output]
def data_prer(X, Y):
    D_input=X[0].shape[1]
    D_output=24
    data=[]
    for x,y in zip(X,Y):
        data.append([Standardize(x).reshape((1,-1,D_input)).astype("float32"),
                     Standardize(y).astype("float32")])
    return data
# 处理数据
data=data_prer(mfc, art)
# 分训练集与验证集
train=data[int(totalsamples*vali_size):]
test=data[:int(totalsamples*vali_size)]
print('num of train sequences:%s' %len(train))
print('num of test sequences:%s' %len(test))
print('shape of inputs:' ,test[0][0].shape)
print('shape of labels:' ,test[0][1].shape)

num of train sequences:83
num of test sequences:20
('shape of inputs:', (1, 886, 39))
('shape of labels:', (886, 24))


In [11]:
#构建网络
D_input = 39               #输入的维度
D_label = 24               #样本标签维度
learning_rate = 7e-5       #学习率
num_units=1024             #隐藏层神经元结点总数

# 样本的输入和标签
#tf.placeholder：用于得到传递进来的真实的训练样本 多维数组 
inputs = tf.placeholder(tf.float32, [None, None, D_input], name="inputs")
labels = tf.placeholder(tf.float32, [None, D_label], name="labels")

# 实例LSTM类
rnn_cell = LSTMcell(inputs, D_input, num_units, orthogonal_initializer)

#调用scan计算所有hidden states
rnn0 = rnn_cell.all_steps()

# reshape for output layer 将3维tensor [n_steps, n_samples, D_cell]转成 矩阵[n_steps*n_samples, D_cell]
rnn = tf.reshape(rnn0, [-1, num_units])
# 输出层的学习参数
W = weight_init([num_units, D_label])
b = bias_init([D_label])
output = tf.matmul(rnn, W) + b
# 计算损失 平均
loss=tf.reduce_mean((output-labels)**2)
# 训练 TensorFlow提供的tf.train.AdamOptimizer来控制学习速度
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

In [12]:
# 建立session并实际初始化所有参数
sess = tf.InteractiveSession()
logdir = '/home/zhout/tf_logs/LSTM'
writer = tf.summary.FileWriter(logdir,sess.graph)
tf.global_variables_initializer().run()
writer.close()

In [13]:
# 训练并记录
def train_epoch(EPOCH):
    for k in range(EPOCH):
        #随机打乱列表
        train0=shufflelists(train) 
#         print train0
        for i in range(len(train)):
            sess.run(train_step,feed_dict={inputs:train0[i][0],labels:train0[i][1]})
        tl=0
        dl=0
        for i in range(len(test)):
            dl+=sess.run(loss,feed_dict={inputs:test[i][0],labels:test[i][1]})
            print 'dl的值'
            print dl 
        for i in range(len(train)):
            tl+=sess.run(loss,feed_dict={inputs:train[i][0],labels:train[i][1]})
            print 'tl的值'
            print tl
        print(k,'train:',round(tl/83,3),'test:',round(dl/20,3))

In [14]:
t0 = time.time()
train_epoch(10)
t1 = time.time()
print(" %f seconds" % round((t1 - t0),2))

dl的值
0.679268598557
dl的值
1.37250548601
dl的值
2.14175504446
dl的值
2.87273812294
dl的值
3.51536428928
dl的值
4.19876468182
dl的值
4.78046423197
dl的值
5.53421998024
dl的值
6.29102295637
dl的值
6.96525031328
dl的值
7.7880962491
dl的值
8.51852798462
dl的值
9.22005546093
dl的值
9.87049615383
dl的值
10.6186441183
dl的值
11.2681168318
dl的值
12.0257478356
dl的值
12.6873311996
dl的值
13.356626749
dl的值
14.1814752221
tl的值
0.721774995327
tl的值
1.44283986092
tl的值
2.1015779376
tl的值
2.64568585157
tl的值
3.50254774094
tl的值
4.04224050045
tl的值
4.638479352
tl的值
5.26665568352
tl的值
5.87749773264
tl的值
6.52025264502
tl的值
7.29239916801
tl的值
7.93550550938
tl的值
8.50916981697
tl的值
9.16766905785
tl的值
9.82571351528
tl的值
10.5201889277
tl的值
11.1419605017
tl的值
11.8526679873
tl的值
12.5053431392
tl的值
13.1868947744
tl的值
13.7232994437
tl的值
14.2592334151
tl的值
14.8624946475
tl的值
15.5444895029
tl的值
16.1654516459
tl的值
16.9766278267
tl的值
17.6494229436
tl的值
18.374186337
tl的值
19.0578363538
tl的值
19.753608346
tl的值
20.5442534685
tl的值
21.1799426675
tl的值
21.854286551

KeyboardInterrupt: 

In [None]:
pY=sess.run(output,feed_dict={inputs:test[10][0]})
plt.plot(pY[:,8])
plt.plot(test[10][1][:,8])
plt.title('test')
plt.legend(['predicted','real'])

In [None]:
pY=sess.run(output,feed_dict={inputs:train[1][0]})
plt.plot(pY[:,6])
plt.plot(train[1][1][:,6])
plt.title('train') 
plt.legend(['predicted','real'])