In [10]:
import numpy as np

In [11]:
class RNN:
    """Minimal single-layer recurrent network trained with back-propagation
    through time.

    Forward pass for one time step::

        h_t = tanh(x_t @ U + h_{t-1} @ W)
        y_t = softmax(h_t @ V)

    All vectors are treated as 1-D arrays; labels are expected to be one-hot
    (only ``argmax`` of each label vector is used when computing gradients).
    """

    def __init__(self, in_shape, unit, out_shape):
        """Create the network with uniformly random weights in [0, 1).

        in_shape: size of each input vector
        unit: size of the hidden state
        out_shape: size of each output vector
        """
        self.U = np.random.random(size=(in_shape, unit))  # input -> hidden weights
        self.W = np.random.random(size=(unit, unit))  # hidden -> hidden weights
        self.V = np.random.random(size=(unit, out_shape))  # hidden -> output weights

        self.in_shape = in_shape
        self.unit = unit
        self.out_shape = out_shape

        # Hidden state fed into the first time step of every sequence.
        self.start_h = np.random.random(size=(self.unit,))

    @staticmethod
    def tanh(x):
        """Hyperbolic tangent.

        Uses ``np.tanh`` rather than the explicit exp formula, which evaluates
        to ``inf / inf = nan`` once ``|x|`` is large enough for ``exp`` to
        overflow.
        """
        return np.tanh(x)

    @staticmethod
    def tanh_der(y):
        """Derivative of tanh expressed in terms of its output ``y = tanh(x)``."""
        return 1 - y * y

    @staticmethod
    def softmax(x):
        """Softmax over the first axis of ``x``.

        The maximum is subtracted before exponentiating; this leaves the
        result unchanged mathematically but prevents overflow for large logits.
        """
        x = np.asarray(x, dtype=float)
        exps = np.exp(x - np.max(x, axis=0))
        return exps / np.sum(exps, axis=0)

    @staticmethod
    def softmax_der(y, y_):
        """Derivative of the target-class softmax output w.r.t. every logit.

        y: softmax output
        y_: one-hot label; only its argmax ``j`` is used

        Returns a new array ``d`` with ``d[k] = -y[j] * y[k]`` for ``k != j``
        and ``d[j] = y[j] * (1 - y[j])``. The input ``y`` is not modified.
        """
        j = np.argmax(y_)
        target_prob = y[j]
        grad = -target_prob * y
        grad[j] = target_prob * (1 - target_prob)
        return grad

    @staticmethod
    def cross_entropy(y, y_):
        """Cross-entropy between a prediction and a label.

        y: predicted distribution
        y_: true (one-hot) distribution

        Predictions are clipped away from zero so that ``log(0) * 0`` cannot
        turn the loss into ``nan``.
        """
        safe_y = np.clip(y, 1e-12, None)
        return -np.sum(np.log(safe_y) * y_, axis=0)

    @staticmethod
    def cross_entropy_der(y, y_):
        """Derivative of the cross-entropy w.r.t. the target-class probability.

        Returns ``-1 / y[j]`` where ``j`` is the argmax of the label ``y_``.
        """
        j = np.argmax(y_)
        return -1 / y[j]

    def inference(self, x, h_1):
        """Run one forward time step.

        x: input vector
        h_1: hidden state from the previous time step

        Returns ``(h, y)``: the new hidden state and the softmax output.
        """
        h = self.tanh(np.dot(x, self.U) + np.dot(h_1, self.W))
        y = self.softmax(np.dot(h, self.V))
        return h, y

    def train(self, x_data, y_data, alpha=0.1, steps=100):
        """Train the model with per-sequence gradient descent.

        x_data: iterable of input sequences (each a sequence of input vectors)
        y_data: iterable of label sequences aligned with ``x_data``
        alpha: learning rate
        steps: number of passes over the data

        Progress (step number and mean loss per sequence) is printed. Weights
        are updated once per sequence, after the full backward pass.
        """
        for step in range(steps):
            print("step:", step+1)
            for xs, ys in zip(x_data, y_data):  # one training sequence
                # h_list[0] is the initial state; h_list[t + 1] is the state
                # produced by time step t.
                h = self.start_h
                h_list = [h]
                y_list = []
                losses = []
                for x, y_ in zip(xs, ys):  # forward pass
                    h, y = self.inference(x, h)
                    h_list.append(h)
                    y_list.append(y)
                    losses.append(self.cross_entropy(y=y, y_=y_))
                print("loss:", np.mean(losses))

                V_update = np.zeros(shape=self.V.shape)
                U_update = np.zeros(shape=self.U.shape)
                W_update = np.zeros(shape=self.W.shape)
                next_layer1_delta = np.zeros(shape=(self.unit,))

                for i in reversed(range(len(xs))):  # backward pass through time
                    # Output-layer error (negative gradient w.r.t. the logits).
                    # This equals -cross_entropy_der * softmax_der, i.e.
                    # onehot(argmax(label)) - prediction, but is computed
                    # directly so a vanishing y[j] cannot yield inf * 0 = nan.
                    target = np.zeros_like(y_list[i])
                    target[np.argmax(ys[i])] = 1.0
                    layer2_delta = target - y_list[i]

                    # Hidden error = error arriving from the output layer plus
                    # error arriving from the next time step's hidden layer.
                    layer1_delta = self.tanh_der(h_list[i+1]) * (
                        np.dot(layer2_delta, self.V.T)
                        + np.dot(next_layer1_delta, self.W.T)
                    )

                    # Accumulate outer products as weight increments.
                    V_update += np.dot(np.atleast_2d(h_list[i+1]).T, np.atleast_2d(layer2_delta))
                    W_update += np.dot(np.atleast_2d(h_list[i]).T, np.atleast_2d(layer1_delta))
                    U_update += np.dot(np.atleast_2d(xs[i]).T, np.atleast_2d(layer1_delta))

                    # The current hidden error feeds the previous time step.
                    next_layer1_delta = layer1_delta

                # Deltas are negative gradients, so adding them descends the loss.
                self.W += W_update * alpha
                self.V += V_update * alpha
                self.U += U_update * alpha

    def predict(self, xs, return_sequence=False):
        """Run the network over a single input sequence.

        xs: one sample (a sequence of input vectors)
        return_sequence: when true, return the hidden states and outputs of
            every time step; otherwise only those of the last step

        Returns ``(h_list, y_list)`` or ``(h_last, y_last)``.
        """
        y_list = []
        h_list = []
        h = self.start_h
        for x in xs:
            h, y = self.inference(x, h)
            y_list.append(y)
            h_list.append(h)
        if return_sequence:
            return h_list, y_list
        return h_list[-1], y_list[-1]


In [16]:
class RNNTest:
    """Character-level demo harness: trains an ``RNN`` to predict the next
    character of a string and prints two sample predictions."""

    def __init__(self, hidden_num, all_chars):
        """Create the underlying RNN.

        :param hidden_num: size of the hidden layer
        :param all_chars: the full character set; a character's position in
            this sequence is its one-hot index
        """
        self.all_chars = all_chars
        self.len = len(all_chars)
        # Input and output vectors are both one-hot over the character set.
        self.rnn = RNN(self.len, hidden_num, self.len)

    def str2onehots(self, string):
        """Convert a string into a list of one-hot vectors.

        :param string: text whose characters all occur in ``all_chars``
        :return: list with one integer array of length ``self.len`` per character
        :raises ValueError: if a character is not in ``all_chars``
        """
        one_hots = []
        for char in string:
            # Builtin ``int`` replaces the ``np.int`` alias, which was removed
            # in NumPy 1.24 and raises AttributeError there.
            one_hot = np.zeros((self.len,), dtype=int)
            one_hot[self.all_chars.index(char)] = 1
            one_hots.append(one_hot)
        return one_hots

    def vector2char(self, vector):
        """Convert a prediction vector into the character with the highest score.

        :param vector: score/probability vector over ``all_chars``
        :return: the character at the argmax position
        """
        return self.all_chars[int(np.argmax(vector))]

    def run(self, x_data, y_data, alpha=0.1, steps=100):
        """Train on string pairs, then print predictions for "g" and "abc".

        :param x_data: input strings
        :param y_data: target strings, aligned character-by-character with ``x_data``
        :param alpha: learning rate passed to ``RNN.train``
        :param steps: number of training passes passed to ``RNN.train``
        """
        x_data_onehot = [self.str2onehots(xs) for xs in x_data]
        y_data_onehot = [self.str2onehots(ys) for ys in y_data]
        self.rnn.train(x_data_onehot, y_data_onehot, alpha=alpha, steps=steps)
        # predict(...) returns (hidden, output); index 1 is the final output vector.
        vector_g = self.rnn.predict(self.str2onehots("g"), False)[1]  # letter after "g"
        vector_abc = self.rnn.predict(self.str2onehots("abc"), False)[1]  # letter after "abc"
        print("g.next=",self.vector2char(vector_g))
        print("abc.next=",self.vector2char(vector_abc))


# Demo: learn to predict the next letter of the alphabet.
# Each target string is its input string shifted forward by one letter.
x_data = [
    "abc",
    "bcd",
    "cdef",
    "fgh",
    "a",
    "bc",
    "abcdef",
]
y_data = [
    "bcd",
    "cde",
    "defg",
    "ghi",
    "b",
    "cd",
    "bcdefg",
]
all_chars = "abcdefghi"

rnn_test = RNNTest(hidden_num=10, all_chars=all_chars)
rnn_test.run(x_data=x_data, y_data=y_data)

step: 1
loss: 2.077201875224448
loss: 1.8894262844484704
loss: 2.417838391008928
loss: 3.331204131167494
loss: 2.531513100410853
loss: 2.2405261910608543
loss: 2.0387999920226787
step: 2
loss: 1.8554869779839755
loss: 1.821915883080802
loss: 2.2371012613526706
loss: 3.178030395799348
loss: 2.617270814803658
loss: 2.2487265809471104
loss: 2.0154752203625677
step: 3
loss: 1.8534320824298367
loss: 1.8157543637997067
loss: 2.2239464827825373
loss: 3.146247349059942
loss: 2.612266936597083
loss: 2.2467253986396867
loss: 2.010165913591942
step: 4
loss: 1.8499162094825221
loss: 1.8119167415329376
loss: 2.220083818214011
loss: 3.130581571049366
loss: 2.605796312706854
loss: 2.244405136816895
loss: 2.007365557294827
step: 5
loss: 1.847032556935157
loss: 1.809008929396873
loss: 2.2181149113262455
loss: 3.1220121859838286
loss: 2.5984399085399494
loss: 2.241832076015241
loss: 2.005078214374914
step: 6
loss: 1.8443001339557046
loss: 1.8063983358956515
loss: 2.216940988311771
loss: 3.11672321124079

loss: 0.2178461988474669
step: 60
loss: 0.0578560222196527
loss: 0.14835902454310002
loss: 0.29282695118580576
loss: 0.8182862688845204
loss: 0.07606127715595178
loss: 0.06917712320884407
loss: 0.20608633472786075
step: 61
loss: 0.055357449727782164
loss: 0.1412990868265204
loss: 0.2772068756344768
loss: 0.7760221178921439
loss: 0.07762756681159715
loss: 0.0640818629047245
loss: 0.1948359568295969
step: 62
loss: 0.05284859767477713
loss: 0.13478508970176759
loss: 0.2624623150223789
loss: 0.7264200007892092
loss: 0.07859723231375731
loss: 0.059622146595624814
loss: 0.1835809352552673
step: 63
loss: 0.050470396633115165
loss: 0.1283616036701499
loss: 0.24834143407172388
loss: 0.6579353249036873
loss: 0.0792252212695042
loss: 0.056052777793461836
loss: 0.1709012610149636
step: 64
loss: 0.04832048693231625
loss: 0.1213215740959308
loss: 0.23400757772896438
loss: 0.559904940367198
loss: 0.08050421412625108
loss: 0.05379800705320302
loss: 0.15448362881782354
step: 65
loss: 0.0463848297833896