In [1]:
# Toy corpus: six three-word sentences consumed by the preprocessing cell below.
sentences = [
    "I like dog",
    "I love coffee",
    "I hate milk",
    "You like cat",
    "You love milk",
    "You hate coffee",
]
import numpy 

class RNN :
    """Single-layer tanh recurrent network.

    Recurrence: h_t = tanh(h_{t-1} @ Wh + x_t @ Wx + b)

    Attributes
    ----------
    Wh : (output_size, output_size) hidden-to-hidden weights.
    Wx : (input_size, output_size) input-to-hidden weights.
    b  : (1, output_size) bias.
    hs : hidden states of the last forward() call, (batch, seq_len, output_size).
    cache : input of the last forward() call, (batch, seq_len, input_size).
    start_h : optional initial hidden state, one row per sample; zeros when None.
    """

    def __init__(self, input_size, output_size, start_h : numpy.ndarray = None) :
        self.Wh = numpy.random.randn(output_size, output_size)
        self.Wx = numpy.random.randn(input_size, output_size)
        self.b  = numpy.random.randn(1, output_size)
        self.hs = None
        self.cache = None  # input x saved by forward() for use in backward()
        self.start_h = start_h

    def forward(self, x) :
        """Run the recurrence over every sample and time step.

        Parameters
        ----------
        x : array indexed as x[sample, step, feature].

        Returns
        -------
        numpy.ndarray of all hidden states, indexed as hs[sample, step, unit].
        """
        self.cache = x
        self.hs = []
        for b in range(x.shape[0]) :
            temp = []
            h = numpy.zeros(shape=(1,self.Wx.shape[1])) if self.start_h is None else self.start_h[b:b+1,:]
            for s in range(x.shape[1]) :
                h = numpy.tanh(numpy.dot(h, self.Wh) + numpy.dot(x[b,s:s+1,:], self.Wx) + self.b) #(1,out)
                temp.append(h[0])
            self.hs.append(temp)
        self.hs = numpy.array(self.hs)
        return self.hs

    def backward(self, dh, lr) :
        """Back-propagate through time and take one gradient-descent step.

        Parameters
        ----------
        dh : gradient of the loss with respect to the LAST hidden state.
             Shape (batch, out) gives one row per sample; shape (1, out) or
             (out,) is shared by every sample.
        lr : learning rate.

        Returns
        -------
        Gradient with respect to the initial hidden state, one row per sample.

        Raises
        ------
        ValueError if dh has a row count other than 1 or the batch size.
        """
        dWh = numpy.zeros_like(self.Wh) #(out, out)
        dWx = numpy.zeros_like(self.Wx) #(in, out)
        db  = numpy.zeros_like(self.b)  #(1, out)

        batch = self.hs.shape[0]
        dh = numpy.asarray(dh, dtype=float)
        if dh.ndim == 1 :
            dh = dh.reshape(1, -1)
        if batch > 0 and dh.shape[0] not in (1, batch) :
            raise ValueError("dh must have 1 or {} rows, got {}".format(batch, dh.shape[0]))

        dh0 = []
        for b in range(batch) :
            # Every sample starts from its own upstream gradient; the gradient
            # left over from the previous sample must not leak into this one.
            grad = dh[b:b+1,:] if dh.shape[0] == batch else dh
            for s in range(self.hs.shape[1]-1,-1,-1) :
                dt = grad * (1 - (self.hs[b,s:s+1,:] ** 2)) # tanh derivative, (1, out)
                if s != 0 :
                    dWh = dWh + numpy.dot(self.hs[b,s-1:s,:].T, dt)
                elif self.start_h is not None :
                    # At the first step the previous hidden state is start_h.
                    dWh = dWh + numpy.dot(self.start_h[b:b+1,:].T, dt)
                # else: the previous hidden state is all zeros, so it adds nothing to dWh.
                dWx = dWx + numpy.dot(self.cache[b,s:s+1,:].T, dt)  #(in,1) @ (1,out) = (in,out)
                db  = db  + dt # (1,out)
                grad = numpy.dot(dt, self.Wh.T) #(1,out) @ (out,out) = (1,out)
            dh0.append(grad)

        # Apply the update once, after gradients from all samples are accumulated.
        self.Wh = self.Wh - dWh * lr
        self.Wx = self.Wx - dWx * lr
        self.b  = self.b  - db  * lr

        if not dh0 :
            return dh
        return numpy.concatenate(dh0, axis=0)

class Linear :
    """Fully connected layer: Linear(x) = x @ W + b.

    Attributes
    ----------
    W : (input_size, output_size) weights.
    b : (1, output_size) bias, broadcast over the rows of x.
    cache : input of the last forward() call, (n, input_size).
    """

    def __init__(self, input_size, output_size) :
        self.W = numpy.random.randn(input_size, output_size) #(in, out)
        self.b = numpy.random.randn(1, output_size) #(1, out)
        self.cache = None # input x saved by forward(), (n, in)

    def forward(self, x) :
        """Return x @ W + b for x of shape (n, in); the result is (n, out)."""
        self.cache = x
        y = numpy.dot(x, self.W) + self.b #(n,out)
        return y

    def backward(self, dy, lr) :
        """Update W and b by gradient descent and return the input gradient.

        Parameters
        ----------
        dy : gradient with respect to the layer output, (n, out).
        lr : learning rate.

        Returns
        -------
        Gradient with respect to the cached input, (n, in).
        """
        dW = numpy.dot(self.cache.T, dy) #(in,n) @ (n,out) = (in,out)
        # b is shared by every row, so its gradient is the sum over the batch.
        # Keeping the (1, out) shape stops b from growing to (n, out).
        db = numpy.sum(dy, axis=0, keepdims=True)
        dx = numpy.dot(dy, self.W.T) #(n,out) @ (out,in) = (n,in)

        self.W = self.W - dW * lr
        self.b = self.b - db * lr
        return dx

class Softmax :
    """Softmax over the last axis, so each row becomes its own distribution."""

    def __init__(self) :
        self.dummpy = None # unused placeholder; name kept as-is for compatibility

    def forward(self, x) :
        """Return softmax(x) computed along the last axis.

        The row maximum is subtracted before exponentiating. This leaves the
        result unchanged mathematically but keeps exp() from overflowing on
        large inputs.
        """
        x = numpy.asarray(x, dtype=float)
        if x.size == 0 :
            return x
        shifted = x - numpy.max(x, axis=-1, keepdims=True)
        exp_val = numpy.exp(shifted)
        return exp_val / numpy.sum(exp_val, axis=-1, keepdims=True)

    def backward(self, dy, lr = 0) :
        """Pass the incoming gradient through unchanged.

        CEE.backward() already returns y - t, which is the gradient with
        respect to the softmax input, so no extra Jacobian is applied here.
        lr is accepted only for a uniform layer interface and is ignored.
        """
        return dy
    
class CEE:
    """Cross-entropy error that remembers its last inputs for the backward pass."""

    def __init__(self):
        # Target and prediction from the most recent forward() call.
        self.cache_t = None
        self.cache_y = None

    def forward(self, y, t):
        """Store ``y`` and ``t``, then return ``-sum(t * log(y + 1e-15))``.

        The small constant keeps log() finite when an entry of ``y`` is 0.
        """
        self.cache_y, self.cache_t = y, t
        tiny = 1e-15
        log_probs = numpy.log(y + tiny)
        return -1 * numpy.sum(t * log_probs)

    def backward(self):
        """Return ``y - t`` using the values cached by the last forward()."""
        prediction, target = self.cache_y, self.cache_t
        return prediction - target
    

In [5]:
### Model that predicts the next word

## Data preprocessing
# 1) One-hot encoding
# sorted() gives every run the same word -> index mapping; iterating a bare set
# of strings can yield a different order from one interpreter session to the next.
data = sorted(set(" ".join(sentences).split()))
word_dict = {w : i for i,w in enumerate(data)}
number_dict = {i : w for i,w in enumerate(data)}

# Build the identity matrix once; row i is the one-hot vector of word i.
one_hot = numpy.eye(len(word_dict))
corpus = numpy.array([[one_hot[word_dict[w]] for w in s.split()] for s in sentences])

## Training
# 1) Inputs x (first two words of each sentence) and targets t (third word)
x = corpus[:,0:2,:]
t = corpus[:,2:3,:]

# 2) Model, loss function and hyper-parameters
# F(x) :  x -> rnn(x) -> f(x) -> softmax(x) -> y
# NOTE: the layers draw their initial weights from numpy.random without a seed,
# so the printed losses differ from run to run.
hidden_size = 5
vocab_size = len(word_dict)  # output layer must have one unit per vocabulary word
rnn = RNN(x.shape[2], hidden_size)
f = Linear(hidden_size, vocab_size)
softmax = Softmax()
loss_function = CEE()
epoch = 200
batch_size = x.shape[0]  # number of sentences; each one is trained on individually
lr = 0.5

for e in range(epoch) :
    loss_sum = 0
    for b in range(batch_size) :
        # 3) y = F(x); only the hidden state of the last time step feeds the classifier
        y = rnn.forward(x[b:b+1,:,:])
        y = y[:,-1,:]
        y = f.forward(y)
        y = softmax.forward(y)

        # 4) Compare y with t
        loss = loss_function.forward(y, t[b])
        loss_sum += loss

        # 5) Update F(x) by back-propagation
        dy = loss_function.backward()
        dy = softmax.backward(dy, lr = lr)
        dy = f.backward(dy, lr = lr)
        dy = rnn.backward(dy, lr = lr)

    if (e+1) % 10 == 0 :
        print("epoch {} | loss {}".format(e+1, loss_sum))



epoch 10 | loss 9.436872985947847
epoch 20 | loss 0.6403388771693282
epoch 30 | loss 0.26290553779340886
epoch 40 | loss 0.16952121733894404
epoch 50 | loss 0.12526508910625633
epoch 60 | loss 0.09902007811998749
epoch 70 | loss 0.08146774769997062
epoch 80 | loss 0.06886137850603907
epoch 90 | loss 0.059413089990400667
epoch 100 | loss 0.05212795662959781
epoch 110 | loss 0.04637843333132221
epoch 120 | loss 0.04174461458955749
epoch 130 | loss 0.037939249958410935
epoch 140 | loss 0.03476241378130984
epoch 150 | loss 0.032072171046311944
epoch 160 | loss 0.029765623341808677
epoch 170 | loss 0.02776663918805706
epoch 180 | loss 0.026017814258550428
epoch 190 | loss 0.02447511005456791
epoch 200 | loss 0.023104209030335325


In [3]:
# Validation: feed the first two words of every sentence through the trained
# model and print the word with the highest score for each one.
# Third words in `sentences`, in order: dog, coffee, milk, cat, milk, coffee
y = softmax.forward(f.forward(rnn.forward(x)[:, -1, :]))

for word_index in numpy.argmax(y, axis=1):
    print(number_dict[word_index])

dog
coffee
milk
cat
milk
coffee
