In [157]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
 

'apple!' : input_data => apple, output_data = pple!

In [158]:
input_str = 'apple'
label_str = 'pple!'
char_set = sorted(list(set(input_str+label_str)))  #set은 집합이므로 중복문자 허용 x => 문자 5개 남음

print(char_set)

char_set_size =len(char_set)
print(char_set_size)

['!', 'a', 'e', 'l', 'p']
5


In [159]:
input_size = 5
hidden_size = 5
output_size = 5
learning_rate = 0.1

In [160]:
#문자의 벡터값으로 훈련시킴

char_to_index = dict((c, i) for i, c in enumerate(char_set))  #enumerate를 활용하면 char_set의 인덱스를 리턴할 수 있음 

#아래의 코드와 같은 의미
# char_list = [c for c in char_set]
# char_list = []
# for c in char_set:
#     char_set.append(c)

print(char_to_index)


{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}


In [161]:
char_to_index.items()  #key, value를 튜플 형태로 리턴
#cf. char_to_index.values()
#cf. char_to_index.keys()

dict_items([('!', 0), ('a', 1), ('e', 2), ('l', 3), ('p', 4)])

In [162]:
index_to_char ={}
for key, value in char_to_index.items():
    index_to_char[value] = key
    
print(index_to_char)

{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [163]:
x_data = [char_to_index[c] for c in input_str]  #apple
y_data = [char_to_index[c] for c in label_str]  #pple!
x_data=[x_data]
y_data=[y_data]

#문자값으로 처리하는 것이 아닌, 문자의 인덱스 값으로 처리함

print(x_data)
print(y_data) 

[[1, 4, 4, 3, 2]]
[[4, 4, 3, 2, 0]]


In [164]:
x_one_hot = [np.eye(char_set_size)[x] for x in x_data]
print(x_one_hot) #데이터의 인덱스의 위치에 1을 넣음

[array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])]


In [165]:
X=torch.FloatTensor(x_one_hot)
Y=torch.LongTensor(y_data)
print(X)
print(Y)

tensor([[[0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0.],
         [0., 0., 1., 0., 0.]]])
tensor([[4, 4, 3, 2, 0]])


In [166]:
print(X.size(), Y.size())

torch.Size([1, 5, 5]) torch.Size([1, 5])


In [167]:
#모델 만들기
#nn.RNN(input_size, hidden_size, output_size) #RNN모델에서 필요한 데이터들 - 만들고 사용해야 하므로 클래스로 만들어서 사용할 것임
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):#모델을 만들 때는 init을 씀
        super(Net, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        #batch_first : 입력데이터 batch의 차원의 위치를 설정
        #(배치크기, 시퀀스 길이, 특징 수)
        #입력의 형태가 (32, 10, 50) 라면 순서대로 위와 같음
        self.fc = nn.Linear(hidden_size, output_size, bias=True)
        
#실제 모델 구현(forward)
    def forward(self, x):
        x, _status = self.rnn(x) #x는 rnn을 수행한 값
        x = self.fc(x)
        return x       
        

In [168]:
#모델 생성
model=Net(input_size, hidden_size, output_size)
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(), lr=learning_rate)
#최적화 방벙 정함

In [169]:
output=model(X)
print(output)
print(output.size())

tensor([[[-0.0268, -0.4474, -0.5844,  0.4991, -0.6480],
         [ 0.2113, -0.2797, -0.4596,  0.4670, -0.6389],
         [ 0.1170, -0.3987, -0.5075,  0.5359, -0.6814],
         [ 0.0435, -0.4429, -0.6038,  0.6208, -0.7316],
         [ 0.2142, -0.5514, -0.5065,  0.5264, -0.5320]]],
       grad_fn=<ViewBackward0>)
torch.Size([1, 5, 5])


In [170]:
result = output.data.numpy().argmax(axis = 2)  #0:3차원, 1:열, 2:행
print(np.squeeze(result)) #squeeze는 크기가 1인 차원을 제거함, unsqueeze(dim): dim에 해당하는 위치에 크기가 1인 차원을 추가함 

[3 3 3 3 3]


In [None]:
for i in range(200):
    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output.view(-1, input_size), Y.view(-1))
    loss.backward()
    optimizer.step()
    
    result = output.data.numpy().argmax(axis=-1) #인덱스로 된 결과
    str_result=''.join([index_to_char[c] for c in np.squeeze(result)]) #문자로 된 결과
    print(i, "loss", loss.item(), "prediction :", result,str_result, 'true:', y_data)

0 loss 1.7524408102035522 prediction : [[3 3 3 3 3]] lllll true: [[4, 4, 3, 2, 0]]
1 loss 1.479591727256775 prediction : [[3 0 3 3 0]] l!ll! true: [[4, 4, 3, 2, 0]]
2 loss 1.293205976486206 prediction : [[4 0 3 4 0]] p!lp! true: [[4, 4, 3, 2, 0]]
3 loss 1.121469497680664 prediction : [[4 4 4 4 0]] pppp! true: [[4, 4, 3, 2, 0]]
4 loss 0.9451153874397278 prediction : [[4 4 4 2 0]] pppe! true: [[4, 4, 3, 2, 0]]
5 loss 0.7610827088356018 prediction : [[4 4 4 2 0]] pppe! true: [[4, 4, 3, 2, 0]]
6 loss 0.6084244847297668 prediction : [[4 4 3 2 0]] pple! true: [[4, 4, 3, 2, 0]]
7 loss 0.4610603451728821 prediction : [[4 4 3 2 0]] pple! true: [[4, 4, 3, 2, 0]]
8 loss 0.33331093192100525 prediction : [[4 4 3 2 0]] pple! true: [[4, 4, 3, 2, 0]]
9 loss 0.2349424660205841 prediction : [[4 4 3 2 0]] pple! true: [[4, 4, 3, 2, 0]]
10 loss 0.15956611931324005 prediction : [[4 4 3 2 0]] pple! true: [[4, 4, 3, 2, 0]]
11 loss 0.10892747342586517 prediction : [[4 4 3 2 0]] pple! true: [[4, 4, 3, 2, 0]]
12

In [172]:
sentence = """The unsqueeze inserts a dimension of size 1 at the specified dim."""

In [173]:
char_set1 = list(set(sentence))
print(char_set1)

['d', 'e', ' ', 'n', 'i', '.', 's', 'h', 'm', 't', 'T', 'a', 'u', '1', 'f', 'q', 'o', 'p', 'c', 'z', 'r']


In [174]:
char_dic = {c:i for i, c in enumerate(char_set1)}
char_dic

{'d': 0,
 'e': 1,
 ' ': 2,
 'n': 3,
 'i': 4,
 '.': 5,
 's': 6,
 'h': 7,
 'm': 8,
 't': 9,
 'T': 10,
 'a': 11,
 'u': 12,
 '1': 13,
 'f': 14,
 'q': 15,
 'o': 16,
 'p': 17,
 'c': 18,
 'z': 19,
 'r': 20}

In [175]:
#char_dict 크기 구하기
char_dic_size = len(char_dic)
print(char_dic_size)

21


In [176]:
hidden_size = char_dic_size
sequence_length = 10 #데이터가 너무 크면 한 세트에 처리하면 숫자가 너무 커지므로 한 번에 처리할 개수를 정함(훈련데이터 batch size)
learning_rate = 0.1


In [177]:
x_data = []
y_data = []

for i in range(0, len(sentence)-sequence_length):
    x_str = sentence[i:i+sequence_length]
    y_str = sentence[i+1 : i+sequence_length+1]
    print(i, x_str, ':',y_str)
    x_data.append([char_dic[c] for c in x_str])
    y_data.append([char_dic[c] for c in y_str])    

0 The unsque : he unsquee
1 he unsquee : e unsqueez
2 e unsqueez :  unsqueeze
3  unsqueeze : unsqueeze 
4 unsqueeze  : nsqueeze i
5 nsqueeze i : squeeze in
6 squeeze in : queeze ins
7 queeze ins : ueeze inse
8 ueeze inse : eeze inser
9 eeze inser : eze insert
10 eze insert : ze inserts
11 ze inserts : e inserts 
12 e inserts  :  inserts a
13  inserts a : inserts a 
14 inserts a  : nserts a d
15 nserts a d : serts a di
16 serts a di : erts a dim
17 erts a dim : rts a dime
18 rts a dime : ts a dimen
19 ts a dimen : s a dimens
20 s a dimens :  a dimensi
21  a dimensi : a dimensio
22 a dimensio :  dimension
23  dimension : dimension 
24 dimension  : imension o
25 imension o : mension of
26 mension of : ension of 
27 ension of  : nsion of s
28 nsion of s : sion of si
29 sion of si : ion of siz
30 ion of siz : on of size
31 on of size : n of size 
32 n of size  :  of size 1
33  of size 1 : of size 1 
34 of size 1  : f size 1 a
35 f size 1 a :  size 1 at
36  size 1 at : size 1 at 
37 size 1 a

In [178]:
print(x_data)
print(y_data)

[[10, 7, 1, 2, 12, 3, 6, 15, 12, 1], [7, 1, 2, 12, 3, 6, 15, 12, 1, 1], [1, 2, 12, 3, 6, 15, 12, 1, 1, 19], [2, 12, 3, 6, 15, 12, 1, 1, 19, 1], [12, 3, 6, 15, 12, 1, 1, 19, 1, 2], [3, 6, 15, 12, 1, 1, 19, 1, 2, 4], [6, 15, 12, 1, 1, 19, 1, 2, 4, 3], [15, 12, 1, 1, 19, 1, 2, 4, 3, 6], [12, 1, 1, 19, 1, 2, 4, 3, 6, 1], [1, 1, 19, 1, 2, 4, 3, 6, 1, 20], [1, 19, 1, 2, 4, 3, 6, 1, 20, 9], [19, 1, 2, 4, 3, 6, 1, 20, 9, 6], [1, 2, 4, 3, 6, 1, 20, 9, 6, 2], [2, 4, 3, 6, 1, 20, 9, 6, 2, 11], [4, 3, 6, 1, 20, 9, 6, 2, 11, 2], [3, 6, 1, 20, 9, 6, 2, 11, 2, 0], [6, 1, 20, 9, 6, 2, 11, 2, 0, 4], [1, 20, 9, 6, 2, 11, 2, 0, 4, 8], [20, 9, 6, 2, 11, 2, 0, 4, 8, 1], [9, 6, 2, 11, 2, 0, 4, 8, 1, 3], [6, 2, 11, 2, 0, 4, 8, 1, 3, 6], [2, 11, 2, 0, 4, 8, 1, 3, 6, 4], [11, 2, 0, 4, 8, 1, 3, 6, 4, 16], [2, 0, 4, 8, 1, 3, 6, 4, 16, 3], [0, 4, 8, 1, 3, 6, 4, 16, 3, 2], [4, 8, 1, 3, 6, 4, 16, 3, 2, 16], [8, 1, 3, 6, 4, 16, 3, 2, 16, 14], [1, 3, 6, 4, 16, 3, 2, 16, 14, 2], [3, 6, 4, 16, 3, 2, 16, 14, 2, 6], [6, 

In [179]:
x_one_hot = [np.eye(char_dic_size)[x] for x in x_data]
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)
print(X)
print(Y)

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.]],

        [[0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 1., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 1., 0., 0.],
         ...,
         [1., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 1., 

In [180]:
print(X.size(), Y.size())

torch.Size([55, 10, 21]) torch.Size([55, 10])


In [181]:
#모델 만들기
class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, layers):
        super(Net, self).__init__()
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers=layers,
                        batch_first=True)
        self.fc = nn.Linear(hidden_dim, hidden_dim, bias=True)
        
    def forward(self, x):
        x, _status = self.rnn(x)
        x= self.fc(x)
        return x

In [182]:
model = Net(char_dic_size, hidden_size, 2) #층을 2개 쌓겠다.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [183]:
outputs = model(X)
print(outputs)

tensor([[[-0.1159, -0.0979, -0.0342,  ..., -0.0637, -0.1310, -0.2268],
         [-0.0755, -0.0763,  0.0087,  ..., -0.0929, -0.1017, -0.0913],
         [-0.0865, -0.0493, -0.0036,  ..., -0.0046, -0.1799, -0.1867],
         ...,
         [-0.0847, -0.1310,  0.0368,  ..., -0.0158, -0.1942, -0.0698],
         [-0.0429, -0.0573, -0.0218,  ...,  0.0403, -0.2727, -0.1963],
         [-0.0697, -0.0691,  0.0167,  ..., -0.0339, -0.1950, -0.1793]],

        [[-0.0944, -0.0860,  0.0162,  ..., -0.0864, -0.0889, -0.0974],
         [-0.0828, -0.0767, -0.0510,  ..., -0.0060, -0.2002, -0.2191],
         [ 0.0152,  0.0142,  0.0017,  ..., -0.0093, -0.1681, -0.1351],
         ...,
         [-0.0423, -0.0583, -0.0235,  ...,  0.0404, -0.2754, -0.1975],
         [-0.0691, -0.0696,  0.0165,  ..., -0.0334, -0.1961, -0.1794],
         [-0.0380, -0.0258, -0.0450,  ...,  0.0117, -0.2052, -0.1868]],

        [[-0.0901, -0.1020, -0.0343,  ..., -0.0143, -0.1465, -0.2130],
         [-0.0118, -0.0029, -0.0379,  ..., -0

In [184]:
outputs = model(X)
print(outputs[0])

tensor([[-0.1159, -0.0979, -0.0342, -0.1275, -0.0307, -0.1067, -0.3330, -0.0751,
         -0.0795, -0.0133,  0.0082, -0.1182,  0.0386, -0.3003,  0.0961, -0.0005,
         -0.0425,  0.2353, -0.0637, -0.1310, -0.2268],
        [-0.0755, -0.0763,  0.0087,  0.0048, -0.1888, -0.1112, -0.2754, -0.0981,
         -0.3220,  0.0417, -0.1168, -0.1547,  0.0676, -0.3476,  0.0372, -0.0127,
         -0.0978,  0.2436, -0.0929, -0.1017, -0.0913],
        [-0.0865, -0.0493, -0.0036, -0.0778, -0.1810, -0.1469, -0.3032, -0.0294,
         -0.2926,  0.0472, -0.0596, -0.0679,  0.0201, -0.3103,  0.0921, -0.0218,
         -0.0678,  0.2861, -0.0046, -0.1799, -0.1867],
        [ 0.0175,  0.0371,  0.0174, -0.0603, -0.1597, -0.0390, -0.3068, -0.0509,
         -0.4101,  0.0489, -0.1457, -0.1667, -0.0489, -0.2365, -0.0656,  0.0288,
         -0.0847,  0.3044, -0.0019, -0.1426, -0.1399],
        [ 0.0115, -0.0228, -0.0455, -0.1157, -0.0999, -0.0691, -0.1965, -0.1897,
         -0.2953, -0.0050, -0.1120, -0.1424, -0.043

In [185]:
# result = outputs.view(-1, char_dic_size)
# print(result.size())

In [186]:
a = ['p', 'y', 't','h','o','n']
a1 = ''.join(a)
a1

'python'

In [187]:
for i in range(4800):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs.view(-1, char_dic_size), Y.view(-1))
    
    loss.backward()
    optimizer.step()
    
    results = outputs.argmax(dim = 2)
    predict_str = ""
    
    for j, result in enumerate(results): #10개씩 되어있는 문자들 붙이는 것
        if j == 0 :
            predict_str +=''.join([char_set1[t] for t in result])
        else:
            predict_str += char_set1[result[-1]]
    print(predict_str)

pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp
eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
      i                                                         
 uTfnoiofeonofnonoTeoponpos fnoi ftoueoe fnotoueoioTosufeoTtonof
es oesTze oi oisioiezseot e 1ioieoieoise 1tsterieoti eeoi oizi  
ee smsesi tizeesesizesizize size tmeseze sizestme t ze 1feet1eze
ee smeesmesias sisteesfeefeesizeesfesize stae sme smte zeeeiaeee
ee shsizs de snseatetatetae stat sh snae snat the th s ae snatat
ee shsize se 1ns dte aedtme s at sh seee s ae te  th s ze s atmt
ee spsize ze 1 s dte s dime size of spee 1 ae tpe sp  ime s aime
ee spsife ze cne ris s dife sife ofesize 1 ae spe spe ife d hise
he spsiue oe c siris h sife sife ofesife 1 ae spe spe ife d fime
he spsiue ze ctsiris a dife sife ofesime 1 ae ohe specifi a rime
he sn iue ze cnseris a dimeesioeeofesize 1 ae the specifiea dime
he sn iueeze inserts a dimeesioeeofesize 1 ae the specified dime
he snsqueeze inserts a di