# Laboratorium 5: Rekurencyjne Sieci Neuronowe (RNN)
## Część 1: number2roman

In [1]:
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import RNN, LSTM, RepeatVector, Dropout
import numpy as np
from roman_numerals import convert

### Tworzenie zestawu danych

In [2]:
import random  # not used in this cell; check_model() relies on it for picking examples

DATASET_SIZE = 200

# Inputs are the integers 0..DATASET_SIZE-1; each target is the Roman numeral
# of that integer split into a list of single characters.
samples = np.arange(DATASET_SIZE)
labels = np.array([list(convert(number)) for number in range(DATASET_SIZE)], dtype=object)

# The longest numeral determines the fixed length of every encoded label.
MAX_OUTPUT_SEQUENCE_LEN = max((len(chars) for chars in labels), default=0)
print(MAX_OUTPUT_SEQUENCE_LEN)

print("Sample (input):",samples[0])
print("Label",labels[0])

# Output alphabet; index 0 (the space) doubles as the padding class.
codes = ' IVXLC'

# One-hot encode every label, padding short numerals with class 0.
nlabels = np.zeros((DATASET_SIZE, MAX_OUTPUT_SEQUENCE_LEN, len(codes)))
for row, chars in enumerate(labels):
    for pos in range(MAX_OUTPUT_SEQUENCE_LEN):
        class_index = codes.index(chars[pos]) if pos < len(chars) else 0
        nlabels[row, pos, class_index] = 1
print("Label encoded (output):\n",nlabels[123])
labels = nlabels
print(labels.shape)

9
Sample (input): 0
Label []
Label encoded (output):
 [[0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]]
(200, 9, 6)


### Podział na zestaw uczący i testowy

In [3]:
# Fraction of the dataset used for training; the rest is held out for testing.
TRAINING_SIZE = .5
# Fixed seed so the same samples land in the training set on every run
# (the network weights are still initialised randomly).
SPLIT_SEED = 42
from sklearn.model_selection import train_test_split
(trainSamples, testSamples, trainLabels, testLabels) = train_test_split(
    samples, labels, train_size=TRAINING_SIZE, random_state=SPLIT_SEED)
print('Training samples:',len(trainSamples),' test samples',len(testSamples))

Training samples: 100  test samples 100


### Tworzenie sieci

In [4]:
# The scalar input is projected to 16 features, repeated once per output
# position, passed through three stacked LSTMs and classified per position
# with a softmax over the characters in `codes`.
model = Sequential()
model.add(Dense(16, input_dim=1))
# Number of repeats = length of the encoded labels. Tied to
# MAX_OUTPUT_SEQUENCE_LEN (instead of a literal 9) so the output shape keeps
# matching `labels` when DATASET_SIZE changes.
model.add(RepeatVector(MAX_OUTPUT_SEQUENCE_LEN))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.4))
model.add(LSTM(128, return_sequences=True))
model.add(Dense(len(codes),activation='softmax'))
model.summary()

model.compile(loss='categorical_crossentropy', optimizer="adam",metrics=['accuracy', 'mae'])
# Running total of epochs trained so far; the training cell adds to it.
num_epochs = 0

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 16)                32        
_________________________________________________________________
repeat_vector (RepeatVector) (None, 9, 16)             0         
_________________________________________________________________
lstm (LSTM)                  (None, 9, 128)            74240     
_________________________________________________________________
lstm_1 (LSTM)                (None, 9, 128)            131584    
_________________________________________________________________
dropout (Dropout)            (None, 9, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 9, 128)            131584    
_________________________________________________________________
dense_1 (Dense)              (None, 9, 6)              7

In [5]:
def label2words(label):
    """Decode a sequence of class indices into text.

    Each element of ``label`` is converted to ``int`` and used as an index
    into the module-level ``codes`` string. The characters are concatenated
    and leading/trailing whitespace (the padding class) is stripped.
    """
    return ''.join(codes[int(class_index)] for class_index in label).strip()
    
def check_model(verbose=0,show_training=1):
    """Predict every sample and report exact-match accuracy.

    Reads the module-level ``model``, ``samples``, ``trainSamples``,
    ``convert`` and ``label2words``.

    Args:
        verbose: 1 prints one line per evaluated sample (prediction, ``[ok]``
            when it matches, ``[T]`` when the sample is in the training set);
            0 prints five randomly chosen predictions; any other value prints
            only the summary line.
        show_training: when falsy, samples present in ``trainSamples`` are
            left out of the evaluation entirely.

    Returns:
        Tuple ``(correct, evaluated, accuracy)`` where ``evaluated`` counts
        only the samples that were actually scored and ``accuracy`` is
        ``correct / evaluated`` (0.0 when nothing was scored).
    """
    pred = model.predict(samples)
    res = pred.argmax(axis=2)
    # Build the membership set once instead of scanning the array per sample.
    train_values = {int(value) for value in trainSamples}
    correct = 0
    evaluated = 0
    for i in range(len(pred)):
        # Use the sample's own value rather than assuming it equals its index.
        number = int(samples[i])
        is_train = number in train_values
        if not show_training and is_train:
            continue
        evaluated += 1
        txt = label2words(res[i])
        ok = ''
        if txt == convert(number):
            correct += 1
            ok = "[ok]"
        if verbose == 1:
            print(number,'->',txt, ok,'[T]' if is_train else '')
    if verbose == 0:
        for _ in range(5):
            x = random.randrange(len(res))
            print(int(samples[x]),'->',label2words(res[x]))
    # Divide by the number of samples actually scored, so skipping the
    # training samples no longer deflates the reported accuracy.
    accuracy = correct / evaluated if evaluated else 0.0
    print('Correct',correct,'of',evaluated,' = ',accuracy)
    return correct,evaluated,accuracy


# Training schedule: NUM_ROUNDS rounds of EPOCHS epochs each, with an accuracy
# check after every round.
EPOCHS=50
NUM_ROUNDS = 20
# Two mini-batches per epoch; never let the batch size drop to zero.
BATCH_SIZE = max(1, len(trainSamples)//2)
print('Training with',len(trainSamples),'samples',EPOCHS,'epochs and batch_size=',BATCH_SIZE)
print("Epochs so far",num_epochs)
for _ in range(NUM_ROUNDS):
    H = model.fit(trainSamples, trainLabels, epochs=EPOCHS,verbose=0,batch_size=BATCH_SIZE)
    num_epochs += EPOCHS
    losses = H.history['loss']
    print()
    print("Epoch {} - loss ={:6.3f}, loss improvement ={:6.3f}".
          format(num_epochs, losses[-1], losses[0]-losses[-1]))
    # check_model() runs its own prediction pass, so no separate
    # model.predict() call is needed here.
    c,l,p = check_model()
print("Done")

Training with 100 samples 50 epochs and batch_size= 50
Epochs so far 0

Epoch 50 - loss = 0.783, loss improvement = 0.909
186 -> LXXXII
63 -> XXXI
124 -> XXXII
61 -> XXXI
51 -> XXXI
Correct 4 of 200  =  0.02

Epoch 100 - loss = 0.701, loss improvement = 0.081
195 -> CLXXXI
55 -> XXXI
18 -> XXI
63 -> XXXI
145 -> CXXXI
Correct 6 of 200  =  0.03

Epoch 150 - loss = 0.639, loss improvement = 0.066
168 -> CLXXI
100 -> CXXI
55 -> XXXI
25 -> XXII
55 -> XXXI
Correct 8 of 200  =  0.04

Epoch 200 - loss = 0.585, loss improvement = 0.052
19 -> XXII
7 -> II
125 -> CXXII
60 -> LXVI
181 -> CLXXXI
Correct 9 of 200  =  0.045

Epoch 250 - loss = 0.529, loss improvement = 0.052
6 -> I
37 -> XXXI
35 -> XXXI
172 -> CLXXII
103 -> CXXI
Correct 10 of 200  =  0.05

Epoch 300 - loss = 0.476, loss improvement = 0.052
120 -> CXXI
105 -> CXII
145 -> CXLII
137 -> CXXII
81 -> LXXXI
Correct 17 of 200  =  0.085

Epoch 350 - loss = 0.491, loss improvement =-0.019
161 -> CLXII
167 -> CLXXII
92 -> XCII
104 -> CXII
92 ->

In [6]:
# Final evaluation: print one line per sample; the returned
# (correct, total, accuracy) tuple is displayed as the cell's output.
check_model(verbose=1)

0 -> CCCCCCCCC  
1 -> CCCCCCCCC  
2 -> XI  
3 -> III [ok] [T]
4 -> IV [ok] [T]
5 -> V [ok] [T]
6 -> V  
7 -> I  
8 -> I  
9 -> IX [ok] [T]
10 -> X [ok] [T]
11 -> XI [ok] 
12 -> XII [ok] [T]
13 -> XIII [ok] [T]
14 -> XIV [ok] [T]
15 -> XIV  
16 -> XIVI  
17 -> XVII [ok] [T]
18 -> XVIII [ok] [T]
19 -> XVIII  
20 -> XX [ok] [T]
21 -> XXI [ok] [T]
22 -> XXII [ok] 
23 -> XXIVI  [T]
24 -> XXIV [ok] [T]
25 -> XXIV  
26 -> XXIV  
27 -> XXIV  
28 -> XXIX  
29 -> XXIX [ok] [T]
30 -> XXX [ok] [T]
31 -> XXXI [ok] [T]
32 -> XXXII [ok] [T]
33 -> XXXII  
34 -> XXXV  
35 -> XXXV [ok] [T]
36 -> XXXVI [ok] [T]
37 -> XXXVII [ok] [T]
38 -> XXXVII  
39 -> XXXVX  [T]
40 -> XXXIX  
41 -> XLI [ok] [T]
42 -> XLI  
43 -> XLII  [T]
44 -> XLIV [ok] [T]
45 -> XLVV  [T]
46 -> XLVI [ok] [T]
47 -> XLVII [ok] [T]
48 -> XLVIII [ok] [T]
49 -> XLVIII  
50 -> XLV  [T]
51 -> L  
52 -> LII [ok] [T]
53 -> LII  [T]
54 -> LII  [T]
55 -> LII  
56 -> LVI [ok] [T]
57 -> LVI  [T]
58 -> LVI  
59 -> LVI  
60 -> LVII  
61 -> LVII  
6

(49, 200, 0.245)

### Wnioski
Model ma słabą dokładność. Zmiany w budowie modelu, w rozmiarach zbiorów uczącego i testowego oraz w pozostałych parametrach najczęściej prowadziły jedynie do wyraźniejszego przetrenowania.