In [20]:
import tensorflow as tf

# Number of full passes over the training set performed by the training loop.
EPOCHS = 10
# Vocabulary size: forwarded to imdb.load_data(num_words=...) and used as the
# input dimension of the model's Embedding layer.
NUM_WORDS = 10_000

# 모델 정의

In [21]:
class RNN(tf.keras.Model):
    """Sentiment classifier: Embedding -> SimpleRNN -> 2-way softmax Dense.

    Intended for the IMDB movie-review data set, where every review is
    classified as either positive or negative.

    The forward pass is implemented in ``call`` (not ``__call__``): the Keras
    subclassing API expects ``call`` to be overridden, and ``model(x, ...)``
    then goes through the ``__call__`` wrapper that Keras itself provides.
    Overriding ``__call__`` directly would replace that wrapper.
    """

    def __init__(self):
        super().__init__()
        # A word id is conceptually a one-hot vector over NUM_WORDS entries;
        # the embedding maps it to a real-valued feature vector of length 16.
        self.emb = tf.keras.layers.Embedding(NUM_WORDS, 16)
        # Vanilla (simple) RNN with 32 units.
        self.rnn = tf.keras.layers.SimpleRNN(32)
        # Two softmax outputs, one per sentiment class (positive / negative).
        self.dense = tf.keras.layers.Dense(2, activation='softmax')

    def call(self, x, training=False, mask=None):
        """Compute class probabilities for a batch of token-id sequences.

        Args:
            x: Batch of word-id sequences fed to the embedding layer.
            training: Accepted for Keras API compatibility; not forwarded to
                the sub-layers.
            mask: Accepted for Keras API compatibility; unused.

        Returns:
            The output of the softmax Dense layer (two values per example).
        """
        x = self.emb(x)
        x = self.rnn(x)
        return self.dense(x)

# 학습/테스트 스텝 함수 정의

In [22]:
@tf.function
def train_step(model, inputs, labels, loss_object, optimizer, train_loss, train_accuracy):
    """Run one optimisation step on a single batch and update the train metrics.

    Args:
        model: Callable model; invoked with ``training=True``.
        inputs: Batch of model inputs.
        labels: Targets for ``inputs``, passed to the loss and the accuracy metric.
        loss_object: Callable ``(labels, predictions) -> loss``.
        optimizer: Optimizer whose ``apply_gradients`` updates the model variables.
        train_loss: Metric object called with the batch loss.
        train_accuracy: Metric object called with ``(labels, predictions)``.
    """
    # Record the forward pass so the loss can be differentiated afterwards.
    with tf.GradientTape() as grad_tape:
        outputs = model(inputs, training=True)
        batch_loss = loss_object(labels, outputs)

    weights = model.trainable_variables
    grads = grad_tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

    # Accumulate this batch into the running metrics.
    train_loss(batch_loss)
    train_accuracy(labels, outputs)
  
@tf.function
def test_step(model, inputs, labels, loss_object, test_loss, test_accuracy):
    """Evaluate one batch without updating weights and record the test metrics.

    Args:
        model: Callable model; invoked with ``training=False``.
        inputs: Batch of model inputs.
        labels: Targets for ``inputs``.
        loss_object: Callable ``(labels, predictions) -> loss``.
        test_loss: Metric object called with the batch loss.
        test_accuracy: Metric object called with ``(labels, predictions)``.
    """
    outputs = model(inputs, training=False)
    batch_loss = loss_object(labels, outputs)

    # Accumulate this batch into the running metrics.
    test_loss(batch_loss)
    test_accuracy(labels, outputs)

# 데이터셋

In [26]:
# Load the IMDB reviews as lists of word ids, restricted to NUM_WORDS ids.
imdb = tf.keras.datasets.imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=NUM_WORDS)

MAX_LEN = 32     # number of token ids kept per review
BATCH_SIZE = 32  # examples per mini-batch


def _pad_reviews(sequences):
    """Return `sequences` as fixed-length rows of MAX_LEN token ids.

    Shorter reviews are padded at the front with 0 so that all rows share
    the same length. Reviews longer than MAX_LEN are cut down by
    `pad_sequences` according to its default truncation setting.
    """
    return tf.keras.preprocessing.sequence.pad_sequences(
        sequences,
        value=0,
        padding='pre',
        maxlen=MAX_LEN,
    )


x_train = _pad_reviews(x_train)
x_test = _pad_reviews(x_test)

# Shuffle with a buffer covering the whole training set: a buffer smaller than
# the data set only shuffles approximately.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(len(x_train))
    .batch(BATCH_SIZE)
)
# Evaluation order does not matter, so the test set is only batched.
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]


# 학습 환경 정의

In [24]:
# Fresh, untrained model instance.
model = RNN()

# Sparse categorical cross-entropy in its default configuration, paired with
# the Adam optimizer in its default configuration.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Running loss averages, one per data split.
train_loss = tf.keras.metrics.Mean(name='train_loss')
test_loss = tf.keras.metrics.Mean(name='test_loss')

# Running accuracies, one per data split.
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

# 학습

In [25]:
for epoch in range(EPOCHS):
    # One pass over every training batch, updating the weights.
    for seqs, labels in train_ds:
        train_step(model, seqs, labels, loss_object, optimizer, train_loss, train_accuracy)

    # One pass over every test batch, metrics only.
    for test_seqs, test_labels in test_ds:
        test_step(model, test_seqs, test_labels, loss_object, test_loss, test_accuracy)

    # Report the metrics accumulated during this epoch (accuracies in percent).
    template = 'Epoch:{}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    summary = template.format(
        epoch + 1,
        train_loss.result(),
        train_accuracy.result() * 100,
        test_loss.result(),
        test_accuracy.result() * 100,
    )
    print(summary)

    # Clear the accumulators so the next epoch's numbers start from scratch.
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

Epoch:1, Loss: 0.5959213376045227, Accuracy: 65.51599884033203, Test Loss: 0.457592636346817, Test Accuracy: 78.54400634765625
Epoch:2, Loss: 0.36908474564552307, Accuracy: 83.87200164794922, Test Loss: 0.477140337228775, Test Accuracy: 77.60000610351562
Epoch:3, Loss: 0.2114686518907547, Accuracy: 91.7280044555664, Test Loss: 0.5953189134597778, Test Accuracy: 76.09199523925781
Epoch:4, Loss: 0.09055936336517334, Accuracy: 96.96800231933594, Test Loss: 0.807816743850708, Test Accuracy: 74.77200317382812
Epoch:5, Loss: 0.03735027834773064, Accuracy: 98.83200073242188, Test Loss: 1.0107612609863281, Test Accuracy: 74.31200408935547
Epoch:6, Loss: 0.01767653226852417, Accuracy: 99.46399688720703, Test Loss: 1.1951831579208374, Test Accuracy: 75.2439956665039
Epoch:7, Loss: 0.016713310033082962, Accuracy: 99.447998046875, Test Loss: 1.2709640264511108, Test Accuracy: 73.53600311279297
Epoch:8, Loss: 0.013645369559526443, Accuracy: 99.53600311279297, Test Loss: 1.42566978931427, Test Accur