In [34]:
# Experiment: vocabulary size increased from 500 to 2000 words
# and padded sequence length increased from 100 to 300 tokens

In [35]:
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import platform

# Use the ASCII hyphen for negative tick labels; works around the minus sign
# rendering incorrectly once a Korean font is selected.
matplotlib.rcParams['axes.unicode_minus'] = False

# Korean font setup: choose a Hangul-capable font for the current OS instead
# of hand-editing this cell per machine.
_KOREAN_FONTS = {
    'Windows': 'Malgun Gothic',  # Windows users
    'Darwin': 'AppleGothic',     # macOS users
}
_korean_font = _KOREAN_FONTS.get(platform.system())
if _korean_font is not None:
    matplotlib.rcParams['font.family'] = _korean_font
# On other systems the matplotlib default font is kept; install a Hangul font
# and set 'font.family' manually if Korean labels are needed there.

matplotlib.rcParams['font.size'] = 10  # base font size

In [36]:
# Load the IMDB train/test splits with the vocabulary capped at
# num_words=2000 word indices.
(train_input, train_target), (test_input, test_target) = imdb.load_data(num_words=2000)

In [37]:
# Shape of the loaded training inputs, before the train/validation split.
train_input.shape

(25000,)

In [38]:
# Hold out 20% of the training data for validation; the fixed random_state
# keeps the split reproducible.
# NOTE: this rebinds train_input/train_target, so re-running this cell without
# reloading the data splits the already-reduced training set again.
train_input, val_input, train_target, val_target = train_test_split(
    train_input,
    train_target,
    test_size=0.2,
    random_state=42,
)

In [39]:
# Import from tensorflow.keras, matching the other Keras imports in this
# notebook, rather than mixing in the standalone `keras` package.
from tensorflow.keras.utils import pad_sequences

# Bring every sequence to exactly 300 tokens: shorter ones are zero-padded,
# longer ones are truncated (pad_sequences defaults).
train_seq = pad_sequences(train_input, maxlen=300)
val_seq = pad_sequences(val_input, maxlen=300)

In [40]:
# Confirm the padded validation set is a 2-D array of (samples, maxlen).
val_seq.shape

(5000, 300)

In [41]:
# Peek at the first padded training sequence; the padding zeros appear at the
# front of the array.
train_seq[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    1,   73,   89,
         81,   25,   60,  967,    6,   20,  141,   17,   14,   31,  127,
         12,   60,   28, 1360, 1107,   66,   45,    6,   20,   15,  497,
          8,   79,   17,  491,    8,  112,    6,    2,   20,   17,  614,
        691,    4,  436,   20,    9,    2,    6,  762,    7,  493,    2,
          6,  185,  250,   24,   55,    2,    5,   23,  350,    7,   15,
         82,   24,   15,  821,   66,   10,   10,   45,  578,   15,    4,
         20,  805,    8,   30,   17,  821,    5, 1621,   17,  614,  190,
          4,   20,    9,   43,   32,   99, 1214,   18,   15,    8,  157,
         46,   17, 1436,    4,    2,    5,    2,    9,   32, 1796,    5,
       1214,  267,   17,   73,   17,    2,   36,   

In [42]:
# Binary classifier over 300-token sequences drawn from a 2000-word vocabulary.
model = keras.Sequential()
# Declare the input shape explicitly instead of the deprecated
# Embedding(input_length=...) argument; this also builds the model so that
# model.summary() can report output shapes and parameter counts.
model.add(keras.Input(shape=(300,)))
# Embedding layer: maps each of the 2000 word indices to a 16-dimensional vector.
model.add(keras.layers.Embedding(2000, 16))
# Recurrent layer with 8 units, followed by a single sigmoid output unit.
model.add(keras.layers.SimpleRNN(8))
model.add(keras.layers.Dense(1, activation='sigmoid'))



In [43]:
# Print the layer-by-layer summary of the model.
model.summary()

In [44]:
# Compile with RMSprop (learning rate 1e-4) and binary cross-entropy, tracking accuracy.
rmsprop = keras.optimizers.RMSprop(learning_rate=0.0001)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

# Only overwrite the checkpoint when the monitored validation loss improves.
# Without save_best_only the file would hold the weights of the last epoch,
# i.e. up to `patience` epochs past the best one.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    'simpleRnn_embedding1_model.keras',
    save_best_only=True,
)
# Stop after 3 epochs without improvement and roll the in-memory model back
# to its best weights.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

# epochs=100 is an upper bound; early stopping normally ends training sooner.
history = model.fit(
    train_seq,
    train_target,
    batch_size=64,
    epochs=100,
    validation_data=(val_seq, val_target),
    callbacks=[checkpoint_cb, early_stopping_cb],
)


Epoch 1/100
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 24ms/step - accuracy: 0.5075 - loss: 0.6960 - val_accuracy: 0.5146 - val_loss: 0.6933
Epoch 2/100
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step - accuracy: 0.5476 - loss: 0.6873 - val_accuracy: 0.5206 - val_loss: 0.6923
Epoch 3/100
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.5811 - loss: 0.6792 - val_accuracy: 0.5570 - val_loss: 0.6848
Epoch 4/100
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step - accuracy: 0.6412 - loss: 0.6628 - val_accuracy: 0.6506 - val_loss: 0.6570
Epoch 5/100
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.7145 - loss: 0.6354 - val_accuracy: 0.6984 - val_loss: 0.6354
Epoch 6/100
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.7558 - loss: 0.6065 - val_accuracy: 0.7380 - val_loss: 0.6074
Epoch 7/100
[1m

In [45]:
# Evaluate on the validation split; the result is [loss, accuracy], matching
# the loss and metric the model was compiled with.
model.evaluate(val_seq,val_target)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8515 - loss: 0.3694


[0.3624987006187439, 0.8551999926567078]

In [46]:
# Pad/truncate the test sequences to the same length (300) used for the
# training and validation data, then report [loss, accuracy] on the test set.
test_seq = pad_sequences(sequences=test_input, maxlen=300)
model.evaluate(x=test_seq, y=test_target)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8580 - loss: 0.3575


[0.35794275999069214, 0.8569599986076355]