In [None]:
#모듈 호출
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow import keras
import tensorflow as tf

# Log that the imports succeeded.
print("✨모듈 호출이 완료되었습니다.")

# Reproducibility: seed the Python, NumPy and TensorFlow random generators,
# then ask TensorFlow to use deterministic op implementations. Seeding alone
# does not make the ops deterministic, which the message below claims.
tf.keras.utils.set_random_seed(42)
tf.config.experimental.enable_op_determinism()
print("⚙️케라스 랜덤 시드를 사용하고, 텐서플로 연산을 결정적으로 만들었습니다.")

In [None]:
# IMDB review dataset loader.
from tensorflow.keras.datasets import imdb

# Load the dataset's predefined train/test split, restricted to a
# 500-word vocabulary (num_words=500).
(train_input, train_target), (test_input, test_target) = imdb.load_data(
    num_words=500
)

In [None]:
# Shapes of the training and test inputs as loaded.
print(train_input.shape, test_input.shape)

In [None]:
# Look at one encoded review and at the first 20 labels. Printing 20 whole
# reviews here would flood the output and repeat the line above, while the
# targets would otherwise never be inspected.
print(train_input[0])
print(train_target[:20])

In [None]:
# Hold out 20% of the training reviews as a validation set.
# NOTE: this rebinds train_input/train_target, so re-running this cell on its
# own splits the already-reduced training set again.
train_input, val_input, train_target, val_target = train_test_split(
    train_input,
    train_target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Token count of every training review, followed by its mean and median.
lengths = np.array(list(map(len, train_input)))
print(np.mean(lengths), np.median(lengths))

In [None]:
# Histogram of review lengths (in tokens).
fig, ax = plt.subplots()
ax.hist(lengths)
ax.set_xlabel('length')
ax.set_ylabel('frequency')
plt.show()

In [None]:
# Sequence padding utility.
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Force every training review to exactly 100 tokens. padding/truncating are
# written out at their default values: short reviews are zero-padded at the
# front and long reviews lose their leading tokens.
train_seq = pad_sequences(
    train_input,
    maxlen=100,
    padding='pre',
    truncating='pre',
)

In [None]:
# Shape after padding/truncation; the second dimension should be maxlen (100).
print(train_seq.shape)

In [None]:
# First padded training sequence.
print(train_seq[0])

In [None]:
# Last 10 tokens of the first raw review, to compare against the tail of the
# padded sequence and see which end was kept.
print(train_input[0][-10:])

In [None]:
# Padded sequence at index 5 -- presumably a short review that shows the zero
# padding; confirm from the printed output.
print(train_seq[5])

In [None]:
# Apply the same 100-token padding/truncation to the validation reviews
# (padding/truncating shown at their default values).
val_seq = pad_sequences(
    val_input,
    maxlen=100,
    padding='pre',
    truncating='pre',
)

### 기본적인 순환 신경망 모델

In [None]:
# Build the baseline recurrent model.
from tensorflow import keras

# Input: 100 timesteps, each a 500-wide one-hot vector.
# SimpleRNN(8) -> single sigmoid unit for binary classification.
model = keras.Sequential([
    keras.layers.SimpleRNN(8, input_shape=(100, 500)),
    keras.layers.Dense(1, activation='sigmoid'),
])

In [None]:
# One-hot encode the padded training sequences.
# num_classes is pinned to the vocabulary size (500) so the encoded depth
# always matches the model's (100, 500) input. Without it the depth is
# inferred from the largest index that happens to occur in the data.
train_oh = keras.utils.to_categorical(train_seq, num_classes=500)

print(train_oh.shape)

In [None]:
# First 12 entries of the one-hot vector for the first token of the first
# review, and the sum of that vector.
print(train_oh[0][0][:12])
print(np.sum(train_oh[0][0]))

In [None]:
# One-hot encode the validation sequences with the same fixed depth (500) as
# the training data. Inferring the depth from this smaller split could yield
# a width that does not match the model input.
val_oh = keras.utils.to_categorical(val_seq, num_classes=500)

In [None]:
# Print the layer/parameter summary of the SimpleRNN model built above.
model.summary()

In [None]:
# Train the one-hot SimpleRNN model.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks.
# save_best_only=True keeps only the epoch with the best monitored value
# (val_loss by default) on disk. Without it the file is overwritten every
# epoch and ends up holding the last epoch, which is `patience` epochs past
# the best one when early stopping fires.
checkpoint_cb = keras.callbacks.ModelCheckpoint('best-simplernn-model.keras',
                                                save_best_only=True)
# Stop after 3 epochs without improvement and roll the in-memory model back
# to its best weights.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

# Fit on the one-hot data, validating on the held-out split.
history = model.fit(train_oh, train_target, epochs=100,
                    validation_data=(val_oh, val_target),
                    callbacks=[checkpoint_cb, early_stopping_cb])


In [None]:
# Plot training vs. validation loss per epoch.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend(['train', 'val'])
plt.show()

### 임베딩

In [None]:
# SimpleRNN model with a learned word embedding instead of one-hot input.
model2 = keras.Sequential()

# The Embedding vocabulary size must cover every index in the data. The
# dataset is loaded with num_words=500, so input_dim is 500; a smaller table
# would receive out-of-range indices.
model2.add(keras.layers.Embedding(500, 16, input_shape=(100,)))
model2.add(keras.layers.SimpleRNN(8))
model2.add(keras.layers.Dense(1, activation='sigmoid'))

model2.summary()

In [None]:
# Train the embedding-based SimpleRNN (model2).
# This cell must compile and fit model2, not the one-hot `model`, and feed it
# the integer sequences (train_seq) so that the training and validation
# inputs have the same format.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model2.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks: keep only the best epoch on disk, and stop after 3 epochs
# without improvement while restoring the best weights in memory.
checkpoint_cb = keras.callbacks.ModelCheckpoint('best-simplernne-Embedding-model.keras',
                                                save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

# Fit on the padded integer sequences.
history = model2.fit(train_seq, train_target, epochs=100, batch_size=64,
                     validation_data=(val_seq, val_target),
                     callbacks=[checkpoint_cb, early_stopping_cb])


In [None]:
# Plot training vs. validation loss per epoch.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend(['train', 'val'])
plt.show()

### LSTM 모델

In [None]:
# Reset the data for the LSTM section: reload the IMDB reviews with the same
# 500-word vocabulary ...
(train_input, train_target), (test_input, test_target) = imdb.load_data(
    num_words=500
)

# ... and carve the same 20% validation split out of the training set.
train_input, val_input, train_target, val_target = train_test_split(
    train_input,
    train_target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Pad/truncate every review to 100 tokens (padding/truncating shown at their
# default values).
train_seq = pad_sequences(train_input, maxlen=100, padding='pre', truncating='pre')
val_seq = pad_sequences(val_input, maxlen=100, padding='pre', truncating='pre')

In [None]:
# LSTM network: Embedding(500 -> 16) -> LSTM(8) -> sigmoid output.
model = keras.Sequential([
    keras.layers.Embedding(500, 16, input_shape=(100,)),
    keras.layers.LSTM(8),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Train the LSTM model.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks.
# save_best_only=True makes the 'best-...' file actually hold the best epoch
# instead of being overwritten by every later epoch.
checkpoint_cb = keras.callbacks.ModelCheckpoint('best-LSTM-model.keras',
                                                save_best_only=True)
# Stop after 3 epochs without improvement; restore the best weights in memory.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

# Fit on the padded integer sequences.
history = model.fit(train_seq, train_target, epochs=100, batch_size=64,
                    validation_data=(val_seq, val_target),
                    callbacks=[checkpoint_cb, early_stopping_cb])

In [None]:
# Plot training vs. validation loss per epoch.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend(['train', 'val'])
plt.show()

### LSTM에 드롭아웃 적용

In [None]:
# LSTM network with dropout (rate 0.3) configured on the LSTM layer.
model = keras.Sequential([
    keras.layers.Embedding(500, 16, input_shape=(100,)),
    keras.layers.LSTM(8, dropout=0.3),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Train the LSTM-with-dropout model.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks.
# save_best_only=True makes the 'best-...' file actually hold the best epoch
# instead of being overwritten by every later epoch.
checkpoint_cb = keras.callbacks.ModelCheckpoint('best-LSTM-dropout-model.keras',
                                                save_best_only=True)
# Stop after 3 epochs without improvement; restore the best weights in memory.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

# Fit on the padded integer sequences.
history = model.fit(train_seq, train_target, epochs=100, batch_size=64,
                    validation_data=(val_seq, val_target),
                    callbacks=[checkpoint_cb, early_stopping_cb])

In [None]:
# Plot training vs. validation loss per epoch.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend(['train', 'val'])
plt.show()

### LSTM 두 개의 층을 연결하기

In [None]:
# Two stacked LSTM layers, each with dropout 0.3.
# The first LSTM sets return_sequences=True so it passes its output at every
# timestep to the second LSTM rather than only the final one.
model = keras.Sequential([
    keras.layers.Embedding(500, 16, input_shape=(100,)),
    keras.layers.LSTM(8, dropout=0.3, return_sequences=True),
    keras.layers.LSTM(8, dropout=0.3),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Train the two-layer LSTM model.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks.
# save_best_only=True makes the 'best-...' file actually hold the best epoch
# instead of being overwritten by every later epoch.
checkpoint_cb = keras.callbacks.ModelCheckpoint('best-LSTM-dropout-LSTM-model.keras',
                                                save_best_only=True)
# Stop after 3 epochs without improvement; restore the best weights in memory.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

# Fit on the padded integer sequences.
history = model.fit(train_seq, train_target, epochs=100, batch_size=64,
                    validation_data=(val_seq, val_target),
                    callbacks=[checkpoint_cb, early_stopping_cb])

In [None]:
# Plot training vs. validation loss per epoch.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend(['train', 'val'])
plt.show()

### GRU 신경망

In [None]:
# GRU network: Embedding(500 -> 16) -> GRU(8) -> sigmoid output.
model = keras.Sequential([
    keras.layers.Embedding(500, 16, input_shape=(100,)),
    keras.layers.GRU(8),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Train the GRU model.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks.
# save_best_only=True makes the 'best-...' file actually hold the best epoch
# instead of being overwritten by every later epoch.
checkpoint_cb = keras.callbacks.ModelCheckpoint('best-GRU-model.keras',
                                                save_best_only=True)
# Stop after 3 epochs without improvement; restore the best weights in memory.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

# Fit on the padded integer sequences.
history = model.fit(train_seq, train_target, epochs=100, batch_size=64,
                    validation_data=(val_seq, val_target),
                    callbacks=[checkpoint_cb, early_stopping_cb])

In [None]:
# Plot training vs. validation loss per epoch.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend(['train', 'val'])
plt.show()

### 마무리

In [None]:
# Final evaluation on the held-out test set.
# Pad/truncate the test reviews to the same 100-token length used in training.
test_seq = pad_sequences(test_input, maxlen=100)

# test_seq holds integer word indices, so the loaded model must start with an
# Embedding layer. The 'best-simplernn-model.keras' checkpoint was trained on
# one-hot input of shape (100, 500) and would reject these sequences, so load
# the GRU checkpoint saved earlier in this notebook instead.
rnn_model = keras.models.load_model('best-GRU-model.keras')

rnn_model.evaluate(test_seq, test_target)