## 1. 데이터 준비

In [None]:
# gensim 패키지는 워드벡터를 다루는데 유용함.
import os
import numpy as np
from gensim.models import KeyedVectors

# Location of the pretrained GoogleNews Word2Vec binary under $HOME.
word2vec_path = (
    os.getenv('HOME')
    + '/aiffel/sentiment_classification/data/GoogleNews-vectors-negative300.bin.gz'
)
# `limit` caps loading at 1,000,000 vectors (per the original note, these are
# the most frequently used words).
word2vec = KeyedVectors.load_word2vec_format(
    word2vec_path,
    binary=True,
    limit=1000000,
)

vocab_size = 10000     # vocabulary size (10,000 words)
word_vector_dim = 300  # dimensionality of each word vector
# Start from random values; rows with a Word2Vec match are overwritten below.
embedding_matrix = np.random.rand(vocab_size, word_vector_dim)

# Copy the pretrained vector for every vocabulary word that Word2Vec knows.
# Indices 0-3 are skipped (presumably reserved special tokens; confirm against
# wherever index_to_word is built).
for i in range(4, vocab_size):
    word = index_to_word[i]
    if word in word2vec:
        embedding_matrix[i] = word2vec[word]

## 2. 모델 구성

In [None]:
from tensorflow.keras.initializers import Constant

# 1-D CNN binary classifier whose embedding layer is initialised from
# embedding_matrix.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        vocab_size,
        word_vector_dim,
        embeddings_initializer=Constant(embedding_matrix),
        input_length=maxlen,
        trainable=True,  # True => the embedding weights are fine-tuned
    ),
    # Two convolution stages: local max-pooling, then global max-pooling.
    tf.keras.layers.Conv1D(16, 7, activation='relu'),
    tf.keras.layers.MaxPooling1D(5),
    tf.keras.layers.Conv1D(16, 7, activation='relu'),
    tf.keras.layers.GlobalMaxPooling1D(),
    # Dense head ending in a single sigmoid unit.
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

## 3. 모델 학습

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# as the tracked metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 20 epochs in batches of 512, validating on (x_val, y_val);
# the return value of fit() is kept in `history`.
history = model.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=20,
    verbose=1,
    validation_data=(x_val, y_val),
)

## 4. 모델 평가

In [None]:
# Evaluate the trained model on the test split and print whatever
# evaluate() returns.
results = model.evaluate(x=x_test, y=y_test, verbose=2)
print(results)

In [None]:
import matplotlib.pyplot as plt

# Metrics recorded by model.fit(), keyed by metric name.
history_dict = history.history
print(history_dict.keys())

acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
loss = history_dict['loss']
val_loss = history_dict['val_loss']
epochs = range(1, len(acc) + 1)  # 1-based epoch numbers for the x-axis

# Loss and accuracy get their own axes. Drawing all four curves on one axes
# with the same 'bo' / 'b' styles made the loss curves indistinguishable from
# the accuracy curves.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))

ax_loss.plot(epochs, loss, 'bo', label='Training loss')
ax_loss.plot(epochs, val_loss, 'b', label='Validation loss')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

ax_acc.plot(epochs, acc, 'bo', label='Training acc')
ax_acc.plot(epochs, val_acc, 'b', label='Validation acc')
ax_acc.set_title('Training and validation accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

fig.tight_layout()
plt.show()