<a href="https://colab.research.google.com/github/Yeo-Jun-Choi/DeepLearning/blob/master/IMBb%EB%B6%84%EC%84%9D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, preprocessing
import tensorflow_datasets as tfds

In [4]:
# --- Tunable configuration -------------------------------------------------
max_len = 200        # every review is padded/truncated to this many tokens
n_words = 10000      # vocabulary size: passed to the IMDB loader and the Embedding layer
dim_embedding = 256  # width of each learned token vector
EPOCHS = 20          # number of passes over the training set
BATCH_SIZE = 500     # samples per gradient step (also reused for evaluation)

# Training is stochastic (weight initialisation, dropout masks, batch shuffling).
# Seeding Python, NumPy and TensorFlow in one call makes repeated
# "Restart & Run All" runs comparable. Requires TensorFlow >= 2.7; GPU kernels
# may still introduce small run-to-run differences.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

In [5]:
def load_data():
  """Load the Keras IMDB dataset and pad every review to a fixed length.

  Reads the module-level constants ``n_words`` (forwarded as ``num_words`` to
  the loader) and ``max_len`` (forwarded as ``maxlen`` to ``pad_sequences``).

  Returns:
    ``((X_train, y_train), (X_test, y_test))`` where the ``X_*`` arrays are
    the padded sequences produced by ``pad_sequences`` and the ``y_*`` values
    are the labels exactly as returned by the loader.
  """
  # Fetch the raw splits; the loader is given num_words=n_words to cap the vocabulary.
  train_split, test_split = datasets.imdb.load_data(num_words=n_words)
  train_tokens, train_labels = train_split
  test_tokens, test_labels = test_split

  # Bring every review to exactly max_len tokens. pad_sequences is called with
  # its defaults (per the Keras docs, padding and truncation both happen at the
  # start of a sequence).
  pad = preprocessing.sequence.pad_sequences
  return (
      (pad(train_tokens, maxlen=max_len), train_labels),
      (pad(test_tokens, maxlen=max_len), test_labels),
  )

In [10]:
def build_model():
  """Build the (uncompiled) sentiment classifier.

  Layer stack: Input -> Embedding -> Dropout -> GlobalMaxPool1D ->
  Dense(128, relu) -> Dropout -> Dense(1, sigmoid).

  Reads the module-level constants ``max_len``, ``n_words`` and
  ``dim_embedding``.

  Returns:
    A ``Sequential`` model that takes an integer matrix of shape
    ``(batch, max_len)`` and outputs one sigmoid value per sample,
    shape ``(batch, 1)``.
  """
  model = models.Sequential()

  # Declare the input shape explicitly. Embedding's `input_length` argument is
  # deprecated (and ignored) in Keras 3; without a known input shape the model
  # stays unbuilt and model.summary() cannot report shapes or parameter counts.
  model.add(layers.Input(shape=(max_len,)))

  # Embedding: (batch, max_len) integer ids -> (batch, max_len, dim_embedding).
  # Every id must be strictly smaller than n_words (the embedding table size).
  model.add(layers.Embedding(n_words, dim_embedding))

  model.add(layers.Dropout(0.3))

  # Collapse the sequence axis: for each of the dim_embedding features keep the
  # maximum value over all max_len positions -> (batch, dim_embedding).
  model.add(layers.GlobalMaxPool1D())

  model.add(layers.Dense(128, activation='relu'))
  model.add(layers.Dropout(0.5))
  # Single sigmoid unit for the final per-sample prediction.
  model.add(layers.Dense(1, activation='sigmoid'))
  return model

In [7]:
# Materialise the padded train/test splits once; later cells reuse these names.
train_split, test_split = load_data()
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [11]:
# Instantiate a fresh, uncompiled network and print its layer-by-layer summary
# (output shapes and parameter counts).
model = build_model()
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 200, 256)          2560000   
_________________________________________________________________
dropout (Dropout)            (None, 200, 256)          0         
_________________________________________________________________
global_max_pooling1d (Global (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               32896     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 2,593,025
Trainable params: 2,593,025
Non-trainable params: 0
____________________________________________

In [13]:
# Configure training: Adam optimiser, binary cross-entropy loss to match the
# single sigmoid output unit, and accuracy as the reported metric.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [15]:
# Train the model. The returned History object is kept under its own name:
# it was previously bound to `score`, which the evaluation cell immediately
# rebinds to the list returned by model.evaluate(), discarding the per-epoch
# training curves.
# NOTE(review): validation_data is the test split, so the per-epoch val_*
# metrics are not an independent hold-out estimate.
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_test, y_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# Final evaluation on the test split. With the compile settings used above,
# evaluate() returns [loss, accuracy] in that order.
score = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
test_loss, test_accuracy = score[0], score[1]
print("\nTest score : ", test_loss)
print("Test accuracy : ", test_accuracy)


Test score :  0.4991203248500824
Test accuracy :  0.8506399989128113
