# IMDB 영화리뷰 감성분석 - Conv1D
- Conv1D + Conv1D + Dense
- Conv1D + LSTM + Dense
- Conv1D + Dense + Dense

In [1]:
import numpy as np
import tensorflow as tf
# Single seed shared by NumPy, TensorFlow and the train/test split below,
# so that repeated runs start from the same random state.
seed = 2022
tf.random.set_seed(seed)  # TensorFlow-side random ops
np.random.seed(seed)      # NumPy's global generator

In [2]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GlobalMaxPooling1D, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [3]:
# Vocabulary size handed to the loader here and to the Embedding layers later.
num_words = 10_000
# Only the first (train) pair returned by the loader is kept; the second pair
# is discarded and a test set is carved out of the train pair further down.
(X_train, y_train), (_, _) = imdb.load_data(num_words=num_words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [4]:
# Hold out 20% of the loaded reviews as a test set. The split is stratified on
# the labels and driven by the notebook-wide seed so it is repeatable.
# NOTE(review): X_train / y_train are overwritten in place, so re-running this
# cell without reloading the data splits the already-reduced set again.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=seed,
    stratify=y_train,
)
# Show the shapes of the four resulting arrays as the cell output.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((20000,), (5000,), (20000,), (5000,))

In [5]:
# Every review is brought to exactly max_len tokens so that both splits become
# rectangular arrays matching the Embedding layers' input_length.
max_len = 500
X_train, X_test = (
    pad_sequences(sequences, maxlen=max_len) for sequences in (X_train, X_test)
)

- Case 1) Conv1D X 2

In [6]:
# Case 1: a 100-dimensional embedding followed by two Conv1D/MaxPooling1D
# stages, collapsed by global max pooling into a single sigmoid output unit.
model1 = Sequential()
model1.add(Embedding(num_words, 100, input_length=max_len))
model1.add(Dropout(0.5))
# First convolution stage: 64 filters of width 7, pooled by 7.
model1.add(Conv1D(64, 7, activation="relu"))
model1.add(MaxPooling1D(7))
# Second convolution stage: 64 filters of width 5, pooled by 5.
model1.add(Conv1D(64, 5, activation="relu"))
model1.add(MaxPooling1D(5))
# NOTE(review): a MaxPooling1D directly before GlobalMaxPooling1D looks
# largely redundant -- confirm whether the extra pooling step is intended.
model1.add(GlobalMaxPooling1D())
model1.add(Dense(1, activation="sigmoid"))
model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 100)          1000000   
                                                                 
 dropout (Dropout)           (None, 500, 100)          0         
                                                                 
 conv1d (Conv1D)             (None, 494, 64)           44864     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 70, 64)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 66, 64)            20544     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 13, 64)           0         
 1D)                                                    

In [7]:
# Train with Adam on binary cross-entropy and report accuracy.
# The arguments are passed by keyword on purpose: the position of `metrics`
# in Model.compile() is not the same across Keras releases, so a positional
# ["accuracy"] can be bound to a different parameter (loss_weights) and the
# accuracy metric silently lost.
model1.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# File that receives the best checkpoint of the Conv1D + Conv1D model.
model1_path = "best-conv1d-conv1d.h5"
# Overwrite the checkpoint only when the validation loss improves.
mc1 = ModelCheckpoint(model1_path, monitor="val_loss", save_best_only=True)
# Stop once the validation loss has not improved for 10 consecutive epochs.
es1 = EarlyStopping(monitor="val_loss", patience=10)

In [17]:
# Fit the Conv1D x 2 model for at most 30 epochs. 20% of the training arrays
# is held out for validation, and the checkpoint / early-stopping callbacks
# are attached to the run.
hist = model1.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=30,
    validation_split=0.2,
    callbacks=[mc1, es1],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30


In [9]:
# Reload the checkpoint written during training (not the last-epoch weights)
# and score it on the held-out test split; the cell output is [loss, accuracy].
best_model1 = load_model(model1_path)
best_model1.evaluate(x=X_test, y=y_test)



[0.3091760575771332, 0.8687999844551086]

- Case 2) Conv1D + LSTM

In [19]:
# Case 2: a 100-dimensional embedding, one Conv1D/MaxPooling1D stage that
# shortens the sequence, then an LSTM that summarises it for the sigmoid unit.
model2 = Sequential()
model2.add(Embedding(num_words, 100, input_length=max_len))
model2.add(Dropout(0.5))
# Convolution stage: 64 filters of width 7, pooled by 7.
model2.add(Conv1D(64, 7, activation="relu"))
model2.add(MaxPooling1D(7))
# The LSTM consumes the pooled sequence and emits a single 100-unit vector.
model2.add(LSTM(100))
model2.add(Dense(1, activation="sigmoid"))
model2.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 500, 100)          1000000   
                                                                 
 dropout_3 (Dropout)         (None, 500, 100)          0         
                                                                 
 conv1d_4 (Conv1D)           (None, 494, 64)           44864     
                                                                 
 max_pooling1d_4 (MaxPooling  (None, 70, 64)           0         
 1D)                                                             
                                                                 
 lstm_1 (LSTM)               (None, 100)               66000     
                                                                 
 dense_4 (Dense)             (None, 1)                 101       
                                                      

In [20]:
# Train with Adam on binary cross-entropy and report accuracy.
# The arguments are passed by keyword on purpose: the position of `metrics`
# in Model.compile() is not the same across Keras releases, so a positional
# ["accuracy"] can be bound to a different parameter (loss_weights) and the
# accuracy metric silently lost.
model2.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# File that receives the best checkpoint of the Conv1D + LSTM model.
model2_path = "best-conv1d-LSTM.h5"
# Overwrite the checkpoint only when the validation loss improves.
mc2 = ModelCheckpoint(model2_path, monitor="val_loss", save_best_only=True)
# Stop once the validation loss has not improved for 10 consecutive epochs.
es2 = EarlyStopping(monitor="val_loss", patience=10)

In [21]:
# Fit the Conv1D + LSTM model for at most 30 epochs. 20% of the training
# arrays is held out for validation, and the checkpoint / early-stopping
# callbacks are attached to the run.
hist = model2.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=30,
    validation_split=0.2,
    callbacks=[mc2, es2],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30


In [22]:
# Reload the checkpoint written during training (not the last-epoch weights)
# and score it on the held-out test split; the cell output is [loss, accuracy].
best_model2 = load_model(model2_path)
best_model2.evaluate(x=X_test, y=y_test)



[0.2842826545238495, 0.8813999891281128]

- Case 3) Conv1D + Dense

In [23]:
# Case 3: a 100-dimensional embedding, one Conv1D/MaxPooling1D stage, global
# max pooling, then a 100-unit hidden Dense layer before the sigmoid output.
model3 = Sequential()
model3.add(Embedding(num_words, 100, input_length=max_len))
model3.add(Dropout(0.5))
# Convolution stage: 64 filters of width 7, pooled by 7.
model3.add(Conv1D(64, 7, activation="relu"))
model3.add(MaxPooling1D(7))
# Collapse the time axis to one value per filter.
model3.add(GlobalMaxPooling1D())
model3.add(Dense(100, activation="relu"))
model3.add(Dense(1, activation="sigmoid"))
model3.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 500, 100)          1000000   
                                                                 
 dropout_4 (Dropout)         (None, 500, 100)          0         
                                                                 
 conv1d_5 (Conv1D)           (None, 494, 64)           44864     
                                                                 
 max_pooling1d_5 (MaxPooling  (None, 70, 64)           0         
 1D)                                                             
                                                                 
 global_max_pooling1d_2 (Glo  (None, 64)               0         
 balMaxPooling1D)                                                
                                                                 
 dense_5 (Dense)             (None, 100)              

In [24]:
# Train with Adam on binary cross-entropy and report accuracy.
# The arguments are passed by keyword on purpose: the position of `metrics`
# in Model.compile() is not the same across Keras releases, so a positional
# ["accuracy"] can be bound to a different parameter (loss_weights) and the
# accuracy metric silently lost.
model3.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# File that receives the best checkpoint of the Conv1D + Dense model.
model3_path = "best-conv1d-Dense.h5"
# Overwrite the checkpoint only when the validation loss improves.
mc3 = ModelCheckpoint(model3_path, monitor="val_loss", save_best_only=True)
# Stop once the validation loss has not improved for 10 consecutive epochs.
es3 = EarlyStopping(monitor="val_loss", patience=10)

In [25]:
# Fit the Conv1D + Dense model for at most 30 epochs. 20% of the training
# arrays is held out for validation, and the checkpoint / early-stopping
# callbacks are attached to the run.
hist = model3.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=30,
    validation_split=0.2,
    callbacks=[mc3, es3],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30


In [26]:
# Reload the checkpoint written during training (not the last-epoch weights)
# and score it on the held-out test split; the cell output is [loss, accuracy].
best_model3 = load_model(model3_path)
best_model3.evaluate(x=X_test, y=y_test)



[0.29662615060806274, 0.8759999871253967]