# IMDB 영화 리뷰 감성 분석 - Conv1D
- conv1d+conv1d+dense
- conv1d+LSTM+dense
- conv1d+dense+dense

In [1]:
import tensorflow as tf
import numpy as np
# One shared seed for the whole notebook: it is applied to the TensorFlow and
# NumPy global generators here and reused as random_state for the data split.
seed = 2022
tf.random.set_seed(seed)
np.random.seed(seed)

In [2]:
from tensorflow.keras.datasets import imdb
# The dataset is loaded once, further down, with the vocabulary capped through
# num_words. An uncapped imdb.load_data() call used to live in this cell, but
# every array it produced was overwritten before being read, so it was removed
# to avoid loading the full-vocabulary data for nothing.

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [12]:
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Embedding, LSTM, Dense

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.layers import Conv1D, MaxPooling1D,GlobalMaxPooling1D
from tensorflow.keras.layers import Dropout

In [4]:
# num_words is passed to imdb.load_data and is also used as the Embedding input
# dimension of every model in this notebook.
num_words = 10000

# Only the first (training) pair returned by load_data is kept; the second pair
# is discarded, and a hold-out test set is carved out of the training data later.
(X_train, y_train), (_, _) = imdb.load_data(num_words=num_words)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the loaded reviews as the test set. The split is stratified on
# the labels and uses the notebook-wide seed as random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    stratify=y_train,
    random_state=seed,
)

In [8]:
# Every review is turned into a sequence of exactly max_len tokens; the same
# value is passed to each Embedding layer as input_length.
max_len = 500
X_train, X_test = (
    pad_sequences(sequences, maxlen=max_len) for sequences in (X_train, X_test)
)


## case1 : conv1d x 2
Test-set result `[loss, accuracy]`: [0.30951714515686035, 0.8686000108718872]


In [17]:
# Case 1: two Conv1D + MaxPooling1D stages, a global max pool, and a single
# sigmoid unit for the binary sentiment label.
model1 = Sequential([
    Embedding(input_dim=num_words, output_dim=100, input_length=max_len),
    Dropout(rate=0.5),
    Conv1D(filters=64, kernel_size=7, activation='relu'),
    MaxPooling1D(pool_size=7),
    Conv1D(filters=64, kernel_size=5, activation='relu'),
    MaxPooling1D(pool_size=5),
    GlobalMaxPooling1D(),
    Dense(units=1, activation='sigmoid'),
])
model1.summary()
# Reading the shapes in the summary:
# - 494 = 500 - 7 + 1: the first convolution slides a window of 7 over 500 positions.
# - Each filter therefore looks at 7 consecutive words at a time.
# - 70 = 494 // 7: pooling in groups of 7 drops the remaining 4 positions.


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (None, 500, 100)          1000000   
                                                                 
 dropout_5 (Dropout)         (None, 500, 100)          0         
                                                                 
 conv1d_6 (Conv1D)           (None, 494, 64)           44864     
                                                                 
 max_pooling1d_4 (MaxPooling  (None, 70, 64)           0         
 1D)                                                             
                                                                 
 conv1d_7 (Conv1D)           (None, 66, 64)            20544     
                                                                 
 max_pooling1d_5 (MaxPooling  (None, 13, 64)           0         
 1D)                                                  

In [18]:
# Arguments are passed by keyword on purpose: the position of `metrics` in
# Model.compile is not the same in every Keras release, so a positional
# ['accuracy'] can end up bound to a different parameter.
model1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# File that receives the best checkpoint of model 1; it is reloaded for the
# final evaluation.
model1_path = 'best-imdb-m1.h5'

# Save only when the monitored value improves (the training log reports val_loss).
mc1 = ModelCheckpoint(filepath=model1_path, save_best_only=True, verbose=1)

# Stop training after 5 epochs without improvement.
es1 = EarlyStopping(patience=5)

In [20]:
# Train model 1 for at most 30 epochs. A 0.2 validation split supplies the
# val_loss that the checkpoint and early-stopping callbacks act on.
hist1 = model1.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=30,
    validation_split=0.2,
    callbacks=[mc1, es1],
)

Epoch 1/30
Epoch 1: val_loss improved from inf to 0.35983, saving model to best-imdb-m1.h5
Epoch 2/30
Epoch 2: val_loss improved from 0.35983 to 0.31904, saving model to best-imdb-m1.h5
Epoch 3/30
Epoch 3: val_loss did not improve from 0.31904
Epoch 4/30
Epoch 4: val_loss did not improve from 0.31904
Epoch 5/30
Epoch 5: val_loss did not improve from 0.31904
Epoch 6/30
Epoch 6: val_loss did not improve from 0.31904
Epoch 7/30
Epoch 7: val_loss did not improve from 0.31904


In [21]:
# Score the checkpointed model 1 on the held-out test split. The call is the
# last expression so its [loss, accuracy] result is displayed as the cell output.
best_model1 = load_model(filepath=model1_path)
best_model1.evaluate(x=X_test, y=y_test)



[0.30951714515686035, 0.8686000108718872]

## case2 : conv1d + LSTM
Test-set result `[loss, accuracy]`: [0.27335667610168457, 0.8907999992370605]


In [22]:
# Case 2: one Conv1D + MaxPooling1D stage shortens the sequence, then an LSTM
# reads the pooled features and a single sigmoid unit gives the prediction.
model2 = Sequential([
    Embedding(input_dim=num_words, output_dim=100, input_length=max_len),
    Dropout(rate=0.5),
    Conv1D(filters=64, kernel_size=5, activation='relu'),
    MaxPooling1D(pool_size=5),
    LSTM(units=100),
    Dense(units=1, activation='sigmoid'),
])
model2.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 500, 100)          1000000   
                                                                 
 dropout_6 (Dropout)         (None, 500, 100)          0         
                                                                 
 conv1d_8 (Conv1D)           (None, 496, 64)           32064     
                                                                 
 max_pooling1d_6 (MaxPooling  (None, 99, 64)           0         
 1D)                                                             
                                                                 
 lstm_1 (LSTM)               (None, 100)               66000     
                                                                 
 dense_5 (Dense)             (None, 1)                 101       
                                                      

In [23]:
# Arguments are passed by keyword on purpose: the position of `metrics` in
# Model.compile is not the same in every Keras release, so a positional
# ['accuracy'] can end up bound to a different parameter.
model2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [24]:
# File that receives the best checkpoint of model 2; it is reloaded for the
# final evaluation.
model2_path = 'best-imdb-m2.h5'

# Save only when the monitored value improves (the training log reports val_loss).
mc2 = ModelCheckpoint(filepath=model2_path, save_best_only=True, verbose=1)

# Stop training after 5 epochs without improvement.
es2 = EarlyStopping(patience=5)

In [25]:
# Train model 2 for at most 30 epochs. A 0.2 validation split supplies the
# val_loss that the checkpoint and early-stopping callbacks act on.
hist2 = model2.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=30,
    validation_split=0.2,
    callbacks=[mc2, es2],
)

Epoch 1/30
Epoch 1: val_loss improved from inf to 0.28306, saving model to best-imdb-m2.h5
Epoch 2/30
Epoch 2: val_loss improved from 0.28306 to 0.26199, saving model to best-imdb-m2.h5
Epoch 3/30
Epoch 3: val_loss did not improve from 0.26199
Epoch 4/30
Epoch 4: val_loss did not improve from 0.26199
Epoch 5/30
Epoch 5: val_loss did not improve from 0.26199
Epoch 6/30
Epoch 6: val_loss did not improve from 0.26199
Epoch 7/30
Epoch 7: val_loss did not improve from 0.26199


In [26]:
# Score the checkpointed model 2 on the held-out test split. The call is the
# last expression so its [loss, accuracy] result is displayed as the cell output.
best_model2 = load_model(filepath=model2_path)
best_model2.evaluate(x=X_test, y=y_test)



[0.27335667610168457, 0.8907999992370605]

## case3 : conv1d +dense
Test-set result `[loss, accuracy]`: [0.28843289613723755, 0.883400022983551]


In [27]:
# Case 3: one Conv1D + MaxPooling1D stage, a global max pool, then a dense
# hidden layer in front of the sigmoid output unit.
model3 = Sequential([
    Embedding(input_dim=num_words, output_dim=100, input_length=max_len),
    Dropout(rate=0.5),
    Conv1D(filters=64, kernel_size=5, activation='relu'),
    MaxPooling1D(pool_size=5),
    GlobalMaxPooling1D(),
    Dense(units=100, activation='relu'),  # fully connected hidden layer
    Dense(units=1, activation='sigmoid'),
])
model3.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_7 (Embedding)     (None, 500, 100)          1000000   
                                                                 
 dropout_7 (Dropout)         (None, 500, 100)          0         
                                                                 
 conv1d_9 (Conv1D)           (None, 496, 64)           32064     
                                                                 
 max_pooling1d_7 (MaxPooling  (None, 99, 64)           0         
 1D)                                                             
                                                                 
 global_max_pooling1d_3 (Glo  (None, 64)               0         
 balMaxPooling1D)                                                
                                                                 
 dense_6 (Dense)             (None, 100)              

In [28]:
# Arguments are passed by keyword on purpose: the position of `metrics` in
# Model.compile is not the same in every Keras release, so a positional
# ['accuracy'] can end up bound to a different parameter.
model3.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [29]:
# File that receives the best checkpoint of model 3; it is reloaded for the
# final evaluation.
model3_path = 'best-imdb-m3.h5'

# Save only when the monitored value improves (the training log reports val_loss).
mc3 = ModelCheckpoint(filepath=model3_path, save_best_only=True, verbose=1)

# Stop training after 5 epochs without improvement.
es3 = EarlyStopping(patience=5)

In [30]:
# Train model 3 for at most 30 epochs. A 0.2 validation split supplies the
# val_loss that the checkpoint and early-stopping callbacks act on.
# The history is stored as hist3 so it lines up with hist1 and hist2.
hist3 = model3.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=64,
    callbacks=[mc3, es3],
)
# The history was originally bound to `hist`; keep that name as an alias so any
# code that still reads it keeps working.
hist = hist3

Epoch 1/30
Epoch 1: val_loss improved from inf to 0.38979, saving model to best-imdb-m3.h5
Epoch 2/30
Epoch 2: val_loss improved from 0.38979 to 0.30973, saving model to best-imdb-m3.h5
Epoch 3/30
Epoch 3: val_loss improved from 0.30973 to 0.28607, saving model to best-imdb-m3.h5
Epoch 4/30
Epoch 4: val_loss did not improve from 0.28607
Epoch 5/30
Epoch 5: val_loss did not improve from 0.28607
Epoch 6/30
Epoch 6: val_loss did not improve from 0.28607
Epoch 7/30
Epoch 7: val_loss did not improve from 0.28607
Epoch 8/30
Epoch 8: val_loss did not improve from 0.28607


In [31]:
# Score the checkpointed model 3 on the held-out test split. The call is the
# last expression so its [loss, accuracy] result is displayed as the cell output.
best_model3 = load_model(filepath=model3_path)
best_model3.evaluate(x=X_test, y=y_test)



[0.28843289613723755, 0.883400022983551]