In [None]:
pip install konlpy

In [87]:
import pandas as pd
from tqdm import tqdm
from tensorflow.keras.models import load_model
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from konlpy.tag import Okt
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pickle
from tensorflow.keras.layers import Embedding, Dense, LSTM, Bidirectional, GlobalAveragePooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.python.client import device_lib
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

#데이터 전처리

##영화 + 스팀 게임 리뷰 / 테스트 데이터

In [131]:
# Read the three corpora with pandas' tab-separated reader. Later cells use
# the `document` (review text) and `label` columns of each frame.
steam_movie_train, steam_movie_test, test_data = (
    pd.read_table(corpus_path)
    for corpus_path in (
        'steam_movie_train.txt',
        'steam_movie_test.txt',
        'test_sample_0or1.txt',
    )
)

# Report the raw row counts before any cleaning is applied.
for caption, frame in (
    ("train len: ", steam_movie_train),
    ("test len: ", steam_movie_test),
    ("test data len: ", test_data),
):
    print(caption, len(frame))

train len:  225000
test len:  75000
test data len:  9284


In [None]:
okt = Okt()
print('-'*20, "train data pre-processing...", '-'*20)

# Clean the training reviews in one non-mutating chain:
#   1. drop rows whose `document` text repeats an earlier row,
#   2. drop rows containing any missing value,
#   3. remove every character that is not Hangul (jamo or syllable) or a space,
#   4. remove leading spaces,
#   5. turn documents that are now empty into NaN and drop those rows.
# `regex=True` is explicit because pandas >= 2.0 treats the pattern as a
# literal string by default, which would silently skip steps 3 and 4.
# The column is rebuilt with `assign` rather than calling
# `replace(..., inplace=True)` on a selected column, which is a chained
# in-place write that is not guaranteed to reach the parent frame.
steam_movie_train = (
    steam_movie_train
    .drop_duplicates(subset=['document'])
    .dropna(how='any')
    .assign(
        document=lambda frame: frame['document']
        .str.replace("[^ㄱ-ㅎㅏ-ㅣ가-힣 ]", "", regex=True)
        .str.replace('^ +', "", regex=True)
        .replace('', np.nan)
    )
    .dropna(how='any')
)

print("Any null value in train data: ", steam_movie_train.isnull().values.any())
print("train len after pre-processing: ", len(steam_movie_train))


print('-'*20, "train data tokenizing...", '-'*20)

# Tokens filtered out after morphological analysis.
stopwords = ['의','가','이','은','들','는','좀','잘','걍','과','도','를','으로','자','에','와','한','하다']
# Hashed copy so the per-token membership test inside the loop is O(1)
# instead of a scan over the list; `stopwords` itself stays a list for the
# cells that reuse it.
stopword_lookup = frozenset(stopwords)

X_train = []
for sentence in tqdm(steam_movie_train['document']):
    tokenized_sentence = okt.morphs(sentence, stem=True)  # morpheme tokens, stemmed
    X_train.append([word for word in tokenized_sentence if word not in stopword_lookup])

x_train = X_train

# Persist the token lists; the captured progress bar shows this loop takes
# roughly 24 minutes on the full training set.
with open('x_train.pkl', 'wb') as f:
    pickle.dump(x_train, f)

# To reuse a previous run instead of re-tokenizing, load the saved list:
# with open('x_train.pkl', 'rb') as f:
#     x_train = pickle.load(f)

# First pass over the full vocabulary; a later cell reads `word_counts` from
# this tokenizer to decide how many words to keep.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(x_train)

print('-'*20, "test data pre-processing...", '-'*20)

# Same cleaning as the training split: de-duplicate on `document`, keep only
# Hangul characters and spaces, strip leading spaces, then drop rows that are
# empty or contain a missing value.
# `regex=True` is explicit because pandas >= 2.0 treats the pattern as a
# literal string by default, which would leave the text unchanged.
# The column is rebuilt with `assign` instead of an in-place `replace` on a
# selected column, which is not guaranteed to write back to the frame.
steam_movie_test = (
    steam_movie_test
    .drop_duplicates(subset=['document'])
    .assign(
        document=lambda frame: frame['document']
        .str.replace("[^ㄱ-ㅎㅏ-ㅣ가-힣 ]", "", regex=True)
        .str.replace('^ +', "", regex=True)
        .replace('', np.nan)
    )
    .dropna(how='any')
)

print("Any null value in test data: ", steam_movie_test.isnull().values.any())
print('test len after pre-processing:',len(steam_movie_test))


print('-'*20, "test data tokenizing...", '-'*20)

# Morphological tokenization (stemmed) followed by stopword removal, one
# token list per review.
X_test = []
for sentence in tqdm(steam_movie_test['document']):
    tokenized_sentence = okt.morphs(sentence, stem=True)
    X_test.append([word for word in tokenized_sentence if word not in stopwords])

x_test = X_test

# Persist the token lists so the slow tokenization pass can be skipped later.
with open('x_test.pkl', 'wb') as f:
    pickle.dump(x_test, f)

# To reuse a previous run instead of re-tokenizing, load the saved list:
# with open('x_test.pkl', 'rb') as f:
#     x_test = pickle.load(f)

print('-'*20, "review test data pre-processing...", '-'*20)

# Clean the separate review evaluation set the same way as the other splits:
# de-duplicate on `document`, keep only Hangul characters and spaces, strip
# leading spaces, then drop rows that are empty or contain a missing value.
# `regex=True` is explicit because pandas >= 2.0 treats the pattern as a
# literal string by default, which would leave the text unchanged.
# The column is rebuilt with `assign` instead of an in-place `replace` on a
# selected column, which is not guaranteed to write back to the frame.
test_data = (
    test_data
    .drop_duplicates(subset=['document'])
    .assign(
        document=lambda frame: frame['document']
        .str.replace("[^ㄱ-ㅎㅏ-ㅣ가-힣 ]", "", regex=True)
        .str.replace('^ +', "", regex=True)
        .replace('', np.nan)
    )
    .dropna(how='any')
)

print("Any null value in review test data: ", test_data.isnull().values.any())
print('test len after pre-processing:',len(test_data))

print('-'*20, "review test data tokenizing...", '-'*20)

# Morphological tokenization (stemmed) followed by stopword removal, one
# token list per review.
review_test = []
for sentence in tqdm(test_data['document']):
    tokenized_sentence = okt.morphs(sentence, stem=True)
    review_test.append([word for word in tokenized_sentence if word not in stopwords])

# Persist the token lists so the tokenization pass can be skipped later.
with open('review_test.pkl', 'wb') as f:
    pickle.dump(review_test, f)

# To reuse a previous run instead of re-tokenizing, load the saved list:
# with open('review_test.pkl', 'rb') as f:
#     review_test = pickle.load(f)

# Sanity check: number of tokenized documents per split.
print('-'*20, "loading saved list data...", '-'*20)
print("x_train len: ", len(x_train))
print("x_test len: ", len(x_test))
print("review test: ", len(review_test))


# Vocabulary pruning: words seen fewer than `threshold` times in the training
# token lists are counted as rare and excluded from the final vocabulary.
threshold = 3
total_cnt = len(tokenizer.word_index)   # number of distinct words seen by the first fit
total_freq = 0  # placeholder for the corpus-wide token count; not accumulated in this cell

# Frequencies of every word that falls below the threshold.
rare_frequencies = [count for count in tokenizer.word_counts.values() if count < threshold]
rare_cnt = len(rare_frequencies)    # how many distinct rare words there are
rare_freq = sum(rare_frequencies)   # how many token occurrences those rare words account for

# Keep the non-rare words; the +1 leaves room for index 0, which the Keras
# tokenizer never assigns to a word.
vocab_size = total_cnt - rare_cnt + 1

# Refit with a vocabulary cap so that words with a frequency of 2 or less are
# left out when texts are converted to sequences.
# NOTE: `x_train` must still hold token lists (not integer sequences) here.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(x_train)

print('-'*20, "text to sequences...", '-'*20)
# Map each token list to a list of integer word indices using the capped
# tokenizer; tokens outside the kept vocabulary are omitted.
x_train = tokenizer.texts_to_sequences(x_train)
x_test = tokenizer.texts_to_sequences(x_test)
review_test = tokenizer.texts_to_sequences(review_test)

y_train = np.array(steam_movie_train['label'])
y_test = np.array(steam_movie_test['label'])
review_label = np.array(test_data['label'])

# Positions of reviews that became empty once rare words were removed.
drop_train = [index for index, sentence in enumerate(x_train) if len(sentence) < 1]
drop_test = [index for index, sentence in enumerate(x_test) if len(sentence) < 1]

# Drop the empty reviews and their labels. The sequences have different
# lengths, so they are filtered as plain lists: passing a ragged list to
# np.delete forces an inhomogeneous ndarray, which NumPy >= 1.24 rejects with
# a ValueError. The label arrays are 1-D and can still use np.delete.
x_train = [sentence for sentence in x_train if len(sentence) > 0]
y_train = np.delete(y_train, drop_train, axis=0)

x_test = [sentence for sentence in x_test if len(sentence) > 0]
y_test = np.delete(y_test, drop_test, axis=0)

# NOTE(review): the review set is not filtered for empty sequences in this
# cell, so any empty review becomes an all-padding row -- confirm that is
# the intended evaluation setup.

# Pad/truncate every sequence to a fixed length so the models get a
# rectangular integer matrix.
max_len = 30

x_train = pad_sequences(x_train, maxlen = max_len)
x_test = pad_sequences(x_test, maxlen = max_len)
review_test = pad_sequences(review_test, maxlen = max_len)

print('-'*20, "all pre-processing done", '-'*20)

print("x train: ", len(x_train))
print("y train: ", len(y_train))
print("x test: ", len(x_test))
print("y test: ", len(y_test))
print("review test: ", len(review_test))
print("review label: ", len(review_label))

-------------------- train data pre-processing... --------------------
Any null value in train data:  False
train len after pre-processing:  217732
-------------------- train data tokenizing... --------------------


100%|██████████| 217732/217732 [24:20<00:00, 149.09it/s]


-------------------- test data pre-processing... --------------------
Any null value in test data:  False
test len after pre-processing: 73238
-------------------- test data tokenizing... --------------------


 66%|██████▌   | 48124/73238 [06:08<02:38, 158.06it/s]

#데이터 학습 및 성능 평가

##1.DNN Model

#### - case 1. embedding dimension = 100

In [23]:
max_len = 30
input_length = max_len

embedding_dim = 100

# Averaged-embedding classifier: embed each token, average over the sequence,
# then two small ReLU layers feeding a sigmoid output for the binary label.
model_dnn_e100_adam = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=input_length),
    GlobalAveragePooling1D(),
    Dense(units=16, activation='relu'),
    Dense(units=4, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Stop once validation loss has not improved for 7 epochs; independently keep
# the weights of the epoch with the best validation accuracy on disk.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint(
    'movie_game_e100_dnn_model_adam.h1',
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
)

model_dnn_e100_adam.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
# The last 20% of the training arrays is held out as the validation set.
history = model_dnn_e100_adam.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=15,
    callbacks=[es, mc],
)

Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.75736, saving model to movie_game_e100_dnn_model_adam.h1
INFO:tensorflow:Assets written to: movie_game_e100_dnn_model_adam.h1/assets
Epoch 2/15
Epoch 00002: val_acc improved from 0.75736 to 0.75777, saving model to movie_game_e100_dnn_model_adam.h1
INFO:tensorflow:Assets written to: movie_game_e100_dnn_model_adam.h1/assets
Epoch 3/15
Epoch 00003: val_acc did not improve from 0.75777
Epoch 4/15
Epoch 00004: val_acc did not improve from 0.75777
Epoch 5/15
Epoch 00005: val_acc did not improve from 0.75777
Epoch 6/15
Epoch 00006: val_acc did not improve from 0.75777
Epoch 7/15
Epoch 00007: val_acc did not improve from 0.75777
Epoch 8/15
Epoch 00008: val_acc did not improve from 0.75777
Epoch 00008: early stopping


In [32]:
# Reload the best checkpoint saved for the e100/Adam DNN and print its
# accuracy on the game/movie test split and on the separate review set.
loaded_model = load_model('movie_game_e100_dnn_model_adam.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n DNN e100 adam 게임 영화 테스트 정확도: %.4f", x_test, y_test),
    ("\n DNN e100 adam 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])


 DNN e100 adam 게임 영화 테스트 정확도: 0.8104

 DNN e100 adam 맛집 테스트 정확도: 0.8599


In [31]:
# Same averaged-embedding classifier as the Adam variant, trained with
# RMSprop. `embedding_dim` and `input_length` are whatever the preceding
# cells left in scope.
model_dnn_e100_rmsprop = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=input_length),
    GlobalAveragePooling1D(),
    Dense(units=16, activation='relu'),
    Dense(units=4, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint(
    'movie_game_e100_dnn_model_rmsprop.h1',
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
)

model_dnn_e100_rmsprop.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['acc'])
history = model_dnn_e100_rmsprop.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=15,
    callbacks=[es, mc],
)

Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.75133, saving model to movie_game_e100_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e100_dnn_model_rmsprop.h1/assets
Epoch 2/15
Epoch 00002: val_acc improved from 0.75133 to 0.75722, saving model to movie_game_e100_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e100_dnn_model_rmsprop.h1/assets
Epoch 3/15
Epoch 00003: val_acc improved from 0.75722 to 0.75836, saving model to movie_game_e100_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e100_dnn_model_rmsprop.h1/assets
Epoch 4/15
Epoch 00004: val_acc did not improve from 0.75836
Epoch 5/15
Epoch 00005: val_acc did not improve from 0.75836
Epoch 6/15
Epoch 00006: val_acc did not improve from 0.75836
Epoch 7/15
Epoch 00007: val_acc did not improve from 0.75836
Epoch 8/15
Epoch 00008: val_acc did not improve from 0.75836
Epoch 9/15
Epoch 00009: val_acc did not improve from 0.75836
Epoch 10/15
Epoch 00010: val_acc did not im

In [33]:
# Reload the best checkpoint saved for the e100/RMSprop DNN and print its
# accuracy on the game/movie test split and on the separate review set.
loaded_model = load_model('movie_game_e100_dnn_model_rmsprop.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n DNN e100 rmsprop 게임 영화 테스트 정확도: %.4f", x_test, y_test),
    ("\n DNN e100 rmsprop 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])


 DNN e100 rmsprop 게임 영화 테스트 정확도: 0.8080

 DNN e100 rmsprop 맛집 테스트 정확도: 0.8800


#### - case 2. embedding dimension = 75

In [34]:
embedding_dim = 75

# Averaged-embedding classifier with a 75-dimensional embedding, trained with
# Adam. `input_length` is taken from an earlier cell.
model_dnn_e75_adam = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=input_length),
    GlobalAveragePooling1D(),
    Dense(units=16, activation='relu'),
    Dense(units=4, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint(
    'movie_game_e75_dnn_model_adam.h1',
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
)

model_dnn_e75_adam.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
history = model_dnn_e75_adam.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=15,
    callbacks=[es, mc],
)

Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.74912, saving model to movie_game_e75_dnn_model_adam.h1
INFO:tensorflow:Assets written to: movie_game_e75_dnn_model_adam.h1/assets
Epoch 2/15
Epoch 00002: val_acc did not improve from 0.74912
Epoch 3/15
Epoch 00003: val_acc improved from 0.74912 to 0.74964, saving model to movie_game_e75_dnn_model_adam.h1
INFO:tensorflow:Assets written to: movie_game_e75_dnn_model_adam.h1/assets
Epoch 4/15
Epoch 00004: val_acc did not improve from 0.74964
Epoch 5/15
Epoch 00005: val_acc did not improve from 0.74964
Epoch 6/15
Epoch 00006: val_acc did not improve from 0.74964
Epoch 7/15
Epoch 00007: val_acc did not improve from 0.74964
Epoch 8/15
Epoch 00008: val_acc did not improve from 0.74964
Epoch 00008: early stopping


In [35]:
# Reload the best checkpoint saved for the e75/Adam DNN and print its
# accuracy on the game/movie test split and on the separate review set.
loaded_model = load_model('movie_game_e75_dnn_model_adam.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n DNN e75 adam 게임 영화 테스트 정확도: %.4f", x_test, y_test),
    ("\n DNN e75 adam 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])


 DNN e75 adam 게임 영화 테스트 정확도: 0.8071

 DNN e75 adam 맛집 테스트 정확도: 0.8326


In [52]:
# Averaged-embedding classifier trained with RMSprop. `embedding_dim` and
# `input_length` are whatever the preceding cells left in scope.
model_dnn_e75_rmsprop = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=input_length),
    GlobalAveragePooling1D(),
    Dense(units=16, activation='relu'),
    Dense(units=4, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint(
    'movie_game_e75_dnn_model_rmsprop.h1',
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
)

model_dnn_e75_rmsprop.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['acc'])
history = model_dnn_e75_rmsprop.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=15,
    callbacks=[es, mc],
)

Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.74764, saving model to movie_game_e75_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e75_dnn_model_rmsprop.h1/assets
Epoch 2/15
Epoch 00002: val_acc improved from 0.74764 to 0.75322, saving model to movie_game_e75_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e75_dnn_model_rmsprop.h1/assets
Epoch 3/15
Epoch 00003: val_acc did not improve from 0.75322
Epoch 4/15
Epoch 00004: val_acc did not improve from 0.75322
Epoch 5/15
Epoch 00005: val_acc improved from 0.75322 to 0.75854, saving model to movie_game_e75_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e75_dnn_model_rmsprop.h1/assets
Epoch 6/15
Epoch 00006: val_acc did not improve from 0.75854
Epoch 7/15
Epoch 00007: val_acc did not improve from 0.75854
Epoch 8/15
Epoch 00008: val_acc did not improve from 0.75854
Epoch 9/15
Epoch 00009: val_acc did not improve from 0.75854
Epoch 10/15
Epoch 00010: val_acc did not improve 

In [53]:
# Reload the best checkpoint saved for the e75/RMSprop DNN and print its
# accuracy on the game/movie test split and on the separate review set.
loaded_model = load_model('movie_game_e75_dnn_model_rmsprop.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n DNN e75 rmsprop 게임 영화 테스트 정확도: %.4f", x_test, y_test),
    ("\n DNN e75 rmsprop 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])


 DNN e75 rmsprop 게임 영화 테스트 정확도: 0.8095

 DNN e75 rmsprop 맛집 테스트 정확도: 0.8741


#### - case 3. embedding dimension = 50

In [82]:
embedding_dim = 50

# Averaged-embedding classifier with a 50-dimensional embedding, trained with
# Adam. `input_length` is taken from an earlier cell.
model_dnn_e50_adam = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=input_length),
    GlobalAveragePooling1D(),
    Dense(units=16, activation='relu'),
    Dense(units=4, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint(
    'movie_game_e50_dnn_model_adam.h1',
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
)

model_dnn_e50_adam.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
history = model_dnn_e50_adam.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=15,
    callbacks=[es, mc],
)

Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.73643, saving model to movie_game_e50_dnn_model_adam.h1
INFO:tensorflow:Assets written to: movie_game_e50_dnn_model_adam.h1/assets
Epoch 2/15
Epoch 00002: val_acc improved from 0.73643 to 0.75624, saving model to movie_game_e50_dnn_model_adam.h1
INFO:tensorflow:Assets written to: movie_game_e50_dnn_model_adam.h1/assets
Epoch 3/15
Epoch 00003: val_acc did not improve from 0.75624
Epoch 4/15
Epoch 00004: val_acc did not improve from 0.75624
Epoch 5/15
Epoch 00005: val_acc did not improve from 0.75624
Epoch 6/15
Epoch 00006: val_acc did not improve from 0.75624
Epoch 7/15
Epoch 00007: val_acc did not improve from 0.75624
Epoch 8/15
Epoch 00008: val_acc did not improve from 0.75624
Epoch 9/15
Epoch 00009: val_acc did not improve from 0.75624
Epoch 00009: early stopping


In [83]:
# Reload the best checkpoint saved for the e50/Adam DNN and print its
# accuracy on the game/movie test split and on the separate review set.
loaded_model = load_model('movie_game_e50_dnn_model_adam.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n DNN e50 adam 게임 영화 테스트 정확도: %.4f", x_test, y_test),
    ("\n DNN e50 adam 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])


 DNN e50 adam 게임 영화 테스트 정확도: 0.8101

 DNN e50 adam 맛집 테스트 정확도: 0.8455


In [84]:
# Averaged-embedding classifier trained with RMSprop. `embedding_dim` and
# `input_length` are whatever the preceding cells left in scope.
model_dnn_e50_rmsprop = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=input_length),
    GlobalAveragePooling1D(),
    Dense(units=16, activation='relu'),
    Dense(units=4, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint(
    'movie_game_e50_dnn_model_rmsprop.h1',
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
)

model_dnn_e50_rmsprop.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['acc'])
history = model_dnn_e50_rmsprop.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=15,
    callbacks=[es, mc],
)

Epoch 1/15
Epoch 00001: val_acc improved from -inf to 0.72944, saving model to movie_game_e50_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e50_dnn_model_rmsprop.h1/assets
Epoch 2/15
Epoch 00002: val_acc improved from 0.72944 to 0.74711, saving model to movie_game_e50_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e50_dnn_model_rmsprop.h1/assets
Epoch 3/15
Epoch 00003: val_acc improved from 0.74711 to 0.75547, saving model to movie_game_e50_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e50_dnn_model_rmsprop.h1/assets
Epoch 4/15
Epoch 00004: val_acc did not improve from 0.75547
Epoch 5/15
Epoch 00005: val_acc did not improve from 0.75547
Epoch 6/15
Epoch 00006: val_acc did not improve from 0.75547
Epoch 7/15
Epoch 00007: val_acc improved from 0.75547 to 0.75870, saving model to movie_game_e50_dnn_model_rmsprop.h1
INFO:tensorflow:Assets written to: movie_game_e50_dnn_model_rmsprop.h1/assets
Epoch 8/15
Epoch 00008: val_acc did 

In [85]:
# Reload the best checkpoint saved for the e50/RMSprop DNN and print its
# accuracy on the game/movie test split and on the separate review set.
loaded_model = load_model('movie_game_e50_dnn_model_rmsprop.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n DNN e50 rmsprop 게임 영화 테스트 정확도: %.4f", x_test, y_test),
    ("\n DNN e50 rmsprop 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])



 DNN e50 rmsprop 게임 영화 테스트 정확도: 0.8096

 DNN e50 rmsprop 맛집 테스트 정확도: 0.8752


##2.LSTM Model

###Hidden state = 64

In [None]:
embedding_dim = 100
hidden_units = 64
epoch = 5

# Two identical Embedding -> LSTM -> sigmoid classifiers that differ only in
# the optimizer. No early stopping or checkpointing is used here, so the
# weights after the final epoch are the ones evaluated and saved.

# --- Adam ---
print("lstm model with adam optimizer")
lstm_model_adam = Sequential([
    Embedding(vocab_size, embedding_dim),
    LSTM(hidden_units),
    Dense(1, activation='sigmoid'),
])
lstm_model_adam.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm_model_adam_history = lstm_model_adam.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=epoch,
)
# evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
print("\n Test accuracy: %.4f" % (lstm_model_adam.evaluate(x_test, y_test)[1]))

lstm_model_adam.save('lstm_model_adam.h5')


# --- RMSprop ---
print("lstm model with rmsprop optimizer")
lstm_model_rmsprop = Sequential([
    Embedding(vocab_size, embedding_dim),
    LSTM(hidden_units),
    Dense(1, activation='sigmoid'),
])
lstm_model_rmsprop.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
lstm_model_rmsprop_history = lstm_model_rmsprop.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=epoch,
)
print("\n Test accuracy: %.4f" % (lstm_model_rmsprop.evaluate(x_test, y_test)[1]))

# Persist the trained RMSprop model.
lstm_model_rmsprop.save('lstm_model_rmsprop.h5')

In [None]:
# Accuracy of both in-memory 64-unit LSTM models on the separate review set.
for report_format, candidate_model in (
    ("\n LSTM Adam 맛집테스트 정확도 %.4f", lstm_model_adam),
    ("\n LSTM RMSProp 맛집테스트 정확도: %.4f", lstm_model_rmsprop),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % candidate_model.evaluate(review_test, review_label)[1])

###Hidden state = 128

In [None]:
embedding_dim = 100
hidden_units = 128
epoch = 5

# Two identical Embedding -> LSTM -> sigmoid classifiers (128 hidden units)
# that differ only in the optimizer. No early stopping or checkpointing is
# used here, so the weights after the final epoch are evaluated and saved.

# --- Adam ---
print("lstm model with adam optimizer")
lstm_model_adam2 = Sequential([
    Embedding(vocab_size, embedding_dim),
    LSTM(hidden_units),
    Dense(1, activation='sigmoid'),
])
lstm_model_adam2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm_model_adam_history2 = lstm_model_adam2.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=epoch,
)
# evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
print("\n Test accuracy: %.4f" % (lstm_model_adam2.evaluate(x_test, y_test)[1]))

lstm_model_adam2.save('lstm_model_adam2.h5')


# --- RMSprop ---
print("lstm model with rmsprop optimizer")
lstm_model_rmsprop2 = Sequential([
    Embedding(vocab_size, embedding_dim),
    LSTM(hidden_units),
    Dense(1, activation='sigmoid'),
])
lstm_model_rmsprop2.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
lstm_model_rmsprop_history2 = lstm_model_rmsprop2.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=epoch,
)
print("\n Test accuracy: %.4f" % (lstm_model_rmsprop2.evaluate(x_test, y_test)[1]))

# Persist the trained RMSprop model.
lstm_model_rmsprop2.save('lstm_model_rmsprop2.h5')

In [None]:
# Accuracy of both in-memory 128-unit LSTM models on the separate review set.
for report_format, candidate_model in (
    ("\n LSTM Adam 맛집테스트 정확도 %.4f", lstm_model_adam2),
    ("\n LSTM RMSProp 맛집테스트 정확도: %.4f", lstm_model_rmsprop2),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % candidate_model.evaluate(review_test, review_label)[1])

##3.Bidirectional LSTM Model

####Hidden state = 64

In [None]:
embedding_dim = 100
hidden_units = 64
input_length = max_len  # set for parity with the DNN cells; not passed to Embedding here

# Embedding -> bidirectional LSTM -> sigmoid classifier, trained with Adam.
model_bilstm_adam = Sequential([
    Embedding(vocab_size, embedding_dim),
    Bidirectional(LSTM(hidden_units)),
    Dense(1, activation='sigmoid'),
])

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy.
# NOTE(review): the checkpoint name says "e50" while embedding_dim is 100 in
# this cell; the path is kept because the evaluation cell loads it verbatim.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint(
    'movie_game_e50_h64_bilstm_model_adam.h1',
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
)

model_bilstm_adam.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
history = model_bilstm_adam.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=15,
    callbacks=[es, mc],
)

In [None]:
# Reload the best checkpoint of the 64-unit BiLSTM trained with Adam and
# print its accuracy on the game/movie test split and on the review set.
loaded_model = load_model('movie_game_e50_h64_bilstm_model_adam.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n 테스트 정확도: %.4f", x_test, y_test),
    ("\n BiLSTM Adam 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])

In [None]:
# Embedding -> bidirectional LSTM -> sigmoid classifier, trained with RMSprop.
# `embedding_dim` and `hidden_units` are whatever the preceding cell left in
# scope.
# The model is bound to `model_bilstm_rmsprop`: the original reused the name
# `model_bilstm_adam`, which overwrote the Adam-trained model and mislabeled
# this one. This matches the `*_rmsprop` naming of the DNN and LSTM cells.
model_bilstm_rmsprop = Sequential()
model_bilstm_rmsprop.add(Embedding(vocab_size, embedding_dim))
model_bilstm_rmsprop.add(Bidirectional(LSTM(hidden_units)))
model_bilstm_rmsprop.add(Dense(1, activation='sigmoid'))

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy. The path is unchanged because the evaluation
# cell loads it verbatim.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint('movie_game_e50_h64_bilstm_model_rmsprop.h1', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

model_bilstm_rmsprop.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
history = model_bilstm_rmsprop.fit(x_train, y_train, epochs=15, callbacks=[es, mc], batch_size=64, validation_split=0.2)

In [None]:
# Reload the best checkpoint of the 64-unit BiLSTM trained with RMSprop and
# print its accuracy on the game/movie test split and on the review set.
loaded_model = load_model('movie_game_e50_h64_bilstm_model_rmsprop.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n 테스트 정확도: %.4f", x_test, y_test),
    ("\n BiLSTM RMSProp 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])

####Hidden state = 128

In [None]:
embedding_dim = 100
hidden_units = 128
input_length = max_len  # set for parity with the DNN cells; not passed to Embedding here

# Embedding -> bidirectional LSTM (128 units per direction) -> sigmoid
# classifier, trained with Adam.
model_bilstm_adam = Sequential([
    Embedding(vocab_size, embedding_dim),
    Bidirectional(LSTM(hidden_units)),
    Dense(1, activation='sigmoid'),
])

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy.
# NOTE(review): the checkpoint name says "e50" while embedding_dim is 100 in
# this cell; the path is kept because the evaluation cell loads it verbatim.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint(
    'movie_game_e50_h128_bilstm_model_adam.h1',
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
)

model_bilstm_adam.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
history = model_bilstm_adam.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=15,
    callbacks=[es, mc],
)

In [None]:
# Reload the best checkpoint of the 128-unit BiLSTM trained with Adam and
# print its accuracy on the game/movie test split and on the review set.
loaded_model = load_model('movie_game_e50_h128_bilstm_model_adam.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n 테스트 정확도: %.4f", x_test, y_test),
    ("\n BiLSTM Adam 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])

In [None]:
# Embedding -> bidirectional LSTM -> sigmoid classifier, trained with RMSprop.
# `embedding_dim` and `hidden_units` are whatever the preceding cell left in
# scope.
# The model is bound to `model_bilstm_rmsprop`: the original reused the name
# `model_bilstm_adam`, which overwrote the Adam-trained model and mislabeled
# this one. This matches the `*_rmsprop` naming of the DNN and LSTM cells.
model_bilstm_rmsprop = Sequential()
model_bilstm_rmsprop.add(Embedding(vocab_size, embedding_dim))
model_bilstm_rmsprop.add(Bidirectional(LSTM(hidden_units)))
model_bilstm_rmsprop.add(Dense(1, activation='sigmoid'))

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy. The path is unchanged because the evaluation
# cell loads it verbatim.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint('movie_game_e50_h128_bilstm_model_rmsprop.h1', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

model_bilstm_rmsprop.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
history = model_bilstm_rmsprop.fit(x_train, y_train, epochs=15, callbacks=[es, mc], batch_size=64, validation_split=0.2)

In [None]:
# Reload the best checkpoint of the 128-unit BiLSTM trained with RMSprop and
# print its accuracy on the game/movie test split and on the review set.
loaded_model = load_model('movie_game_e50_h128_bilstm_model_rmsprop.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n 테스트 정확도: %.4f", x_test, y_test),
    ("\n BiLSTM RMSProp 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])

####Hidden state = 256

In [None]:
embedding_dim = 100
hidden_units = 256
input_length = max_len  # set for parity with the DNN cells; not passed to Embedding here

# Embedding -> bidirectional LSTM (256 units per direction) -> sigmoid
# classifier, trained with Adam.
model_bilstm_adam = Sequential([
    Embedding(vocab_size, embedding_dim),
    Bidirectional(LSTM(hidden_units)),
    Dense(1, activation='sigmoid'),
])

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy.
# NOTE(review): the checkpoint name says "e50" while embedding_dim is 100 in
# this cell; the path is kept because the evaluation cell loads it verbatim.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint(
    'movie_game_e50_h256_bilstm_model_adam.h1',
    monitor='val_acc',
    mode='max',
    verbose=1,
    save_best_only=True,
)

model_bilstm_adam.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
history = model_bilstm_adam.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=15,
    callbacks=[es, mc],
)

In [None]:
# Reload the best checkpoint of the 256-unit BiLSTM trained with Adam and
# print its accuracy on the game/movie test split and on the review set.
loaded_model = load_model('movie_game_e50_h256_bilstm_model_adam.h1')
for report_format, eval_inputs, eval_labels in (
    ("\n 테스트 정확도: %.4f", x_test, y_test),
    ("\n BiLSTM Adam 맛집 테스트 정확도: %.4f", review_test, review_label),
):
    # evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
    print(report_format % loaded_model.evaluate(eval_inputs, eval_labels)[1])

In [None]:
# Embedding -> bidirectional LSTM -> sigmoid classifier, trained with RMSprop.
# `embedding_dim` and `hidden_units` are whatever the preceding cell left in
# scope.
# The model is bound to `model_bilstm_rmsprop`: the original reused the name
# `model_bilstm_adam`, which overwrote the Adam-trained model and mislabeled
# this one. This matches the `*_rmsprop` naming of the DNN and LSTM cells.
model_bilstm_rmsprop = Sequential()
model_bilstm_rmsprop.add(Embedding(vocab_size, embedding_dim))
model_bilstm_rmsprop.add(Bidirectional(LSTM(hidden_units)))
model_bilstm_rmsprop.add(Dense(1, activation='sigmoid'))

# Early stopping watches validation loss; the checkpoint keeps the epoch with
# the best validation accuracy. The path is unchanged because the evaluation
# cell loads it verbatim.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=7)
mc = ModelCheckpoint('movie_game_e50_h256_bilstm_model_rmsprop.h1', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

model_bilstm_rmsprop.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
history = model_bilstm_rmsprop.fit(x_train, y_train, epochs=15, callbacks=[es, mc], batch_size=64, validation_split=0.2)

In [None]:
# Reload the best checkpoint of the 256-unit BiLSTM trained with RMSprop and
# print its accuracy on the game/movie test split and on the review set.
loaded_model = load_model('movie_game_e50_h256_bilstm_model_rmsprop.h1')
# evaluate() returns the loss followed by the compiled metric; index 1 is the accuracy.
print("\n 테스트 정확도: %.4f" % (loaded_model.evaluate(x_test, y_test)[1]))
# The label now says RMSProp: the original printed "Adam" although the
# RMSprop checkpoint is the one loaded above.
print("\n BiLSTM RMSProp 맛집 테스트 정확도: %.4f" % (loaded_model.evaluate(review_test, review_label)[1]))