In [0]:
from keras.models import Sequential
from keras.datasets import imdb
from keras.layers import Dense, Activation, Embedding, LSTM, GRU, SimpleRNN
from keras.optimizers import Adadelta
from keras.utils import np_utils, to_categorical
from keras.preprocessing import sequence
import numpy as np

from matplotlib import pyplot as plt

In [0]:
# Dataset hyper-parameters: vocabulary cap passed to the loader, and the
# fixed length every review is padded/truncated to.
max_features = 20000
maxlen = 180

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# Bring both splits to the same fixed length so they can be fed to the model.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=maxlen) for split in (X_train, X_test)
)

In [0]:
# Quick look at the training data, one item per line:
#   1. shape of the padded matrix (original note: 25000 reviews x 180 ids)
#   2. the first padded review
#   3. the label vector (binary: positive / negative)
print(X_train.shape, X_train[0], y_train, sep='\n')

In [0]:
# Build the inverse vocabulary (id -> word) so encoded reviews can be read.
word_index = imdb.get_word_index()
reverse_word_index = {idx: token for token, idx in word_index.items()}

# Print the label and decoded text of the first 40 test reviews.
for k in range(40):
  print('positive' if y_test[k] == 1 else 'negative')
  # Review ids are looked up shifted down by 3; ids with no matching word
  # are shown as '?'.
  decoded_review = ' '.join(
      reverse_word_index.get(token_id - 3, '?') for token_id in X_test[k]
  )
  print(decoded_review)

In [0]:
# Convert the integer labels to one-hot vectors (2 classes).
# The cell overwrites its own input, so only convert while the labels are
# still 1-D: re-running the cell must not one-hot encode an already
# one-hot array.
if y_train.ndim == 1:
  y_train = to_categorical(y_train, 2)

In [0]:
# Sentiment classifier: embedding -> LSTM (last state only) -> 2-way softmax.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=100, embeddings_initializer='glorot_uniform', input_length=maxlen),
    LSTM(32, return_sequences=False),
    Dense(2),
    Activation('softmax'),
])

# Categorical cross-entropy matches the one-hot labels and softmax output.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train with 10% of the training data held out for validation; `lstm` keeps
# whatever fit() returns.
lstm = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=5,
    batch_size=256,
)

In [0]:
# Print Keras' textual summary of the model.
model.summary()

In [0]:
# Try the whole pipeline on a real sentence: text -> word ids -> model.predict

# positive
#test_sentence = "the trademark altman audio style is evident here and there i think what really makes this film work is the brilliant performance by sandy dennis it's definitely one of her darker characters but she plays it so perfectly and convincingly that it's scary michael burns does a good job as the mute young man regular altman player michael murphy has a small part the ? moody set fits the content of the story very well in short this movie is a powerful study of loneliness sexual repression and desperation be patient ? up the atmosphere and pay attention to the wonderfully written script br br i praise robert altman this is one of his many films that deals with unconventional fascinating subject matter this film is disturbing but it's sincere and it's sure to elicit a strong emotional response from the viewer if you want to see an unusual film some might even say bizarre this is worth the time br br unfortunately it's very difficult to find in video stores you may have to buy it off the internet"

# Negative example: the sentence that is encoded and passed to model.predict
# further down in this cell.
test_sentence = "this movie is very bad. don't go to watch this. so boaring. this is such a stupid movie, shit."


# Convert raw text into the integer ids used by the IMDB training data.
def split_text(text, vocab=None, num_words=None, index_from=3, oov_char=2):
  """Encode `text` as a list of IMDB-style word ids, one id per word.

  The encoding follows what `imdb.load_data` does to the training reviews,
  so the model sees ids from the same id space it was trained on:

  * each word's rank in the vocabulary is shifted up by `index_from`
    (the decoding cell of this notebook undoes the same shift with `i - 3`);
  * a word missing from the vocabulary becomes `oov_char`, not 0, because
    0 is the value used for padding;
  * an id that would be >= `num_words` also becomes `oov_char`, so it can
    never fall outside the Embedding layer's `input_dim`.

  Note: `imdb.load_data` additionally puts a start marker (id 1) in front of
  every review; this helper does not emit one.

  Args:
    text: Raw sentence or review. Case and punctuation are normalised here.
    vocab: Mapping word -> rank. Defaults to the notebook-level `word_index`.
    num_words: Vocabulary cap. Defaults to the notebook-level `max_features`.
    index_from: Offset added to every rank (Keras default: 3).
    oov_char: Id used for out-of-vocabulary words (Keras default: 2).

  Returns:
    A list of ints, one per whitespace-separated token; empty for empty text.
  """
  if vocab is None:
    vocab = word_index
  if num_words is None:
    num_words = max_features

  # Replace punctuation with spaces so that "bad." or "movie," still match
  # their vocabulary entries. Apostrophes are kept on purpose: the vocabulary
  # contains contractions such as "it's" (presumably built with the Keras
  # Tokenizer default filters, which this list mirrors).
  filters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
  cleaned = text.lower().translate(str.maketrans(filters, ' ' * len(filters)))

  result = []
  for word in cleaned.split():
    rank = vocab.get(word)
    index = oov_char if rank is None else rank + index_from
    if index >= num_words:
      # Known word, but outside the vocabulary the model was trained with.
      index = oov_char
    result.append(index)

  return result

# Encode the sentence as a batch of one review and pad it to the model's
# input length; printing shows the padded id sequence.
x = sequence.pad_sequences([split_text(test_sentence)], maxlen=maxlen)
print(x)

# Run the classifier on the encoded sentence.
y = model.predict(x)
print(y)  # [probability of negative, probability of positive]