<a href="https://colab.research.google.com/github/data-better/ASL/blob/master/10%EA%B0%95_sentiment_keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- [케라스 창시자에게 배우는 딥러닝: 파이썬과 케라스(keras)로 배우는 딥러닝]에도 나오는 예제
- 인터넷 예시 참조

In [None]:
import numpy as np
import keras
from keras import layers, models, datasets
from keras.preprocessing import sequence

Using TensorFlow backend.


# Data Loader

In [None]:
class Data:
  """IMDB sentiment data prepared for the models in this notebook.

  Loads the dataset with ``datasets.imdb.load_data(num_words=max_features)``
  and passes both splits through ``sequence.pad_sequences(..., maxlen=maxlen)``.

  Attributes set on the instance:
    x_train, y_train: the full padded training split and its labels.
    x_test, y_test:   only the first 1000 rows of the padded test split
                      and their labels.

  Args:
    max_features: forwarded to ``load_data`` as ``num_words``.
    maxlen: forwarded to ``pad_sequences`` as ``maxlen``.
  """
  def __init__(self, max_features = 10000, maxlen = 80):
    # Workaround for:
    #   ValueError: Object arrays cannot be loaded when allow_pickle=False
    # https://stackoverflow.com/questions/55890813/how-to-fix-object-arrays-cannot-be-loaded-when-allow-pickle-false-for-imdb-loa/56062555
    # NOTE(review): this enables pickle loading for the downloaded archive;
    # only acceptable because the file comes from the Keras dataset host.
    np_load_old = np.load
    # Force allow_pickle=True. Merging into the kwargs (instead of passing it
    # as a separate keyword) avoids "multiple values for keyword argument"
    # when the caller already supplies allow_pickle itself.
    np.load = lambda *a, **k: np_load_old(*a, **dict(k, allow_pickle=True))
    try:
      (x_train, y_train), (x_test, y_test) = datasets.imdb.load_data(num_words = max_features)
    finally:
      # Always undo the global monkey-patch, even if the download/load fails,
      # so the pickle-enabled loader never leaks into the rest of the kernel.
      np.load = np_load_old

    x_train = sequence.pad_sequences(x_train, maxlen = maxlen)
    x_test = sequence.pad_sequences(x_test, maxlen = maxlen)
    self.x_train, self.y_train = x_train, y_train
    # Keep only the first 1000 test rows.
    self.x_test, self.y_test = x_test[:1000, :], y_test[:1000]
    


# Define Model

In [None]:
class LSTM(models.Model):
  """Single-direction LSTM binary classifier, compiled on construction.

  Graph: Input(maxlen) -> Embedding(max_features, 64) -> LSTM(32) ->
  Dense(1, sigmoid). The model is compiled with binary cross-entropy loss,
  the 'adam' optimizer, and an accuracy metric.

  Args:
    max_features: first argument of the Embedding layer.
    maxlen: length of the input dimension.
  """
  def __init__(self, max_features, maxlen):
    inputs = layers.Input((maxlen,))

    # Build the layers first, then wire them together in one expression.
    embed = layers.Embedding(max_features, 64)
    recurrent = layers.LSTM(32, dropout = .2, recurrent_dropout = .2)
    classify = layers.Dense(1, activation = 'sigmoid')
    outputs = classify(recurrent(embed(inputs)))

    super().__init__(inputs, outputs)

    self.compile(
      loss = 'binary_crossentropy',
      optimizer = 'adam',
      metrics = ['accuracy'],
    )

## Note

### Check number of parameters

In [None]:
# Build a standalone LSTM model (no training) just to inspect its layer
# shapes and parameter counts.
ls = LSTM(max_features = 10000, maxlen = 80)
ls.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 80)                0         
_________________________________________________________________
embedding_4 (Embedding)      (None, 80, 64)            640000    
_________________________________________________________________
lstm_4 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 652,449
Trainable params: 652,449
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Recompute by hand the parameter counts that ls.summary() reports, and
# check that the three layers add up to the reported total.

# embedding: one 64-dim vector for each of the 10000 vocabulary entries
w_emb = 10000 * 64

# LSTM: the factor 4 is the four gate/candidate blocks of an LSTM cell;
# each has input->hidden weights, hidden->hidden weights and a bias.
w_xh = 64 * 32 * 4
w_hh = 32 * 32 * 4
b_h = 32 * 4
lstm_params = w_xh + w_hh + b_h

print('LSTM Param = {}'.format(lstm_params))

# dense layer: 32 weights + 1 bias
dense_4 = 32 + 1

# Previously w_emb and dense_4 were computed but never used; report them
# and the grand total so the whole summary table can be verified.
total_params = w_emb + lstm_params + dense_4
print('Embedding Param = {}'.format(w_emb))
print('Dense Param = {}'.format(dense_4))
print('Total Param = {}'.format(total_params))


12416

# Trainer

In [None]:
class Machine:
  """Bundles a dataset and a model, and runs one train-then-evaluate cycle.

  Args:
    model: callable invoked as ``model(max_features, maxlen)`` to build the
      network; defaults to the LSTM class.
    max_features: forwarded to both Data and the model factory.
    maxlen: forwarded to both Data and the model factory.
  """
  def __init__(self, model = LSTM, max_features = 10000, maxlen = 80):
    self.data = Data(max_features, maxlen)
    self.model = model(max_features, maxlen)

  def run(self, epoch = 3, batch_size = 32):
    """Fit the model, then print its loss and accuracy on the test arrays.

    The same (x_test, y_test) pair is used both as validation data during
    fitting and for the final evaluation.

    Args:
      epoch: passed to ``fit`` as ``epochs``.
      batch_size: batch size for both ``fit`` and ``evaluate``.
    """
    dataset, network = self.data, self.model
    held_out = (dataset.x_test, dataset.y_test)

    print('Training stage')
    network.fit(
      dataset.x_train,
      dataset.y_train,
      batch_size = batch_size,
      epochs = epoch,
      validation_data = held_out,
      verbose = 2,
    )

    # evaluate() yields [loss, accuracy] given the metrics set at compile time
    loss, acc = network.evaluate(*held_out, batch_size = batch_size, verbose = 2)

    print('Test performance: accuracy = {0}, loss = {1}'.format(acc, loss))

# Model Fitting

In [None]:
# Train and evaluate the default (single-direction LSTM) model with the
# default settings of Machine and Machine.run.
m = Machine()
m.run()

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz


W0812 05:56:47.413540 140657280669568 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0812 05:56:47.438755 140657280669568 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0812 05:56:47.443462 140657280669568 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0812 05:56:47.602604 140657280669568 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0812 05:56:47.622495 

Training stage
Train on 25000 samples, validate on 1000 samples
Epoch 1/3
 - 253s - loss: 0.4770 - acc: 0.7734 - val_loss: 0.3575 - val_acc: 0.8440
Epoch 2/3
 - 246s - loss: 0.3339 - acc: 0.8603 - val_loss: 0.3583 - val_acc: 0.8300
Epoch 3/3
 - 226s - loss: 0.2753 - acc: 0.8886 - val_loss: 0.3850 - val_acc: 0.8350
Test performance: accuracy = 0.835, loss = 0.38503733015060426


## Check prediction

In [None]:
# Compare the true labels of the first two test reviews with the sigmoid
# outputs of the trained LSTM model.
y = m.data.y_test[:2]
y_hat = m.model.predict(m.data.x_test[:2, :])
for label, score in zip(y, y_hat):
  print('y = {}, y_hat = {}'.format(label, score))

y = 0, y_hat = [0.19915646]
y = 1, y_hat = [0.97466034]


# Define Bidirectional LSTM

In [None]:
class BI_LSTM(models.Model):
  """Bidirectional-LSTM binary classifier, compiled on construction.

  Graph: Input(maxlen) -> Embedding(max_features, 64) ->
  Bidirectional(LSTM(16)) -> Dense(1, sigmoid). The model is compiled with
  binary cross-entropy loss, the 'adam' optimizer, and an accuracy metric.

  Args:
    max_features: first argument of the Embedding layer.
    maxlen: length of the input dimension.
  """
  def __init__(self, max_features, maxlen):
    inputs = layers.Input((maxlen,))

    # Build the layers first, then wire them together in one expression.
    embed = layers.Embedding(max_features, 64)
    recurrent = layers.Bidirectional(
      layers.LSTM(16, dropout = .2, recurrent_dropout = .2)
    )
    classify = layers.Dense(1, activation = 'sigmoid')
    outputs = classify(recurrent(embed(inputs)))

    super().__init__(inputs, outputs)

    self.compile(
      loss = 'binary_crossentropy',
      optimizer = 'adam',
      metrics = ['accuracy'],
    )

# Train Bidirectional LSTM

In [None]:
# Same training/evaluation cycle as before, but with the bidirectional
# model plugged into Machine.
m1 = Machine(model = BI_LSTM)
m1.run()

Training stage
Train on 25000 samples, validate on 1000 samples
Epoch 1/3
 - 445s - loss: 0.4689 - acc: 0.7763 - val_loss: 0.3624 - val_acc: 0.8330
Epoch 2/3
 - 463s - loss: 0.3089 - acc: 0.8736 - val_loss: 0.3651 - val_acc: 0.8450
Epoch 3/3
 - 467s - loss: 0.2419 - acc: 0.9028 - val_loss: 0.3890 - val_acc: 0.8330
Test performance: accuracy = 0.833, loss = 0.3889838485121727


### Check prediction (Bidirectional LSTM)

In [None]:
# Compare true labels with the bidirectional model's sigmoid outputs for
# test reviews 20 and 21. Labels are read from m1 (the same Machine that
# produced the predictions) rather than from the unrelated `m`, so this
# cell no longer depends on the first model's cell having been run.
y_hat = m1.model.predict(m1.data.x_test[20:22,:])
y = m1.data.y_test[20:22]
for (a,b) in zip(y, y_hat):
  print('y = {}, y_hat = {}'.format(a, b))

y = 1, y_hat = [0.9979255]
y = 1, y_hat = [0.9850507]
