In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import SGD, Adam
from keras.layers.embeddings import Embedding

from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.utils import class_weight

from pdb import set_trace as t
import util


Using TensorFlow backend.


In [2]:
from IPython.display import HTML
HTML('''<script>
code_show_err=false; 
function code_toggle_err() {
 if (code_show_err){
 $('div.output_stderr').hide();
 } else {
 $('div.output_stderr').show();
 }
 code_show_err = !code_show_err
} 
$( document ).ready(code_toggle_err);
</script>
To toggle on/off output_stderr, click <a href="javascript:code_toggle_err()">here</a>.''')

In [3]:
reload(util)

<module 'util' from 'util.pyc'>

In [3]:
vocab_size = 50000
max_review_length = 500
sentences, what_labels, _, _, _ = util.load_data(vocab_size, max_review_length)


In [4]:
X_train, X_test, y_train, y_test = train_test_split(sentences, what_labels, test_size = 0.33)
class_rebalance = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)

In [6]:
def create_model(learn_rate=0.0005, decay=0):
    embedding_vector_length = 32
    dropout_rate = 0.2

    model = Sequential()
    model.add(Embedding(vocab_size, embedding_vector_length, input_length=max_review_length))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(100))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))
    optimizer = Adam(lr=learn_rate, decay=decay)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

In [7]:
# model.fit(X_train, y_train, class_weight = class_rebalance,
#           validation_data=(X_test, y_test), 
#           epochs=1, batch_size=64, verbose = 1)

model = KerasClassifier(build_fn=create_model, 
                        class_weight = class_rebalance,
                        epochs=3, batch_size=64, verbose = 1)

# define the grid search parameters
learn_rate = [0.0005, 0.001, 0.002]
decay = [0, 0.005]
param_grid = dict(learn_rate=learn_rate, decay=decay)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X_train, y_train)



Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
   64/18561 [..............................] - ETA: 655s - loss: 0.6919 - acc: 0.6406Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 1/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3

Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 2/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
Epoch 3/3
  576/18562 [..............................] - ETA: 694s - loss: 0.1442 - acc: 0.9479Epoch 3/3
Epoch 3/3
  512/18561 [..............................] - ETA: 691s - loss: 0.1932 - acc: 0.9238
Epoch 3/3
Epoch 3/3
 1984/18562 [==>...........................] - ETA: 190s




Epoch 1/3
Epoch 2/3
Epoch 3/3


In [8]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.929711 using {'learn_rate': 0.0005, 'decay': 0}
0.929711 (0.002700) with: {'learn_rate': 0.0005, 'decay': 0}
0.929351 (0.001104) with: {'learn_rate': 0.001, 'decay': 0}
0.926658 (0.002928) with: {'learn_rate': 0.002, 'decay': 0}
0.914661 (0.005802) with: {'learn_rate': 0.0005, 'decay': 0.005}
0.928417 (0.002746) with: {'learn_rate': 0.001, 'decay': 0.005}
0.929567 (0.001577) with: {'learn_rate': 0.002, 'decay': 0.005}


In [10]:
# Final evaluation of the model
scores = grid_result.best_estimator_.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

AttributeError: 'KerasClassifier' object has no attribute 'evaluate'

In [None]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(model).create(prog='dot', format='svg'))

# Running test notes

In [None]:
SGD: lr, momentum
Adam: lr, decay, lower lr than 0.0005 5e-4
Dropout
Feature sizes
Hidden layer sizes

In [None]:
SGD

Best: 0.634473 using {'learn_rate': 0.3, 'momentum': 0.9}
0.595683 (0.005809) with: {'learn_rate': 0.001, 'momentum': 0.0}
0.595683 (0.005809) with: {'learn_rate': 0.001, 'momentum': 0.2}
0.595683 (0.005809) with: {'learn_rate': 0.001, 'momentum': 0.4}
0.595683 (0.005809) with: {'learn_rate': 0.001, 'momentum': 0.6}
0.595683 (0.005809) with: {'learn_rate': 0.001, 'momentum': 0.8}
0.595683 (0.005809) with: {'learn_rate': 0.001, 'momentum': 0.9}
0.595683 (0.005809) with: {'learn_rate': 0.01, 'momentum': 0.0}
0.595683 (0.005809) with: {'learn_rate': 0.01, 'momentum': 0.2}
0.595683 (0.005809) with: {'learn_rate': 0.01, 'momentum': 0.4}
0.595683 (0.005809) with: {'learn_rate': 0.01, 'momentum': 0.6}
0.595683 (0.005809) with: {'learn_rate': 0.01, 'momentum': 0.8}
0.595683 (0.005809) with: {'learn_rate': 0.01, 'momentum': 0.9}
0.595683 (0.005809) with: {'learn_rate': 0.1, 'momentum': 0.0}
0.595683 (0.005809) with: {'learn_rate': 0.1, 'momentum': 0.2}
0.595683 (0.005809) with: {'learn_rate': 0.1, 'momentum': 0.4}
0.595683 (0.005809) with: {'learn_rate': 0.1, 'momentum': 0.6}
0.595683 (0.005809) with: {'learn_rate': 0.1, 'momentum': 0.8}
0.596006 (0.005782) with: {'learn_rate': 0.1, 'momentum': 0.9}
0.595683 (0.005809) with: {'learn_rate': 0.2, 'momentum': 0.0}
0.543711 (0.068321) with: {'learn_rate': 0.2, 'momentum': 0.2}
0.595683 (0.005809) with: {'learn_rate': 0.2, 'momentum': 0.4}
0.530853 (0.086421) with: {'learn_rate': 0.2, 'momentum': 0.6}
0.607176 (0.016544) with: {'learn_rate': 0.2, 'momentum': 0.8}
0.571690 (0.113638) with: {'learn_rate': 0.2, 'momentum': 0.9}
0.467567 (0.090206) with: {'learn_rate': 0.3, 'momentum': 0.0}
0.467567 (0.090206) with: {'learn_rate': 0.3, 'momentum': 0.2}
0.526902 (0.092007) with: {'learn_rate': 0.3, 'momentum': 0.4}
0.595755 (0.005727) with: {'learn_rate': 0.3, 'momentum': 0.6}
0.601789 (0.005178) with: {'learn_rate': 0.3, 'momentum': 0.8}
0.634473 (0.027064) with: {'learn_rate': 0.3, 'momentum': 0.9}

In [None]:
Adam

with 1 epoch
Best: 0.919079 using {'learn_rate': 0.001, 'decay': 0.0}
0.919079 (0.002999) with: {'learn_rate': 0.001, 'decay': 0.0}
0.908520 (0.007281) with: {'learn_rate': 0.01, 'decay': 0.0}
0.877128 (0.020741) with: {'learn_rate': 0.1, 'decay': 0.0}
0.869047 (0.019981) with: {'learn_rate': 0.3, 'decay': 0.0}
0.894943 (0.001242) with: {'learn_rate': 0.001, 'decay': 0.02}
0.903024 (0.005885) with: {'learn_rate': 0.01, 'decay': 0.02}
0.596832 (0.422036) with: {'learn_rate': 0.1, 'decay': 0.02}
0.894656 (0.000881) with: {'learn_rate': 0.3, 'decay': 0.02}
0.894656 (0.000881) with: {'learn_rate': 0.001, 'decay': 0.04}
0.900259 (0.007373) with: {'learn_rate': 0.01, 'decay': 0.04}
0.895446 (0.001685) with: {'learn_rate': 0.1, 'decay': 0.04}
0.894656 (0.000881) with: {'learn_rate': 0.3, 'decay': 0.04}
0.894656 (0.000881) with: {'learn_rate': 0.001, 'decay': 0.06}
0.902773 (0.005021) with: {'learn_rate': 0.01, 'decay': 0.06}
0.894727 (0.000968) with: {'learn_rate': 0.1, 'decay': 0.06}
0.894799 (0.001048) with: {'learn_rate': 0.3, 'decay': 0.06}
0.894656 (0.000881) with: {'learn_rate': 0.001, 'decay': 0.09}
0.906903 (0.005251) with: {'learn_rate': 0.01, 'decay': 0.09}
0.896344 (0.002442) with: {'learn_rate': 0.1, 'decay': 0.09}
0.894656 (0.000881) with: {'learn_rate': 0.3, 'decay': 0.09}
    
Best: 0.919295 using {'learn_rate': 0.001, 'decay': 0.005}
0.894835 (0.002537) with: {'learn_rate': 0.0001, 'decay': 0.005}
0.898463 (0.002219) with: {'learn_rate': 0.0005, 'decay': 0.005}
0.919295 (0.003680) with: {'learn_rate': 0.001, 'decay': 0.005}
0.905754 (0.014361) with: {'learn_rate': 0.01, 'decay': 0.005}
0.894835 (0.002537) with: {'learn_rate': 0.0001, 'decay': 0.01}
0.894835 (0.002537) with: {'learn_rate': 0.0005, 'decay': 0.01}
0.908843 (0.002461) with: {'learn_rate': 0.001, 'decay': 0.01}
0.914302 (0.008678) with: {'learn_rate': 0.01, 'decay': 0.01}
0.894835 (0.002537) with: {'learn_rate': 0.0001, 'decay': 0.02}
0.894835 (0.002537) with: {'learn_rate': 0.0005, 'decay': 0.02}
0.896056 (0.002202) with: {'learn_rate': 0.001, 'decay': 0.02}
0.904820 (0.012220) with: {'learn_rate': 0.01, 'decay': 0.02}
    
with 3 epochs

Best: 0.929711 using {'learn_rate': 0.0005, 'decay': 0}
0.929711 (0.002700) with: {'learn_rate': 0.0005, 'decay': 0}
0.929351 (0.001104) with: {'learn_rate': 0.001, 'decay': 0}
0.926658 (0.002928) with: {'learn_rate': 0.002, 'decay': 0}
0.914661 (0.005802) with: {'learn_rate': 0.0005, 'decay': 0.005}
0.928417 (0.002746) with: {'learn_rate': 0.001, 'decay': 0.005}
0.929567 (0.001577) with: {'learn_rate': 0.002, 'decay': 0.005}