In [1]:
import pandas as pd

In [2]:
data = pd.read_csv('data/data.tsv', delimiter='\t', header=None, names=['sentence', 'polarity'])
test_data = pd.read_csv('data/test_data.tsv', delimiter='\t', header=None, names=['sentence', 'polarity'])

#Preprocessing

In [3]:
from pymystem3 import Mystem
m = Mystem()

def lemmatize(text):
    my = m.analyze(text)
    lemmatized_sentence = []
    for word in my:
        if 'analysis' in word.keys():
            if word['analysis']:
                if word['analysis'][0]['gr'][0] == 'S' or word['analysis'][0]['gr'][0] == 'V' or word['analysis'][0]['gr'][0] == 'A':
                    lemmatized_sentence.append(word['analysis'][0]['lex'])
            else:
                lemmatized_sentence.append(word['text'])
    return ' '.join(lemmatized_sentence)

In [4]:
for data_frame in [data, test_data]:
    data_frame['sentence'] = data_frame['sentence'].apply(lemmatize)

In [5]:
lengths = data['sentence'].apply(len)
print(lengths.describe())

count    2733.000000
mean       67.931211
std        47.040391
min         0.000000
25%        35.000000
50%        57.000000
75%        87.000000
max       413.000000
Name: sentence, dtype: float64


In [7]:
test_data

Unnamed: 0,sentence,polarity
0,очень милый уютный ресторанчик скромный цена о...,positive
1,салат свежий зелень мясо курица нежный десерт ...,positive
2,официантка вежливый улыбчивый,positive
3,единственный немного понравиться долго ждать н...,negative
4,общий впечатление прекрасный,positive
5,советовать,positive
6,отличный ресторан быть здесь уже раз решаться ...,positive
7,я весь гость оставаться очень довольный,positive
8,кухня уровень все свежий вкусный аутентичный,positive
9,особенно хотеться отмечать спиртной напиток ви...,positive


In [107]:
test_data[test_data['sentence'].str.contains('персонал')]

Unnamed: 0,sentence,polarity
44,персонал спасибо,positive
89,девушка персонал потом мы пожалеть спрашивать ...,neutral
113,прекрасный отдельный зал см фото галерея внима...,positive
116,хотеть выражать огромный благодарность персона...,positive
175,рекомендация научать персонал работать нормально,negative
177,персонал оказываться очень внимательный вежлив...,positive
214,персонал внимательный обслуживание супер,positive
229,персонал вежливый приветливый,positive
292,отличный ресторан приятный персонал создавать ...,positive
372,первый минута мы порадовать персонал очень доб...,positive


In [8]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.utils.np_utils import to_categorical

Using TensorFlow backend.


In [133]:
tokenizer = Tokenizer(lower=False, split=' ')
tokenizer.fit_on_texts(data['sentence'])
x_train_tokenized = tokenizer.texts_to_sequences(data['sentence'])
x_test_tokenized = tokenizer.texts_to_sequences(test_data['sentence'])

print('Padding sentences (sample X time)')
x_train_sequence = sequence.pad_sequences(x_train_tokenized)
x_test_sequence = sequence.pad_sequences(x_test_tokenized)

print('X_train shape: {}'.format(x_train_sequence.shape))
print('X_test shape: {}'.format(x_test_sequence.shape))

Padding sentences (sample X time)
X_train shape: (2733, 64)
X_test shape: (908, 53)


In [10]:
from sklearn.preprocessing import LabelEncoder

In [11]:
le = LabelEncoder()
le.fit(data['polarity'])
y_train = le.transform(data['polarity'])
y_test = le.transform(test_data['polarity'])
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)


#Uni-LSTM model

In [12]:
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM, Bidirectional, Dropout, GRU

MAX_FEATURES = 20000
MAX_LEN = 80
BATCH_SIZE = 32

In [13]:
model = Sequential()
model.add(Embedding(MAX_FEATURES, 128))
model.add(LSTM(32,
               dropout_W=0.2,
               dropout_U=0.2,
               activation='tanh',
               return_sequences=True))
model.add(LSTM(64,
               dropout_W=0.2,
               dropout_U=0.2,
               activation='tanh'))
model.add(Dense(4, activation='sigmoid'))

model.compile(loss='categorical_crossentropy',
              optimizer = 'rmsprop',
              metrics=['accuracy'])

In [14]:
print('Training model:')
model.fit(x_train_sequence, y_train,
                 batch_size=BATCH_SIZE,
                 nb_epoch=10,
                 validation_data=(x_test_sequence, y_test))
score, accuracy = model.evaluate(x_test_sequence, y_test,
                                        batch_size=BATCH_SIZE)
print('\n')
print('Test score: {0:7.6f} \n Test accuracy: {1:7.6f}'.format(score, accuracy))
model.save('uni-lstm.h5')

Training model:


Train on 2733 samples, validate on 908 samples
Epoch 1/10


  32/2733 [..............................] - ETA: 359s - loss: 1.3761 - acc: 0.3125

  64/2733 [..............................] - ETA: 198s - loss: 1.3008 - acc: 0.5312

  96/2733 [>.............................] - ETA: 144s - loss: 1.2621 - acc: 0.5833

 128/2733 [>.............................] - ETA: 117s - loss: 1.2393 - acc: 0.6016

 160/2733 [>.............................] - ETA: 100s - loss: 1.2185 - acc: 0.6375

 192/2733 [=>............................] - ETA: 89s - loss: 1.1920 - acc: 0.6510 

 224/2733 [=>............................] - ETA: 80s - loss: 1.1708 - acc: 0.6562

 256/2733 [=>............................] - ETA: 74s - loss: 1.1465 - acc: 0.6602

 288/2733 [==>...........................] - ETA: 69s - loss: 1.1320 - acc: 0.6562

 320/2733 [==>...........................] - ETA: 65s - loss: 1.1147 - acc: 0.6625

 352/2733 [==>...........................] - ETA: 62s - loss: 1.0898 - acc: 0.6761

 384/2733 [===>..........................] - ETA: 59s - loss: 1.0710 - acc: 0.6823

 416/2733 [===>..........................] - ETA: 57s - loss: 1.0559 - acc: 0.6899

 448/2733 [===>..........................] - ETA: 54s - loss: 1.0556 - acc: 0.6875

 480/2733 [====>.........................] - ETA: 52s - loss: 1.0404 - acc: 0.6937

 512/2733 [====>.........................] - ETA: 51s - loss: 1.0307 - acc: 0.6934

 544/2733 [====>.........................] - ETA: 49s - loss: 1.0256 - acc: 0.6875

 576/2733 [=====>........................] - ETA: 48s - loss: 1.0119 - acc: 0.6927

 608/2733 [=====>........................] - ETA: 46s - loss: 1.0019 - acc: 0.6957








































































































































Epoch 2/10


  32/2733 [..............................] - ETA: 45s - loss: 0.8223 - acc: 0.6875

  64/2733 [..............................] - ETA: 43s - loss: 0.8434 - acc: 0.6719

  96/2733 [>.............................] - ETA: 42s - loss: 0.8335 - acc: 0.6771

 128/2733 [>.............................] - ETA: 41s - loss: 0.8267 - acc: 0.6875

 160/2733 [>.............................] - ETA: 41s - loss: 0.8496 - acc: 0.6813

 192/2733 [=>............................] - ETA: 40s - loss: 0.8682 - acc: 0.6667

 224/2733 [=>............................] - ETA: 39s - loss: 0.8548 - acc: 0.6786

 256/2733 [=>............................] - ETA: 39s - loss: 0.8351 - acc: 0.6914

 288/2733 [==>...........................] - ETA: 38s - loss: 0.8282 - acc: 0.6944

 320/2733 [==>...........................] - ETA: 38s - loss: 0.8263 - acc: 0.6937

 352/2733 [==>...........................] - ETA: 37s - loss: 0.8174 - acc: 0.6960

 384/2733 [===>..........................] - ETA: 37s - loss: 0.8134 - acc: 0.6979

 416/2733 [===>..........................] - ETA: 36s - loss: 0.8161 - acc: 0.6947

 448/2733 [===>..........................] - ETA: 36s - loss: 0.8153 - acc: 0.6942

 480/2733 [====>.........................] - ETA: 35s - loss: 0.8123 - acc: 0.6958

 512/2733 [====>.........................] - ETA: 35s - loss: 0.8079 - acc: 0.6973

 544/2733 [====>.........................] - ETA: 34s - loss: 0.8008 - acc: 0.7022

 576/2733 [=====>........................] - ETA: 33s - loss: 0.8013 - acc: 0.7014

 608/2733 [=====>........................] - ETA: 33s - loss: 0.7915 - acc: 0.7056








































































































































Epoch 3/10


  32/2733 [..............................] - ETA: 51s - loss: 0.6684 - acc: 0.7812

  64/2733 [..............................] - ETA: 47s - loss: 0.7787 - acc: 0.7188

  96/2733 [>.............................] - ETA: 47s - loss: 0.6690 - acc: 0.7708

 128/2733 [>.............................] - ETA: 45s - loss: 0.6515 - acc: 0.7734

 160/2733 [>.............................] - ETA: 44s - loss: 0.6090 - acc: 0.7937

 192/2733 [=>............................] - ETA: 43s - loss: 0.6099 - acc: 0.7969

 224/2733 [=>............................] - ETA: 42s - loss: 0.6522 - acc: 0.7768

 256/2733 [=>............................] - ETA: 41s - loss: 0.6608 - acc: 0.7695

 288/2733 [==>...........................] - ETA: 40s - loss: 0.6368 - acc: 0.7812

 320/2733 [==>...........................] - ETA: 40s - loss: 0.6508 - acc: 0.7781

 352/2733 [==>...........................] - ETA: 39s - loss: 0.6488 - acc: 0.7784

 384/2733 [===>..........................] - ETA: 38s - loss: 0.6520 - acc: 0.7812

 416/2733 [===>..........................] - ETA: 38s - loss: 0.6351 - acc: 0.7909

 448/2733 [===>..........................] - ETA: 37s - loss: 0.6303 - acc: 0.7924

 480/2733 [====>.........................] - ETA: 37s - loss: 0.6619 - acc: 0.7771

 512/2733 [====>.........................] - ETA: 37s - loss: 0.6603 - acc: 0.7773

 544/2733 [====>.........................] - ETA: 36s - loss: 0.6707 - acc: 0.7721

 576/2733 [=====>........................] - ETA: 35s - loss: 0.6776 - acc: 0.7674

 608/2733 [=====>........................] - ETA: 35s - loss: 0.6734 - acc: 0.7697








































































































































Epoch 4/10


  32/2733 [..............................] - ETA: 42s - loss: 0.6202 - acc: 0.7500

  64/2733 [..............................] - ETA: 40s - loss: 0.6029 - acc: 0.7812

  96/2733 [>.............................] - ETA: 43s - loss: 0.6773 - acc: 0.7188

 128/2733 [>.............................] - ETA: 44s - loss: 0.6782 - acc: 0.7109

 160/2733 [>.............................] - ETA: 45s - loss: 0.6717 - acc: 0.7312

 192/2733 [=>............................] - ETA: 43s - loss: 0.6735 - acc: 0.7396

 224/2733 [=>............................] - ETA: 43s - loss: 0.6813 - acc: 0.7277

 256/2733 [=>............................] - ETA: 43s - loss: 0.6751 - acc: 0.7344

 288/2733 [==>...........................] - ETA: 42s - loss: 0.6513 - acc: 0.7431

 320/2733 [==>...........................] - ETA: 41s - loss: 0.6456 - acc: 0.7406

 352/2733 [==>...........................] - ETA: 40s - loss: 0.6293 - acc: 0.7528

 384/2733 [===>..........................] - ETA: 39s - loss: 0.6194 - acc: 0.7578

 416/2733 [===>..........................] - ETA: 38s - loss: 0.6166 - acc: 0.7572

 448/2733 [===>..........................] - ETA: 38s - loss: 0.6035 - acc: 0.7679

 480/2733 [====>.........................] - ETA: 38s - loss: 0.6081 - acc: 0.7625

 512/2733 [====>.........................] - ETA: 37s - loss: 0.6122 - acc: 0.7637

 544/2733 [====>.........................] - ETA: 36s - loss: 0.6050 - acc: 0.7665

 576/2733 [=====>........................] - ETA: 36s - loss: 0.5976 - acc: 0.7726

 608/2733 [=====>........................] - ETA: 35s - loss: 0.6029 - acc: 0.7681








































































































































Epoch 5/10


  32/2733 [..............................] - ETA: 54s - loss: 0.8181 - acc: 0.7500

  64/2733 [..............................] - ETA: 52s - loss: 0.5662 - acc: 0.8281

  96/2733 [>.............................] - ETA: 49s - loss: 0.5840 - acc: 0.8021

 128/2733 [>.............................] - ETA: 48s - loss: 0.5948 - acc: 0.7969

 160/2733 [>.............................] - ETA: 46s - loss: 0.5708 - acc: 0.7812

 192/2733 [=>............................] - ETA: 44s - loss: 0.5730 - acc: 0.7812

 224/2733 [=>............................] - ETA: 44s - loss: 0.5161 - acc: 0.8080

 256/2733 [=>............................] - ETA: 43s - loss: 0.5082 - acc: 0.8086

 288/2733 [==>...........................] - ETA: 42s - loss: 0.4988 - acc: 0.8160

 320/2733 [==>...........................] - ETA: 41s - loss: 0.5111 - acc: 0.8063

 352/2733 [==>...........................] - ETA: 40s - loss: 0.5273 - acc: 0.7983

 384/2733 [===>..........................] - ETA: 39s - loss: 0.5120 - acc: 0.8073

 416/2733 [===>..........................] - ETA: 39s - loss: 0.5267 - acc: 0.8029

 448/2733 [===>..........................] - ETA: 38s - loss: 0.5340 - acc: 0.8013

 480/2733 [====>.........................] - ETA: 39s - loss: 0.5345 - acc: 0.8021

 512/2733 [====>.........................] - ETA: 38s - loss: 0.5195 - acc: 0.8066

 544/2733 [====>.........................] - ETA: 38s - loss: 0.5101 - acc: 0.8107

 576/2733 [=====>........................] - ETA: 37s - loss: 0.5043 - acc: 0.8142

 608/2733 [=====>........................] - ETA: 36s - loss: 0.4942 - acc: 0.8191








































































































































Epoch 6/10


  32/2733 [..............................] - ETA: 47s - loss: 0.6466 - acc: 0.7188

  64/2733 [..............................] - ETA: 45s - loss: 0.5173 - acc: 0.8125

  96/2733 [>.............................] - ETA: 45s - loss: 0.4388 - acc: 0.8333

 128/2733 [>.............................] - ETA: 45s - loss: 0.4638 - acc: 0.8203

 160/2733 [>.............................] - ETA: 44s - loss: 0.4532 - acc: 0.8250

 192/2733 [=>............................] - ETA: 42s - loss: 0.5065 - acc: 0.8177

 224/2733 [=>............................] - ETA: 42s - loss: 0.4850 - acc: 0.8259

 256/2733 [=>............................] - ETA: 41s - loss: 0.4801 - acc: 0.8281

 288/2733 [==>...........................] - ETA: 41s - loss: 0.4894 - acc: 0.8264

 320/2733 [==>...........................] - ETA: 40s - loss: 0.4886 - acc: 0.8281

 352/2733 [==>...........................] - ETA: 39s - loss: 0.4772 - acc: 0.8295

 384/2733 [===>..........................] - ETA: 38s - loss: 0.4757 - acc: 0.8281

 416/2733 [===>..........................] - ETA: 38s - loss: 0.4706 - acc: 0.8317

 448/2733 [===>..........................] - ETA: 37s - loss: 0.4785 - acc: 0.8281

 480/2733 [====>.........................] - ETA: 37s - loss: 0.4823 - acc: 0.8313

 512/2733 [====>.........................] - ETA: 36s - loss: 0.4693 - acc: 0.8359

 544/2733 [====>.........................] - ETA: 36s - loss: 0.4612 - acc: 0.8382

 576/2733 [=====>........................] - ETA: 35s - loss: 0.4607 - acc: 0.8368

 608/2733 [=====>........................] - ETA: 34s - loss: 0.4596 - acc: 0.8372








































































































































Epoch 7/10


  32/2733 [..............................] - ETA: 43s - loss: 0.3035 - acc: 0.8750

  64/2733 [..............................] - ETA: 41s - loss: 0.2962 - acc: 0.8906

  96/2733 [>.............................] - ETA: 41s - loss: 0.3653 - acc: 0.8750

 128/2733 [>.............................] - ETA: 40s - loss: 0.3699 - acc: 0.8750

 160/2733 [>.............................] - ETA: 39s - loss: 0.3491 - acc: 0.8875

 192/2733 [=>............................] - ETA: 39s - loss: 0.3428 - acc: 0.8906

 224/2733 [=>............................] - ETA: 38s - loss: 0.3519 - acc: 0.8839

 256/2733 [=>............................] - ETA: 38s - loss: 0.3351 - acc: 0.8906

 288/2733 [==>...........................] - ETA: 38s - loss: 0.3618 - acc: 0.8785

 320/2733 [==>...........................] - ETA: 37s - loss: 0.3448 - acc: 0.8844

 352/2733 [==>...........................] - ETA: 37s - loss: 0.3436 - acc: 0.8835

 384/2733 [===>..........................] - ETA: 36s - loss: 0.3751 - acc: 0.8672

 416/2733 [===>..........................] - ETA: 36s - loss: 0.3794 - acc: 0.8702

 448/2733 [===>..........................] - ETA: 35s - loss: 0.3818 - acc: 0.8705

 480/2733 [====>.........................] - ETA: 35s - loss: 0.3916 - acc: 0.8625

 512/2733 [====>.........................] - ETA: 34s - loss: 0.3933 - acc: 0.8613

 544/2733 [====>.........................] - ETA: 34s - loss: 0.3823 - acc: 0.8658

 576/2733 [=====>........................] - ETA: 33s - loss: 0.3852 - acc: 0.8681

 608/2733 [=====>........................] - ETA: 33s - loss: 0.3857 - acc: 0.8651








































































































































Epoch 8/10


  32/2733 [..............................] - ETA: 45s - loss: 0.3867 - acc: 0.8438

  64/2733 [..............................] - ETA: 43s - loss: 0.3570 - acc: 0.8750

  96/2733 [>.............................] - ETA: 43s - loss: 0.3592 - acc: 0.8646

 128/2733 [>.............................] - ETA: 41s - loss: 0.3523 - acc: 0.8516

 160/2733 [>.............................] - ETA: 40s - loss: 0.3681 - acc: 0.8562

 192/2733 [=>............................] - ETA: 40s - loss: 0.3677 - acc: 0.8594

 224/2733 [=>............................] - ETA: 39s - loss: 0.3717 - acc: 0.8616

 256/2733 [=>............................] - ETA: 39s - loss: 0.3817 - acc: 0.8594

 288/2733 [==>...........................] - ETA: 38s - loss: 0.3935 - acc: 0.8542

 320/2733 [==>...........................] - ETA: 38s - loss: 0.4096 - acc: 0.8469

 352/2733 [==>...........................] - ETA: 37s - loss: 0.3812 - acc: 0.8608

 384/2733 [===>..........................] - ETA: 36s - loss: 0.3763 - acc: 0.8620

 416/2733 [===>..........................] - ETA: 36s - loss: 0.3612 - acc: 0.8678

 448/2733 [===>..........................] - ETA: 36s - loss: 0.3680 - acc: 0.8616

 480/2733 [====>.........................] - ETA: 35s - loss: 0.3664 - acc: 0.8625

 512/2733 [====>.........................] - ETA: 35s - loss: 0.3832 - acc: 0.8555

 544/2733 [====>.........................] - ETA: 34s - loss: 0.3829 - acc: 0.8585

 576/2733 [=====>........................] - ETA: 34s - loss: 0.3827 - acc: 0.8576

 608/2733 [=====>........................] - ETA: 33s - loss: 0.3806 - acc: 0.8569








































































































































Epoch 9/10


  32/2733 [..............................] - ETA: 59s - loss: 0.5323 - acc: 0.8438

  64/2733 [..............................] - ETA: 50s - loss: 0.3889 - acc: 0.8750

  96/2733 [>.............................] - ETA: 52s - loss: 0.3479 - acc: 0.8958

 128/2733 [>.............................] - ETA: 56s - loss: 0.3394 - acc: 0.8906

 160/2733 [>.............................] - ETA: 58s - loss: 0.3359 - acc: 0.8812

 192/2733 [=>............................] - ETA: 59s - loss: 0.3439 - acc: 0.8698

 224/2733 [=>............................] - ETA: 56s - loss: 0.3579 - acc: 0.8571

 256/2733 [=>............................] - ETA: 53s - loss: 0.3649 - acc: 0.8633

 288/2733 [==>...........................] - ETA: 51s - loss: 0.3888 - acc: 0.8576

 320/2733 [==>...........................] - ETA: 49s - loss: 0.3839 - acc: 0.8594

 352/2733 [==>...........................] - ETA: 48s - loss: 0.3664 - acc: 0.8693

 384/2733 [===>..........................] - ETA: 47s - loss: 0.3447 - acc: 0.8802

 416/2733 [===>..........................] - ETA: 45s - loss: 0.3375 - acc: 0.8822

 448/2733 [===>..........................] - ETA: 44s - loss: 0.3335 - acc: 0.8839

 480/2733 [====>.........................] - ETA: 43s - loss: 0.3323 - acc: 0.8854

 512/2733 [====>.........................] - ETA: 42s - loss: 0.3354 - acc: 0.8828

 544/2733 [====>.........................] - ETA: 41s - loss: 0.3363 - acc: 0.8805

 576/2733 [=====>........................] - ETA: 40s - loss: 0.3313 - acc: 0.8837

 608/2733 [=====>........................] - ETA: 39s - loss: 0.3446 - acc: 0.8766








































































































































Epoch 10/10


  32/2733 [..............................] - ETA: 52s - loss: 0.1909 - acc: 0.9375

  64/2733 [..............................] - ETA: 50s - loss: 0.2473 - acc: 0.9062

  96/2733 [>.............................] - ETA: 46s - loss: 0.2405 - acc: 0.9167

 128/2733 [>.............................] - ETA: 45s - loss: 0.2632 - acc: 0.9062

 160/2733 [>.............................] - ETA: 44s - loss: 0.2727 - acc: 0.9062

 192/2733 [=>............................] - ETA: 43s - loss: 0.2757 - acc: 0.9062

 224/2733 [=>............................] - ETA: 42s - loss: 0.2627 - acc: 0.9107

 256/2733 [=>............................] - ETA: 41s - loss: 0.2796 - acc: 0.8906

 288/2733 [==>...........................] - ETA: 40s - loss: 0.2602 - acc: 0.8993

 320/2733 [==>...........................] - ETA: 40s - loss: 0.2473 - acc: 0.9062

 352/2733 [==>...........................] - ETA: 39s - loss: 0.2790 - acc: 0.8892

 384/2733 [===>..........................] - ETA: 39s - loss: 0.2841 - acc: 0.8880

 416/2733 [===>..........................] - ETA: 38s - loss: 0.2848 - acc: 0.8846

 448/2733 [===>..........................] - ETA: 38s - loss: 0.2814 - acc: 0.8862

 480/2733 [====>.........................] - ETA: 38s - loss: 0.2816 - acc: 0.8875

 512/2733 [====>.........................] - ETA: 38s - loss: 0.3020 - acc: 0.8828

 544/2733 [====>.........................] - ETA: 37s - loss: 0.2944 - acc: 0.8860

 576/2733 [=====>........................] - ETA: 36s - loss: 0.2957 - acc: 0.8854

 608/2733 [=====>........................] - ETA: 36s - loss: 0.2880 - acc: 0.8898








































































































































 32/908 [>.............................] - ETA: 1s

 64/908 [=>............................]

 - ETA: 1s

 96/908 [==>...........................] - ETA: 1s

128/908 [===>..........................] - ETA: 1s

160/908 [====>.........................] - ETA: 1s

192/908 [=====>........................] - ETA: 1s






















































Test score: 0.919888 
 Test accuracy: 0.740088


#Bi-LSTM Model

In [16]:
model = Sequential()
model.add(Embedding(MAX_FEATURES, 128))
model.add(Bidirectional(LSTM(64, activation='tanh'), merge_mode='concat'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
model.compile(loss='categorical_crossentropy',
              optimizer = 'rmsprop',
              metrics=['accuracy'])

In [17]:
print('Training model:')
model.fit(x_train_sequence, y_train,
                 batch_size=BATCH_SIZE,
                 nb_epoch=10,
                 validation_data=(x_test_sequence, y_test))
score, accuracy = model.evaluate(x_test_sequence, y_test,
                                        batch_size=BATCH_SIZE)
print('\n')
print('Test score: {0:7.6f} \n Test accuracy: {1:7.6f}'.format(score, accuracy))
model.save('bi-lstm.h5')

Training model:


Train on 2733 samples, validate on 908 samples
Epoch 1/10


  32/2733 [..............................] - ETA: 289s - loss: 1.4358 - acc: 0.0000e+00

  64/2733 [..............................] - ETA: 155s - loss: 1.3669 - acc: 0.3750    

  96/2733 [>.............................] - ETA: 109s - loss: 1.2981 - acc: 0.5000

 128/2733 [>.............................] - ETA: 86s - loss: 1.2402 - acc: 0.5312 

 160/2733 [>.............................] - ETA: 72s - loss: 1.1800 - acc: 0.5687

 192/2733 [=>............................] - ETA: 63s - loss: 1.1326 - acc: 0.5938

 224/2733 [=>............................] - ETA: 56s - loss: 1.0838 - acc: 0.6250

 256/2733 [=>............................] - ETA: 51s - loss: 1.0367 - acc: 0.6445

 288/2733 [==>...........................] - ETA: 47s - loss: 0.9944 - acc: 0.6667

 320/2733 [==>...........................] - ETA: 43s - loss: 0.9974 - acc: 0.6656

 352/2733 [==>...........................] - ETA: 41s - loss: 0.9722 - acc: 0.6761

 384/2733 [===>..........................] - ETA: 38s - loss: 0.9599 - acc: 0.6797

 416/2733 [===>..........................] - ETA: 36s - loss: 0.9395 - acc: 0.6899

 448/2733 [===>..........................] - ETA: 35s - loss: 0.9509 - acc: 0.6808

 480/2733 [====>.........................] - ETA: 33s - loss: 0.9387 - acc: 0.6813

 512/2733 [====>.........................] - ETA: 32s - loss: 0.9248 - acc: 0.6875

 544/2733 [====>.........................] - ETA: 30s - loss: 0.9133 - acc: 0.6893

 576/2733 [=====>........................] - ETA: 29s - loss: 0.9024 - acc: 0.6927

 608/2733 [=====>........................] - ETA: 28s - loss: 0.9024 - acc: 0.6908








































































































































Epoch 2/10


  32/2733 [..............................] - ETA: 26s - loss: 0.9071 - acc: 0.6562

  64/2733 [..............................] - ETA: 24s - loss: 0.7143 - acc: 0.7656

  96/2733 [>.............................] - ETA: 23s - loss: 0.6959 - acc: 0.7708

 128/2733 [>.............................] - ETA: 22s - loss: 0.6741 - acc: 0.7812

 160/2733 [>.............................] - ETA: 22s - loss: 0.6661 - acc: 0.7875

 192/2733 [=>............................] - ETA: 23s - loss: 0.6707 - acc: 0.7812

 224/2733 [=>............................] - ETA: 23s - loss: 0.6607 - acc: 0.7902

 256/2733 [=>............................] - ETA: 22s - loss: 0.6188 - acc: 0.8086

 288/2733 [==>...........................] - ETA: 22s - loss: 0.6231 - acc: 0.8021

 320/2733 [==>...........................] - ETA: 22s - loss: 0.6560 - acc: 0.7844

 352/2733 [==>...........................] - ETA: 23s - loss: 0.6561 - acc: 0.7841

 384/2733 [===>..........................] - ETA: 23s - loss: 0.6693 - acc: 0.7734

 416/2733 [===>..........................] - ETA: 24s - loss: 0.6767 - acc: 0.7668

 448/2733 [===>..........................] - ETA: 23s - loss: 0.6877 - acc: 0.7679

 480/2733 [====>.........................] - ETA: 23s - loss: 0.6868 - acc: 0.7688

 512/2733 [====>.........................] - ETA: 22s - loss: 0.6700 - acc: 0.7773

 544/2733 [====>.........................] - ETA: 22s - loss: 0.6661 - acc: 0.7776

 576/2733 [=====>........................] - ETA: 21s - loss: 0.6688 - acc: 0.7743

 608/2733 [=====>........................] - ETA: 20s - loss: 0.6814 - acc: 0.7648








































































































































Epoch 3/10


  32/2733 [..............................] - ETA: 24s - loss: 0.4649 - acc: 0.8438

  64/2733 [..............................] - ETA: 23s - loss: 0.5399 - acc: 0.8125

  96/2733 [>.............................] - ETA: 22s - loss: 0.5904 - acc: 0.7917

 128/2733 [>.............................] - ETA: 22s - loss: 0.5600 - acc: 0.8047

 160/2733 [>.............................] - ETA: 21s - loss: 0.5149 - acc: 0.8187

 192/2733 [=>............................] - ETA: 21s - loss: 0.5661 - acc: 0.7812

 224/2733 [=>............................] - ETA: 21s - loss: 0.5557 - acc: 0.7857

 256/2733 [=>............................] - ETA: 21s - loss: 0.5539 - acc: 0.7930

 288/2733 [==>...........................] - ETA: 20s - loss: 0.5450 - acc: 0.8021

 320/2733 [==>...........................] - ETA: 21s - loss: 0.5439 - acc: 0.8031

 352/2733 [==>...........................] - ETA: 21s - loss: 0.5467 - acc: 0.8011

 384/2733 [===>..........................] - ETA: 21s - loss: 0.5530 - acc: 0.7969

 416/2733 [===>..........................] - ETA: 20s - loss: 0.5696 - acc: 0.7837

 448/2733 [===>..........................] - ETA: 20s - loss: 0.5737 - acc: 0.7768

 480/2733 [====>.........................] - ETA: 19s - loss: 0.5708 - acc: 0.7792

 512/2733 [====>.........................] - ETA: 19s - loss: 0.5647 - acc: 0.7832

 544/2733 [====>.........................] - ETA: 19s - loss: 0.5559 - acc: 0.7904

 576/2733 [=====>........................] - ETA: 18s - loss: 0.5449 - acc: 0.7934

 608/2733 [=====>........................] - ETA: 18s - loss: 0.5467 - acc: 0.7928








































































































































Epoch 4/10


  32/2733 [..............................] - ETA: 24s - loss: 0.5716 - acc: 0.8125

  64/2733 [..............................] - ETA: 22s - loss: 0.4524 - acc: 0.8594

  96/2733 [>.............................] - ETA: 23s - loss: 0.4657 - acc: 0.8438

 128/2733 [>.............................] - ETA: 22s - loss: 0.4756 - acc: 0.8438

 160/2733 [>.............................] - ETA: 22s - loss: 0.4365 - acc: 0.8688

 192/2733 [=>............................] - ETA: 22s - loss: 0.4189 - acc: 0.8802

 224/2733 [=>............................] - ETA: 21s - loss: 0.4250 - acc: 0.8750

 256/2733 [=>............................] - ETA: 21s - loss: 0.4310 - acc: 0.8672

 288/2733 [==>...........................] - ETA: 21s - loss: 0.4371 - acc: 0.8611

 320/2733 [==>...........................] - ETA: 20s - loss: 0.4646 - acc: 0.8500

 352/2733 [==>...........................] - ETA: 20s - loss: 0.4491 - acc: 0.8523

 384/2733 [===>..........................] - ETA: 20s - loss: 0.4538 - acc: 0.8490

 416/2733 [===>..........................] - ETA: 20s - loss: 0.4580 - acc: 0.8486

 448/2733 [===>..........................] - ETA: 19s - loss: 0.4593 - acc: 0.8482

 480/2733 [====>.........................] - ETA: 19s - loss: 0.4628 - acc: 0.8479

 512/2733 [====>.........................] - ETA: 19s - loss: 0.4603 - acc: 0.8477

 544/2733 [====>.........................] - ETA: 18s - loss: 0.4482 - acc: 0.8511

 576/2733 [=====>........................] - ETA: 18s - loss: 0.4622 - acc: 0.8472

 608/2733 [=====>........................] - ETA: 18s - loss: 0.4485 - acc: 0.8536








































































































































Epoch 5/10


  32/2733 [..............................] - ETA: 21s - loss: 0.5264 - acc: 0.8125

  64/2733 [..............................] - ETA: 21s - loss: 0.4041 - acc: 0.8594

  96/2733 [>.............................] - ETA: 21s - loss: 0.3341 - acc: 0.8854

 128/2733 [>.............................] - ETA: 20s - loss: 0.3344 - acc: 0.8828

 160/2733 [>.............................] - ETA: 20s - loss: 0.3792 - acc: 0.8625

 192/2733 [=>............................] - ETA: 20s - loss: 0.4011 - acc: 0.8438

 224/2733 [=>............................] - ETA: 20s - loss: 0.3932 - acc: 0.8527

 256/2733 [=>............................] - ETA: 20s - loss: 0.3961 - acc: 0.8555

 288/2733 [==>...........................] - ETA: 19s - loss: 0.4086 - acc: 0.8472

 320/2733 [==>...........................] - ETA: 19s - loss: 0.4029 - acc: 0.8469

 352/2733 [==>...........................] - ETA: 19s - loss: 0.3988 - acc: 0.8551

 384/2733 [===>..........................] - ETA: 19s - loss: 0.4037 - acc: 0.8490

 416/2733 [===>..........................] - ETA: 19s - loss: 0.3978 - acc: 0.8534

 448/2733 [===>..........................] - ETA: 19s - loss: 0.3967 - acc: 0.8571

 480/2733 [====>.........................] - ETA: 19s - loss: 0.3982 - acc: 0.8521

 512/2733 [====>.........................] - ETA: 19s - loss: 0.4043 - acc: 0.8516

 544/2733 [====>.........................] - ETA: 18s - loss: 0.4061 - acc: 0.8456

 576/2733 [=====>........................] - ETA: 18s - loss: 0.3973 - acc: 0.8507

 608/2733 [=====>........................] - ETA: 18s - loss: 0.3875 - acc: 0.8553








































































































































Epoch 6/10


  32/2733 [..............................] - ETA: 22s - loss: 0.2723 - acc: 0.9062

  64/2733 [..............................] - ETA: 22s - loss: 0.3667 - acc: 0.8594

  96/2733 [>.............................] - ETA: 21s - loss: 0.3481 - acc: 0.8750

 128/2733 [>.............................] - ETA: 21s - loss: 0.3185 - acc: 0.8906

 160/2733 [>.............................] - ETA: 23s - loss: 0.3096 - acc: 0.8938

 192/2733 [=>............................] - ETA: 23s - loss: 0.3092 - acc: 0.8906

 224/2733 [=>............................] - ETA: 23s - loss: 0.3024 - acc: 0.8973

 256/2733 [=>............................] - ETA: 22s - loss: 0.3175 - acc: 0.8945

 288/2733 [==>...........................] - ETA: 22s - loss: 0.3233 - acc: 0.8958

 320/2733 [==>...........................] - ETA: 22s - loss: 0.3380 - acc: 0.8938

 352/2733 [==>...........................] - ETA: 21s - loss: 0.3245 - acc: 0.8977

 384/2733 [===>..........................] - ETA: 21s - loss: 0.3509 - acc: 0.8906

 416/2733 [===>..........................] - ETA: 21s - loss: 0.3488 - acc: 0.8942

 448/2733 [===>..........................] - ETA: 20s - loss: 0.3497 - acc: 0.8929

 480/2733 [====>.........................] - ETA: 20s - loss: 0.3475 - acc: 0.8938

 512/2733 [====>.........................] - ETA: 19s - loss: 0.3336 - acc: 0.9004

 544/2733 [====>.........................] - ETA: 19s - loss: 0.3219 - acc: 0.9026

 576/2733 [=====>........................] - ETA: 19s - loss: 0.3371 - acc: 0.9010

 608/2733 [=====>........................] - ETA: 18s - loss: 0.3323 - acc: 0.9013








































































































































Epoch 7/10


  32/2733 [..............................] - ETA: 23s - loss: 0.3102 - acc: 0.9062

  64/2733 [..............................] - ETA: 23s - loss: 0.3928 - acc: 0.8594

  96/2733 [>.............................] - ETA: 22s - loss: 0.3619 - acc: 0.8542

 128/2733 [>.............................] - ETA: 22s - loss: 0.3321 - acc: 0.8828

 160/2733 [>.............................] - ETA: 22s - loss: 0.3007 - acc: 0.9000

 192/2733 [=>............................] - ETA: 21s - loss: 0.2816 - acc: 0.9062

 224/2733 [=>............................] - ETA: 21s - loss: 0.2702 - acc: 0.9062

 256/2733 [=>............................] - ETA: 21s - loss: 0.2640 - acc: 0.9102

 288/2733 [==>...........................] - ETA: 21s - loss: 0.2515 - acc: 0.9167

 320/2733 [==>...........................] - ETA: 20s - loss: 0.2732 - acc: 0.9062

 352/2733 [==>...........................] - ETA: 20s - loss: 0.2843 - acc: 0.9034

 384/2733 [===>..........................] - ETA: 19s - loss: 0.2770 - acc: 0.9062

 416/2733 [===>..........................] - ETA: 19s - loss: 0.2751 - acc: 0.9062

 448/2733 [===>..........................] - ETA: 19s - loss: 0.2729 - acc: 0.9062

 480/2733 [====>.........................] - ETA: 18s - loss: 0.2954 - acc: 0.9000

 512/2733 [====>.........................] - ETA: 18s - loss: 0.2889 - acc: 0.9004

 544/2733 [====>.........................] - ETA: 18s - loss: 0.2751 - acc: 0.9062

 576/2733 [=====>........................] - ETA: 18s - loss: 0.2723 - acc: 0.9045

 608/2733 [=====>........................] - ETA: 18s - loss: 0.2759 - acc: 0.9046








































































































































Epoch 8/10


  32/2733 [..............................] - ETA: 25s - loss: 0.2933 - acc: 0.9062

  64/2733 [..............................] - ETA: 25s - loss: 0.2585 - acc: 0.9219

  96/2733 [>.............................] - ETA: 24s - loss: 0.2179 - acc: 0.9271

 128/2733 [>.............................] - ETA: 24s - loss: 0.1883 - acc: 0.9453

 160/2733 [>.............................] - ETA: 23s - loss: 0.2518 - acc: 0.9313

 192/2733 [=>............................] - ETA: 24s - loss: 0.2490 - acc: 0.9323

 224/2733 [=>............................] - ETA: 23s - loss: 0.2314 - acc: 0.9375

 256/2733 [=>............................] - ETA: 23s - loss: 0.2379 - acc: 0.9414

 288/2733 [==>...........................] - ETA: 23s - loss: 0.2230 - acc: 0.9479

 320/2733 [==>...........................] - ETA: 23s - loss: 0.2286 - acc: 0.9437

 352/2733 [==>...........................] - ETA: 23s - loss: 0.2382 - acc: 0.9375

 384/2733 [===>..........................] - ETA: 22s - loss: 0.2287 - acc: 0.9401

 416/2733 [===>..........................] - ETA: 22s - loss: 0.2178 - acc: 0.9423

 448/2733 [===>..........................] - ETA: 21s - loss: 0.2137 - acc: 0.9420

 480/2733 [====>.........................] - ETA: 21s - loss: 0.2166 - acc: 0.9396

 512/2733 [====>.........................] - ETA: 20s - loss: 0.2301 - acc: 0.9316

 544/2733 [====>.........................] - ETA: 20s - loss: 0.2308 - acc: 0.9283

 576/2733 [=====>........................] - ETA: 19s - loss: 0.2242 - acc: 0.9306

 608/2733 [=====>........................] - ETA: 19s - loss: 0.2205 - acc: 0.9326








































































































































Epoch 9/10


  32/2733 [..............................] - ETA: 21s - loss: 0.1202 - acc: 0.9688

  64/2733 [..............................] - ETA: 20s - loss: 0.1234 - acc: 0.9844

  96/2733 [>.............................] - ETA: 20s - loss: 0.1582 - acc: 0.9688

 128/2733 [>.............................] - ETA: 20s - loss: 0.1333 - acc: 0.9766

 160/2733 [>.............................] - ETA: 19s - loss: 0.1291 - acc: 0.9688

 192/2733 [=>............................] - ETA: 19s - loss: 0.1411 - acc: 0.9583

 224/2733 [=>............................] - ETA: 19s - loss: 0.1732 - acc: 0.9509

 256/2733 [=>............................] - ETA: 20s - loss: 0.1910 - acc: 0.9414

 288/2733 [==>...........................] - ETA: 20s - loss: 0.1782 - acc: 0.9479

 320/2733 [==>...........................] - ETA: 20s - loss: 0.1730 - acc: 0.9500

 352/2733 [==>...........................] - ETA: 20s - loss: 0.1807 - acc: 0.9489

 384/2733 [===>..........................] - ETA: 20s - loss: 0.1799 - acc: 0.9505

 416/2733 [===>..........................] - ETA: 20s - loss: 0.1854 - acc: 0.9423

 448/2733 [===>..........................] - ETA: 20s - loss: 0.1787 - acc: 0.9442

 480/2733 [====>.........................] - ETA: 20s - loss: 0.1851 - acc: 0.9375

 512/2733 [====>.........................] - ETA: 19s - loss: 0.1800 - acc: 0.9395

 544/2733 [====>.........................] - ETA: 19s - loss: 0.1842 - acc: 0.9393

 576/2733 [=====>........................] - ETA: 18s - loss: 0.1839 - acc: 0.9392

 608/2733 [=====>........................] - ETA: 18s - loss: 0.1784 - acc: 0.9408








































































































































Epoch 10/10


  32/2733 [..............................] - ETA: 31s - loss: 0.1963 - acc: 0.9375

  64/2733 [..............................] - ETA: 33s - loss: 0.2249 - acc: 0.9219

  96/2733 [>.............................] - ETA: 33s - loss: 0.1738 - acc: 0.9479

 128/2733 [>.............................] - ETA: 30s - loss: 0.1480 - acc: 0.9531

 160/2733 [>.............................] - ETA: 28s - loss: 0.1620 - acc: 0.9437

 192/2733 [=>............................] - ETA: 27s - loss: 0.1382 - acc: 0.9531

 224/2733 [=>............................] - ETA: 26s - loss: 0.1283 - acc: 0.9598

 256/2733 [=>............................] - ETA: 25s - loss: 0.1649 - acc: 0.9492

 288/2733 [==>...........................] - ETA: 24s - loss: 0.1542 - acc: 0.9514

 320/2733 [==>...........................] - ETA: 23s - loss: 0.1507 - acc: 0.9531

 352/2733 [==>...........................] - ETA: 23s - loss: 0.1474 - acc: 0.9545

 384/2733 [===>..........................] - ETA: 22s - loss: 0.1486 - acc: 0.9505

 416/2733 [===>..........................] - ETA: 22s - loss: 0.1761 - acc: 0.9495

 448/2733 [===>..........................] - ETA: 22s - loss: 0.1657 - acc: 0.9531

 480/2733 [====>.........................] - ETA: 21s - loss: 0.1714 - acc: 0.9479

 512/2733 [====>.........................] - ETA: 21s - loss: 0.1631 - acc: 0.9512

 544/2733 [====>.........................] - ETA: 20s - loss: 0.1640 - acc: 0.9485

 576/2733 [=====>........................] - ETA: 20s - loss: 0.1675 - acc: 0.9462

 608/2733 [=====>........................] - ETA: 19s - loss: 0.1669 - acc: 0.9457








































































































































 32/908 [>.............................] - ETA: 1s

 96/908 [==>...........................] - ETA: 1s

160/908 [====>.........................] - ETA: 1s





































Test score: 0.928017 
 Test accuracy: 0.737885


#GRU model

In [45]:
model = Sequential()
model.add(Embedding(MAX_FEATURES, 128))
model.add(GRU(32, dropout_W=0.2, dropout_U=0.2, return_sequences=True))
model.add(GRU(64, dropout_W=0.2, dropout_U=0.2, return_sequences=True))
model.add(GRU(128, dropout_W=0.2, dropout_U=0.2))
model.add(Dense(4, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [46]:
print('Training model:')
model.fit(x_train_sequence, y_train,
                 batch_size=BATCH_SIZE,
                 nb_epoch=10,
                 validation_data=(x_test_sequence, y_test))
score, accuracy = model.evaluate(x_test_sequence, y_test,
                                        batch_size=BATCH_SIZE)
print('\n')
print('Test score: {0:7.6f} \n Test accuracy: {1:7.6f}'.format(score, accuracy))
model.save('gru.h5')

Training model:


Train on 2733 samples, validate on 908 samples
Epoch 1/10


  32/2733 [..............................] - ETA: 444s - loss: 0.6862 - acc: 0.6484

  64/2733 [..............................] - ETA: 275s - loss: 0.6433 - acc: 0.7852

  96/2733 [>.............................] - ETA: 205s - loss: 0.5812 - acc: 0.8203

 128/2733 [>.............................] - ETA: 168s - loss: 0.5314 - acc: 0.8301

 160/2733 [>.............................] - ETA: 151s - loss: 0.4817 - acc: 0.8484

 192/2733 [=>............................] - ETA: 141s - loss: 0.4583 - acc: 0.8555

 224/2733 [=>............................] - ETA: 128s - loss: 0.4330 - acc: 0.8627

 256/2733 [=>............................] - ETA: 118s - loss: 0.4250 - acc: 0.8623

 288/2733 [==>...........................] - ETA: 111s - loss: 0.4222 - acc: 0.8568

 320/2733 [==>...........................] - ETA: 105s - loss: 0.4235 - acc: 0.8508

 352/2733 [==>...........................] - ETA: 101s - loss: 0.4152 - acc: 0.8537

 384/2733 [===>..........................] - ETA: 95s - loss: 0.4003 - acc: 0.8607 

 416/2733 [===>..........................] - ETA: 90s - loss: 0.3896 - acc: 0.8642

 448/2733 [===>..........................] - ETA: 85s - loss: 0.3905 - acc: 0.8616

 480/2733 [====>.........................] - ETA: 81s - loss: 0.3890 - acc: 0.8604

 512/2733 [====>.........................] - ETA: 78s - loss: 0.3811 - acc: 0.8643

 544/2733 [====>.........................] - ETA: 75s - loss: 0.3805 - acc: 0.8631

 576/2733 [=====>........................] - ETA: 72s - loss: 0.3805 - acc: 0.8611

 608/2733 [=====>........................] - ETA: 69s - loss: 0.3784 - acc: 0.8610








































































































































Epoch 2/10


  32/2733 [..............................] - ETA: 55s - loss: 0.3041 - acc: 0.8672

  64/2733 [..............................] - ETA: 51s - loss: 0.3434 - acc: 0.8516

  96/2733 [>.............................] - ETA: 49s - loss: 0.3128 - acc: 0.8724

 128/2733 [>.............................] - ETA: 50s - loss: 0.3151 - acc: 0.8691

 160/2733 [>.............................] - ETA: 53s - loss: 0.3053 - acc: 0.8703

 192/2733 [=>............................] - ETA: 52s - loss: 0.3068 - acc: 0.8698

 224/2733 [=>............................] - ETA: 52s - loss: 0.3058 - acc: 0.8694

 256/2733 [=>............................] - ETA: 50s - loss: 0.3082 - acc: 0.8682

 288/2733 [==>...........................] - ETA: 49s - loss: 0.3050 - acc: 0.8724

 320/2733 [==>...........................] - ETA: 48s - loss: 0.2976 - acc: 0.8781

 352/2733 [==>...........................] - ETA: 47s - loss: 0.2842 - acc: 0.8842

 384/2733 [===>..........................] - ETA: 47s - loss: 0.2720 - acc: 0.8919

 416/2733 [===>..........................] - ETA: 46s - loss: 0.2756 - acc: 0.8906

 448/2733 [===>..........................] - ETA: 45s - loss: 0.2741 - acc: 0.8895

 480/2733 [====>.........................] - ETA: 44s - loss: 0.2712 - acc: 0.8896

 512/2733 [====>.........................] - ETA: 44s - loss: 0.2732 - acc: 0.8872

 544/2733 [====>.........................] - ETA: 43s - loss: 0.2734 - acc: 0.8874

 576/2733 [=====>........................] - ETA: 42s - loss: 0.2717 - acc: 0.8880

 608/2733 [=====>........................] - ETA: 42s - loss: 0.2675 - acc: 0.8919








































































































































Epoch 3/10


  32/2733 [..............................] - ETA: 59s - loss: 0.1916 - acc: 0.9219

  64/2733 [..............................] - ETA: 54s - loss: 0.2318 - acc: 0.8945

  96/2733 [>.............................] - ETA: 51s - loss: 0.2269 - acc: 0.9036

 128/2733 [>.............................] - ETA: 49s - loss: 0.2303 - acc: 0.9023

 160/2733 [>.............................] - ETA: 48s - loss: 0.2062 - acc: 0.9172

 192/2733 [=>............................] - ETA: 47s - loss: 0.2056 - acc: 0.9167

 224/2733 [=>............................] - ETA: 46s - loss: 0.2191 - acc: 0.9141

 256/2733 [=>............................] - ETA: 45s - loss: 0.2179 - acc: 0.9150

 288/2733 [==>...........................] - ETA: 45s - loss: 0.2086 - acc: 0.9201

 320/2733 [==>...........................] - ETA: 44s - loss: 0.2133 - acc: 0.9187

 352/2733 [==>...........................] - ETA: 43s - loss: 0.2098 - acc: 0.9183

 384/2733 [===>..........................] - ETA: 43s - loss: 0.2066 - acc: 0.9193

 416/2733 [===>..........................] - ETA: 42s - loss: 0.2051 - acc: 0.9189

 448/2733 [===>..........................] - ETA: 41s - loss: 0.2033 - acc: 0.9191

 480/2733 [====>.........................] - ETA: 40s - loss: 0.2041 - acc: 0.9182

 512/2733 [====>.........................] - ETA: 40s - loss: 0.2057 - acc: 0.9170

 544/2733 [====>.........................] - ETA: 39s - loss: 0.2087 - acc: 0.9168

 576/2733 [=====>........................] - ETA: 39s - loss: 0.2097 - acc: 0.9180

 608/2733 [=====>........................] - ETA: 38s - loss: 0.2112 - acc: 0.9182








































































































































Epoch 4/10


  32/2733 [..............................] - ETA: 51s - loss: 0.2284 - acc: 0.8984

  64/2733 [..............................] - ETA: 54s - loss: 0.2448 - acc: 0.8867

  96/2733 [>.............................] - ETA: 55s - loss: 0.2087 - acc: 0.9115

 128/2733 [>.............................] - ETA: 53s - loss: 0.1767 - acc: 0.9297

 160/2733 [>.............................] - ETA: 51s - loss: 0.1701 - acc: 0.9313

 192/2733 [=>............................] - ETA: 50s - loss: 0.1829 - acc: 0.9258

 224/2733 [=>............................] - ETA: 48s - loss: 0.1831 - acc: 0.9286

 256/2733 [=>............................] - ETA: 47s - loss: 0.1841 - acc: 0.9297

 288/2733 [==>...........................] - ETA: 46s - loss: 0.1824 - acc: 0.9288

 320/2733 [==>...........................] - ETA: 45s - loss: 0.1920 - acc: 0.9234

 352/2733 [==>...........................] - ETA: 44s - loss: 0.1881 - acc: 0.9247

 384/2733 [===>..........................] - ETA: 43s - loss: 0.1858 - acc: 0.9258

 416/2733 [===>..........................] - ETA: 43s - loss: 0.1849 - acc: 0.9279

 448/2733 [===>..........................] - ETA: 42s - loss: 0.1819 - acc: 0.9286

 480/2733 [====>.........................] - ETA: 41s - loss: 0.1823 - acc: 0.9297

 512/2733 [====>.........................] - ETA: 41s - loss: 0.1868 - acc: 0.9272

 544/2733 [====>.........................] - ETA: 40s - loss: 0.1828 - acc: 0.9283

 576/2733 [=====>........................] - ETA: 39s - loss: 0.1810 - acc: 0.9297

 608/2733 [=====>........................] - ETA: 39s - loss: 0.1867 - acc: 0.9268








































































































































Epoch 5/10


  32/2733 [..............................] - ETA: 52s - loss: 0.1342 - acc: 0.9375

  64/2733 [..............................] - ETA: 51s - loss: 0.1116 - acc: 0.9531

  96/2733 [>.............................] - ETA: 49s - loss: 0.1214 - acc: 0.9557

 128/2733 [>.............................] - ETA: 47s - loss: 0.1286 - acc: 0.9512

 160/2733 [>.............................] - ETA: 46s - loss: 0.1755 - acc: 0.9313

 192/2733 [=>............................] - ETA: 46s - loss: 0.1708 - acc: 0.9323

 224/2733 [=>............................] - ETA: 46s - loss: 0.1861 - acc: 0.9275

 256/2733 [=>............................] - ETA: 46s - loss: 0.1791 - acc: 0.9297

 288/2733 [==>...........................] - ETA: 46s - loss: 0.1709 - acc: 0.9340

 320/2733 [==>...........................] - ETA: 45s - loss: 0.1609 - acc: 0.9383

 352/2733 [==>...........................] - ETA: 44s - loss: 0.1583 - acc: 0.9375

 384/2733 [===>..........................] - ETA: 44s - loss: 0.1581 - acc: 0.9368

 416/2733 [===>..........................] - ETA: 43s - loss: 0.1531 - acc: 0.9393

 448/2733 [===>..........................] - ETA: 43s - loss: 0.1536 - acc: 0.9386

 480/2733 [====>.........................] - ETA: 42s - loss: 0.1493 - acc: 0.9406

 512/2733 [====>.........................] - ETA: 41s - loss: 0.1496 - acc: 0.9409

 544/2733 [====>.........................] - ETA: 41s - loss: 0.1466 - acc: 0.9421

 576/2733 [=====>........................] - ETA: 40s - loss: 0.1454 - acc: 0.9423

 608/2733 [=====>........................] - ETA: 40s - loss: 0.1462 - acc: 0.9428








































































































































Epoch 6/10


  32/2733 [..............................] - ETA: 53s - loss: 0.0916 - acc: 0.9688

  64/2733 [..............................] - ETA: 50s - loss: 0.0915 - acc: 0.9648

  96/2733 [>.............................] - ETA: 47s - loss: 0.0799 - acc: 0.9714

 128/2733 [>.............................] - ETA: 47s - loss: 0.1061 - acc: 0.9590

 160/2733 [>.............................] - ETA: 47s - loss: 0.1143 - acc: 0.9563

 192/2733 [=>............................] - ETA: 46s - loss: 0.1171 - acc: 0.9531

 224/2733 [=>............................] - ETA: 47s - loss: 0.1181 - acc: 0.9542

 256/2733 [=>............................] - ETA: 47s - loss: 0.1186 - acc: 0.9541

 288/2733 [==>...........................] - ETA: 46s - loss: 0.1175 - acc: 0.9557

 320/2733 [==>...........................] - ETA: 45s - loss: 0.1234 - acc: 0.9531

 352/2733 [==>...........................] - ETA: 44s - loss: 0.1188 - acc: 0.9560

 384/2733 [===>..........................] - ETA: 43s - loss: 0.1144 - acc: 0.9577

 416/2733 [===>..........................] - ETA: 43s - loss: 0.1079 - acc: 0.9603

 448/2733 [===>..........................] - ETA: 42s - loss: 0.1070 - acc: 0.9615

 480/2733 [====>.........................] - ETA: 42s - loss: 0.1063 - acc: 0.9609

 512/2733 [====>.........................] - ETA: 41s - loss: 0.1106 - acc: 0.9580

 544/2733 [====>.........................] - ETA: 40s - loss: 0.1134 - acc: 0.9563

 576/2733 [=====>........................] - ETA: 41s - loss: 0.1215 - acc: 0.9531

 608/2733 [=====>........................] - ETA: 41s - loss: 0.1212 - acc: 0.9539








































































































































Epoch 7/10


  32/2733 [..............................] - ETA: 50s - loss: 0.1885 - acc: 0.9297

  64/2733 [..............................] - ETA: 50s - loss: 0.1159 - acc: 0.9609

  96/2733 [>.............................] - ETA: 49s - loss: 0.1190 - acc: 0.9609

 128/2733 [>.............................] - ETA: 48s - loss: 0.1261 - acc: 0.9531

 160/2733 [>.............................] - ETA: 49s - loss: 0.1224 - acc: 0.9547

 192/2733 [=>............................] - ETA: 48s - loss: 0.1108 - acc: 0.9609

 224/2733 [=>............................] - ETA: 48s - loss: 0.1045 - acc: 0.9643

 256/2733 [=>............................] - ETA: 47s - loss: 0.1039 - acc: 0.9639

 288/2733 [==>...........................] - ETA: 46s - loss: 0.1031 - acc: 0.9627

 320/2733 [==>...........................] - ETA: 45s - loss: 0.1011 - acc: 0.9641

 352/2733 [==>...........................] - ETA: 44s - loss: 0.1059 - acc: 0.9624

 384/2733 [===>..........................] - ETA: 44s - loss: 0.1034 - acc: 0.9629

 416/2733 [===>..........................] - ETA: 43s - loss: 0.1090 - acc: 0.9615

 448/2733 [===>..........................] - ETA: 43s - loss: 0.1081 - acc: 0.9626

 480/2733 [====>.........................] - ETA: 42s - loss: 0.1127 - acc: 0.9604

 512/2733 [====>.........................] - ETA: 41s - loss: 0.1095 - acc: 0.9614

 544/2733 [====>.........................] - ETA: 41s - loss: 0.1058 - acc: 0.9623

 576/2733 [=====>........................] - ETA: 40s - loss: 0.1051 - acc: 0.9627

 608/2733 [=====>........................] - ETA: 40s - loss: 0.1065 - acc: 0.9622








































































































































Epoch 8/10


  32/2733 [..............................] - ETA: 58s - loss: 0.0922 - acc: 0.9688

  64/2733 [..............................] - ETA: 52s - loss: 0.0758 - acc: 0.9727

  96/2733 [>.............................] - ETA: 51s - loss: 0.0740 - acc: 0.9714

 128/2733 [>.............................] - ETA: 51s - loss: 0.0647 - acc: 0.9785

 160/2733 [>.............................] - ETA: 49s - loss: 0.0578 - acc: 0.9828

 192/2733 [=>............................] - ETA: 47s - loss: 0.0713 - acc: 0.9779

 224/2733 [=>............................] - ETA: 46s - loss: 0.0679 - acc: 0.9799

 256/2733 [=>............................] - ETA: 45s - loss: 0.0634 - acc: 0.9824

 288/2733 [==>...........................] - ETA: 44s - loss: 0.0590 - acc: 0.9844

 320/2733 [==>...........................] - ETA: 44s - loss: 0.0673 - acc: 0.9789

 352/2733 [==>...........................] - ETA: 44s - loss: 0.0637 - acc: 0.9801

 384/2733 [===>..........................] - ETA: 43s - loss: 0.0755 - acc: 0.9753

 416/2733 [===>..........................] - ETA: 43s - loss: 0.0779 - acc: 0.9736

 448/2733 [===>..........................] - ETA: 42s - loss: 0.0784 - acc: 0.9721

 480/2733 [====>.........................] - ETA: 42s - loss: 0.0798 - acc: 0.9714

 512/2733 [====>.........................] - ETA: 41s - loss: 0.0910 - acc: 0.9683

 544/2733 [====>.........................] - ETA: 40s - loss: 0.0897 - acc: 0.9692

 576/2733 [=====>........................] - ETA: 40s - loss: 0.0874 - acc: 0.9701

 608/2733 [=====>........................] - ETA: 39s - loss: 0.0873 - acc: 0.9700








































































































































Epoch 9/10


  32/2733 [..............................] - ETA: 47s - loss: 0.0645 - acc: 0.9766

  64/2733 [..............................] - ETA: 49s - loss: 0.0665 - acc: 0.9805

  96/2733 [>.............................] - ETA: 47s - loss: 0.0567 - acc: 0.9844

 128/2733 [>.............................] - ETA: 46s - loss: 0.0490 - acc: 0.9863

 160/2733 [>.............................] - ETA: 45s - loss: 0.0474 - acc: 0.9859

 192/2733 [=>............................] - ETA: 45s - loss: 0.0554 - acc: 0.9818

 224/2733 [=>............................] - ETA: 44s - loss: 0.0661 - acc: 0.9754

 256/2733 [=>............................] - ETA: 43s - loss: 0.0745 - acc: 0.9688

 288/2733 [==>...........................] - ETA: 42s - loss: 0.0793 - acc: 0.9670

 320/2733 [==>...........................] - ETA: 42s - loss: 0.0791 - acc: 0.9672

 352/2733 [==>...........................] - ETA: 42s - loss: 0.0744 - acc: 0.9702

 384/2733 [===>..........................] - ETA: 41s - loss: 0.0696 - acc: 0.9727

 416/2733 [===>..........................] - ETA: 40s - loss: 0.0713 - acc: 0.9724

 448/2733 [===>..........................] - ETA: 40s - loss: 0.0681 - acc: 0.9738

 480/2733 [====>.........................] - ETA: 39s - loss: 0.0715 - acc: 0.9740

 512/2733 [====>.........................] - ETA: 39s - loss: 0.0738 - acc: 0.9736

 544/2733 [====>.........................] - ETA: 38s - loss: 0.0755 - acc: 0.9733

 576/2733 [=====>........................] - ETA: 38s - loss: 0.0749 - acc: 0.9740

 608/2733 [=====>........................] - ETA: 37s - loss: 0.0718 - acc: 0.9749








































































































































Epoch 10/10


  32/2733 [..............................] - ETA: 53s - loss: 0.0596 - acc: 0.9766

  64/2733 [..............................] - ETA: 52s - loss: 0.0676 - acc: 0.9805

  96/2733 [>.............................] - ETA: 51s - loss: 0.0830 - acc: 0.9688

 128/2733 [>.............................] - ETA: 49s - loss: 0.0777 - acc: 0.9668

 160/2733 [>.............................] - ETA: 48s - loss: 0.0708 - acc: 0.9703

 192/2733 [=>............................] - ETA: 47s - loss: 0.0764 - acc: 0.9727

 224/2733 [=>............................] - ETA: 46s - loss: 0.0676 - acc: 0.9766

 256/2733 [=>............................] - ETA: 45s - loss: 0.0605 - acc: 0.9795

 288/2733 [==>...........................] - ETA: 44s - loss: 0.0655 - acc: 0.9783

 320/2733 [==>...........................] - ETA: 44s - loss: 0.0665 - acc: 0.9781

 352/2733 [==>...........................] - ETA: 43s - loss: 0.0699 - acc: 0.9766

 384/2733 [===>..........................] - ETA: 43s - loss: 0.0715 - acc: 0.9766

 416/2733 [===>..........................] - ETA: 43s - loss: 0.0711 - acc: 0.9760

 448/2733 [===>..........................] - ETA: 42s - loss: 0.0699 - acc: 0.9754

 480/2733 [====>.........................] - ETA: 41s - loss: 0.0725 - acc: 0.9745

 512/2733 [====>.........................] - ETA: 41s - loss: 0.0696 - acc: 0.9756

 544/2733 [====>.........................] - ETA: 40s - loss: 0.0691 - acc: 0.9761

 576/2733 [=====>........................] - ETA: 39s - loss: 0.0696 - acc: 0.9757

 608/2733 [=====>........................] - ETA: 39s - loss: 0.0676 - acc: 0.9762








































































































































 32/908 [>.............................] - ETA: 2s

 64/908 [=>............................] - ETA: 2s

 96/908 [==>...........................] - ETA: 2s

128/908 [===>..........................] - ETA: 2s

160/908 [====>.........................] - ETA: 1s

192/908 [=====>........................] - ETA: 1s


















































Test score: 0.443774 
 Test accuracy: 0.874449


#Analisys

###Evaluating word counts in test corpus

In [36]:
test_corpus = ' '.join(test_data['sentence'].tolist()).split()
word_counts = {}
for word in test_corpus:
    if word in tokenizer.word_index and word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1

In [None]:
lexicon_df = pd.DataFrame.from_dict(word_counts, orient='index')
lexicon_df.reset_index(level=0, inplace=True)
lexicon_df = lexicon_df.rename(columns={'index': 'word', 0: 'count'})
lexicon_df = lexicon_df.loc[lexicon_df['count'] > 3]

## Bi-directional LSTM

In [147]:
influential_words = []
for word in lexicon_df['word']:
    data_with_word = test_data[test_data['sentence'].str.contains(' ' + word + ' ')]
    x_test_with_word = sequence.pad_sequences(tokenizer.texts_to_sequences(data_with_word['sentence'].tolist()))
    y_test_of_selected = to_categorical(le.transform(data_with_word['polarity']), 4)
    score_with_word, accuracy_with_word = model.evaluate(x_test_with_word, y_test_of_selected, batch_size=BATCH_SIZE)
    data_without_word = data_with_word['sentence'].str.replace(word, '')
    x_test_without_word = sequence.pad_sequences(tokenizer.texts_to_sequences(data_without_word.tolist()))
    score_without_word, accuracy_without_word = model.evaluate(x_test_without_word, y_test_of_selected, batch_size=BATCH_SIZE)
    influential_words.append({'word': word, 'included': accuracy_with_word, 'erased': accuracy_without_word, 'delta': accuracy_with_word - accuracy_without_word })

 32/175 [====>.........................] - ETA: 0s











 32/175 [====>.........................] - ETA: 0s




















































































































































































































































































 32/175 [====>.........................] - ETA: 0s





 32/175 [====>.........................] - ETA: 0s





























 - 0s








 32/554 [>.............................] - ETA: 0s



 96/554 [====>.........................] - ETA: 0s




















 32/554 [>.............................] - ETA: 0s

 96/554 [====>.........................] - ETA: 0s





























































































































































 32/150 [=====>........................] - ETA: 0s






 32/150 [=====>........................] - ETA: 0s











































 32/448 [=>............................] - ETA: 0s

 64/448 [===>..........................] - ETA: 0s
















 32/448 [=>............................] - ETA: 0s

 96/448 [=====>........................] - ETA: 0s























































































































 - 0s







































































 32/193 [===>..........................] - ETA: 0s









 32/193 [===>..........................] - ETA: 0s





























































































 32/219 [===>..........................] - ETA: 0s










 32/219 [===>..........................] - ETA: 0s


























































 - 0s








































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































 - 0s








 - ETA: 0s






















































































































































































































 - 0s





























































































































































































































































 32/853 [>.............................] - ETA: 1s

 64/853 [=>............................] - ETA: 1s

128/853 [===>..........................] - ETA: 1s

160/853 [====>.........................] - ETA: 1s























 32/853 [>.............................] - ETA: 1s

 96/853 [==>...........................] - ETA: 0s

160/853 [====>.........................] - ETA: 0s


























 32/638 [>.............................] - ETA: 1s

 96/638 [===>..........................] - ETA: 0s



















 32/638 [>.............................] - ETA: 0s

 96/638 [===>..........................] - ETA: 0s













































































































































































































































































































































































































































































































 32/790 [>.............................] - ETA: 1s

 96/790 [==>...........................] - ETA: 0s

160/790 [=====>........................] - ETA: 0s
























 32/790 [>.............................] - ETA: 0s

 96/790 [==>...........................] - ETA: 0s

160/790 [=====>........................] - ETA: 0s








































































































 - 0s























































































 - 0s


















































































In [151]:
results = pd.DataFrame(influential_words)

### Words that do not influence classification accuracy

In [186]:
from math import isclose, log
results[results['delta'].apply(lambda x: isclose(x, 0.))]

Unnamed: 0,delta,erased,included,word
3,0.0,0.750000,0.750000,ресторанчик
5,0.0,0.909091,0.909091,огромный
9,0.0,0.710526,0.710526,салат
11,0.0,0.545455,0.545455,мясо
12,0.0,0.666667,0.666667,курица
13,0.0,0.571429,0.571429,нежный
14,0.0,0.909091,0.909091,десерт
16,0.0,1.000000,1.000000,вежливый
17,0.0,0.750000,0.750000,улыбчивый
18,0.0,0.600000,0.600000,единственный


###Words that influence classification accuracy

In [179]:
influential = results[results['delta'].apply(lambda x: not isclose(x, 0.))]

In [180]:
influential[influential['delta'] > 0]

Unnamed: 0,delta,erased,included,word
0,5.714286e-03,0.840000,0.845714,очень
1,1.428571e-01,0.714286,0.857143,милый
2,4.761904e-02,0.761905,0.809524,уютный
7,1.428571e-02,0.785714,0.800000,вкусный
8,1.612905e-02,0.758065,0.774194,блюдо
10,1.666667e-01,0.833333,1.000000,свежий
15,1.333333e-01,0.733333,0.866667,официантка
20,1.408451e-02,0.816901,0.830986,понравиться
22,5.555556e-02,0.638889,0.694444,ждать
24,1.666666e-01,0.666667,0.833333,видимо


In [182]:
influential[influential['delta'] < 0]

Unnamed: 0,delta,erased,included,word
4,-4.761904e-02,0.619048,0.571429,цена
6,-1.363636e-01,0.818182,0.681818,порция
21,-4.761904e-02,0.619048,0.571429,долго
25,-4.761905e-02,0.738095,0.690476,большой
29,-1.402462e-08,0.627451,0.627451,стол
35,-3.703701e-02,0.814815,0.777778,общий
36,-4.545450e-02,0.772727,0.727273,впечатление
40,-2.597403e-02,0.792208,0.766234,ресторан
43,-1.212121e-01,0.818182,0.696970,уже
48,-1.805054e-03,0.707581,0.705776,я


###Word importance analysis

In [184]:
def get_probabilities_gold(probability_model, y):
    probabilities_gold = []
    for x,y in zip(probability_model, y):
        proba = (max(x*y))
        probabilities_gold.append(proba)
    return probabilities_gold

In [188]:
word_importance = []
for word in lexicon_df['word']:
    data_with_word = test_data[test_data['sentence'].str.contains(' ' + word + ' ')]
    x_test_with_word = sequence.pad_sequences(tokenizer.texts_to_sequences(data_with_word['sentence'].tolist()))
    y_test_of_selected = to_categorical(le.transform(data_with_word['polarity']), 4)
    probability_with_word = model.predict_proba(x_test_with_word)
    probabilities_gold_with_word = get_probabilities_gold(probability_with_word, y_test_of_selected)
    data_without_word = data_with_word['sentence'].str.replace(word, '')
    x_test_without_word = sequence.pad_sequences(tokenizer.texts_to_sequences(data_without_word.tolist()))
    probability_without_word = model.predict_proba(x_test_without_word)
    probabilities_gold_without_word = get_probabilities_gold(probability_without_word, y_test_of_selected)
    importance_summ = 0
    for proba_gold_with_word, proba_gold_without_word in zip(probabilities_gold_with_word, probabilities_gold_without_word):
        importance_summ += 1 - (log(proba_gold_without_word)/log(proba_gold_with_word))
    importance_of_word = importance_summ / len(y_test_of_selected)
    word_importance.append({'word': word, 'importance' : importance_of_word})

 32/143 [=====>........................] - ETA: 0s












 32/143 [=====>........................] - ETA: 0s




















































































 - 0s











































































































































 - 0s











































































































 32/152 [=====>........................] - ETA: 0s












 32/152 [=====>........................] - ETA: 0s


































































































































































































































































 - 0s




















































































































































































































































































































































































































































































































































































































































































































 - 0s





























































































































































































































































































































































































































































































































 - 0s























 - 0s










































































































































































































































































































































































































































































 - 0s











































































































































































































































































































 - 0s






















































































































































































































































































 - 0s






















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































In [196]:
importance_results = pd.DataFrame(word_importance)

In [199]:
importance_results[importance_results['importance'] < 0]

Unnamed: 0,importance,word
0,-0.133472,очень
1,-1.091612,милый
2,-1.838773,уютный
6,-0.128809,порция
7,-0.877298,вкусный
9,-0.022085,салат
10,-1.547786,свежий
11,-0.020627,мясо
13,-0.596258,нежный
15,-0.701690,официантка
