In [56]:
import pandas as pd
import math
import keras
from keras.layers import Dense,Embedding, Flatten, Conv1D, GlobalMaxPooling1D, LSTM, Bidirectional, Dropout
from keras.preprocessing.text import text_to_word_sequence,Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

In [59]:
train = pd.read_csv("../upsampled_train_test/upsampled_train.csv").drop_duplicates().drop(["is_charade"],axis=1)
test = pd.read_csv("../upsampled_train_test/test.csv").drop_duplicates().drop(["is_charade"],axis=1)

In [60]:
sizes = train.groupby('category').count()['is_palindrome'].values

In [61]:
max_size = sizes.max()

In [62]:
class_weight = dict(enumerate(max_size/sizes))

In [63]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(pd.concat([train,test]).clue.tolist())

In [64]:
train_x = pad_sequences(tokenizer.texts_to_sequences(train.clue.tolist()),maxlen=15)
test_x = pad_sequences(tokenizer.texts_to_sequences(test.clue.tolist()),maxlen=15)

In [65]:
train_y = train[train.columns[2:-1]]*1

In [66]:
test_y = test[test.columns[2:-1]]*1

In [67]:
filepath="./models/1xBilstm-{epoch:02d}-{loss:.2f}-{categorical_accuracy:.2f}-{val_loss:.2f}-{val_categorical_accuracy:.2f}-singlelabel.hdf5"
saveModelCallBack = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', period=1)
tbCallBack = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)
from keras.callbacks import Callback

class TestCallback(Callback):
    def __init__(self, test_data):
        self.test_data = test_data

    def on_epoch_end(self, epoch, logs={}):
        x, y = self.test_data
        loss, acc = self.model.evaluate(x, y, verbose=0)
        print('\nTesting loss: {}, acc: {}\n'.format(loss, acc))

callbacks_list = [saveModelCallBack,tbCallBack,TestCallback((test_x, test_y))]


In [91]:
model = keras.Sequential()
model.add(Embedding(len(tokenizer.index_word)+1, 128))
model.add(Bidirectional(LSTM(128, dropout=0.5)))
model.add(Dense(13, activation='softmax'))

In [92]:
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['categorical_accuracy'])

In [93]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, None, 128)         3181184   
_________________________________________________________________
bidirectional_1 (Bidirection (None, 256)               263168    
_________________________________________________________________
dense_5 (Dense)              (None, 13)                3341      
Total params: 3,447,693
Trainable params: 3,447,693
Non-trainable params: 0
_________________________________________________________________


In [94]:
history = model.fit(train_x,train_y ,validation_data=(test_x,test_y), batch_size=128, epochs=15,shuffle=True,callbacks=callbacks_list,class_weight=class_weight,initial_epoch=1)


Train on 28187 samples, validate on 3138 samples
Epoch 2/15

Epoch 00002: saving model to ./models/1xBilstm-02-39.77-0.27-2.98-0.29-singlelabel.hdf5

Testing loss: 2.979603261078445, acc: 0.2887189292543021

Epoch 3/15

Epoch 00003: saving model to ./models/1xBilstm-03-30.15-0.31-3.00-0.32-singlelabel.hdf5

Testing loss: 2.997909170263812, acc: 0.3161249203314213

Epoch 4/15

Epoch 00004: saving model to ./models/1xBilstm-04-24.38-0.34-3.60-0.27-singlelabel.hdf5

Testing loss: 3.60103999740231, acc: 0.27023581899298915

Epoch 5/15

Epoch 00005: saving model to ./models/1xBilstm-05-21.06-0.37-2.12-0.36-singlelabel.hdf5

Testing loss: 2.124628529949808, acc: 0.35532186105799873

Epoch 6/15

Epoch 00006: saving model to ./models/1xBilstm-06-18.23-0.41-2.20-0.37-singlelabel.hdf5

Testing loss: 2.203915230250799, acc: 0.3674314850223072

Epoch 7/15

KeyboardInterrupt: 

In [290]:
def simple_model():                                           
    model = keras.Sequential()
    model.add(Embedding(len(tokenizer.index_word)+1, 128,input_length=15))
    model.add(LSTM(128, dropout=0.5))
    model.add(Dense(1, activation='relu'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

In [291]:
model = simple_model()
model.fit(train_data,train_data_out,batch_size=1024)

Epoch 1/1


<keras.callbacks.History at 0x17c519b38>

In [239]:
model.evaluate(test_data,test_data_out)



[1.9973295485616263, 0.18387507966857872]

In [293]:
from sklearn.ensemble import AdaBoostRegressor

In [294]:
from keras.wrappers.scikit_learn import KerasRegressor

In [295]:
bilstm_estimator = KerasRegressor(build_fn= simple_model, epochs=1, batch_size=1024, verbose=1)

In [297]:
import numpy as np
# train_data_out_labels = train_data_out.values.argmax(axis=1)
boosted_lstm = AdaBoostRegressor(base_estimator= bilstm_estimator)
boosted_lstm.fit(train_data, train_data_out)# scale your training data 

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


AdaBoostRegressor(base_estimator=<keras.wrappers.scikit_learn.KerasRegressor object at 0x18a6db8d0>,
         learning_rate=1.0, loss='linear', n_estimators=50,
         random_state=None)

In [298]:
boosted_lstm.score(test_data,test_data_out)



-3.974468899422262

In [215]:
train_data_out_labels = [np.argmax(row).astype(int) for row in train_data_out.values]

In [218]:
type(train_data_out_labels.astype(int))

AttributeError: 'list' object has no attribute 'astype'