In [1]:
import pandas as pd
import numpy as np
import pickle

from tensorflow import keras
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding
from keras.layers import Input
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Input, Dense, Embedding, SpatialDropout1D, Dropout, add, concatenate
from keras.layers import CuDNNLSTM, Bidirectional, GlobalMaxPooling1D, GlobalAveragePooling1D
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Dense
from keras.optimizers import RMSprop, Adam, Nadam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.models import Model
from keras.models import load_model
from keras.utils.training_utils import multi_gpu_model
#from keras.utils.training_utils import multi_gpu_model
#from keras.layers import GlobalAveragePooling2D, Dense, Input
#from keras.models import Model, model_from_json, load_model

Using TensorFlow backend.


In [2]:
# Locations of the two input pickles and of the model checkpoint file.
# word_pickle_filename: unpickled once; its length becomes the vocabulary size (NUMWORD).
word_pickle_filename = '/data/jigsaw/pickle/word_14_665013221336055259.pkl'
# train_vector_filename: unpickled into the 'vectorized' column of the training frame.
train_vector_filename = '/data/jigsaw/pickle/train_vectorized_14_665013221336055259.pkl'
# model_h5_filename: path handed to ModelCheckpoint.
model_h5_filename = '/data/jigsaw/model/model_14_665013221336055259.h5'

In [25]:
# Sequence length every comment is padded/truncated to (also the model input width).
MAXWORD = 250
# Batch size used by the data generators and to derive steps per epoch.
BATCHSIZE = 256
# Width of each learned word-embedding vector.
EMBEDDINGS_DIMENSION = 300
# Vocabulary size = number of entries in the pickled word collection.
# The file is opened in a `with` block so the handle is closed deterministically.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(word_pickle_filename, 'rb') as word_file:
    NUMWORD = len(pickle.load(word_file))
# NOTE(review): LEARNING_RATE and DROPOUT_RATE are not referenced by any live
# code in the visible cells (the optimizer is built without a learning rate).
LEARNING_RATE = 0.002
DROPOUT_RATE = 0.3

# List all identities
identity_columns = [
    'male', 'female', 'homosexual_gay_or_lesbian', 'christian', 'jewish',
    'muslim', 'black', 'white', 'psychiatric_or_mental_illness']

In [36]:
# Convert target and identity columns to booleans
def convert_to_bool(df, col_name):
    """Overwrite ``df[col_name]`` in place with a boolean column.

    A value becomes True when it is >= 0.5 and False otherwise; missing
    values (NaN) fail the comparison and therefore become False.
    """
    df[col_name] = df[col_name].ge(0.5).values
    
def convert_dataframe_to_bool(df):
    """Return a copy of ``df`` whose 'target' and identity columns are booleans.

    The input frame is left untouched; thresholding (>= 0.5) is applied to the
    copy, one column at a time, via ``convert_to_bool``.
    """
    converted = df.copy()
    columns_to_threshold = ['target'] + identity_columns
    for column in columns_to_threshold:
        convert_to_bool(converted, column)
    return converted

# Load data

In [37]:
# Read the training CSV, keep the original float score as 'target_float',
# drop the raw comment text, then threshold target/identity columns to booleans.
data = convert_dataframe_to_bool(
    pd.read_csv('/data/jigsaw/train.csv.zip', compression='zip')
    .assign(target_float=lambda frame: frame.target.values)
    .drop(columns='comment_text')
)

In [38]:
# Attach the pre-computed integer sequences as a new 'vectorized' column.
# This happens before the shuffle, so the pickle is assumed to be in the same
# row order as train.csv -- TODO confirm against the script that produced it.
# The file is opened in a `with` block so the handle is closed deterministically.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(train_vector_filename, 'rb') as vector_file:
    vectorized = pickle.load(vector_file)
data.loc[:,'vectorized'] = vectorized
data.head(3)

Unnamed: 0,id,target,severe_toxicity,obscene,identity_attack,insult,threat,asian,atheist,bisexual,...,funny,wow,sad,likes,disagree,sexual_explicit,identity_annotator_count,toxicity_annotator_count,target_float,vectorized
0,59848,False,0.0,0.0,0.0,0.0,0.0,,,,...,0,0,0,0,0,0.0,0,4,0.0,"[47612, 24882, 44072, 10132, 24979, 50, 41167,..."
1,59849,False,0.0,0.0,0.0,0.0,0.0,,,,...,0,0,0,0,0,0.0,0,4,0.0,"[47428, 53115, 46, 53448, 47612, 52706, 28554,..."
2,59852,False,0.0,0.0,0.0,0.0,0.0,,,,...,0,0,0,0,0,0.0,0,4,0.0,"[47612, 24882, 46000, 1686, 50442, 12507, 3711..."


In [39]:
# shuffle data
# A fixed seed makes the row order -- and therefore the train/validation
# split taken from it -- identical on every Restart & Run All.
SHUFFLE_SEED = 42
data = data.sample(frac=1.0, random_state=SHUFFLE_SEED)

In [40]:
# split data: the first 80% of the (shuffled) rows train, the rest validate
n_train = int(len(data) * 0.8)
train, valid = data.iloc[:n_train], data.iloc[n_train:]
train.shape, valid.shape

((1443899, 46), (360975, 46))

# Build model

In [41]:
# NOTE: EPOCHS is reassigned to 4 in the model hyper-parameter cell further
# down, so that later value is the one the training call sees.
EPOCHS = 16
# Number of full batches in the training set (a trailing partial batch is not counted).
STEPS = len(train) // BATCHSIZE

In [61]:
#y_aux_train
def generator(vector, label, aux, batch_size=256):
    """Endlessly yield ``(batch_x, [batch_y, batch_aux])`` training batches.

    ``vector``, ``label`` and ``aux`` are sliced with the same window, so they
    are expected to be aligned row by row. Each window of ``vector`` is padded
    at the end ('post') to MAXWORD columns, ``label`` is mapped to 1/0 and
    ``aux`` is passed through unchanged.

    The cursor moves forward by ``batch_size`` per batch; the final window of a
    pass may hold fewer than ``batch_size`` rows. Once the cursor reaches the
    end of ``vector`` it wraps around and iteration continues indefinitely.
    """
    offset = 0
    while True:
        # The cursor is always a multiple of batch_size, so this modulo puts
        # it back at the beginning of the data.
        if offset >= len(vector):
            offset = offset % batch_size
        window = slice(offset, offset + batch_size)
        offset += batch_size

        padded = pad_sequences(vector[window], padding='post', maxlen=MAXWORD)
        targets = np.where(label[window], 1, 0)
        yield padded, [targets, aux[window]]

In [62]:
# Auxiliary regression-style targets predicted by the second model head.
aux_columns = ['target_float', 'severe_toxicity', 'obscene', 'identity_attack', 'insult', 'threat']
train_gen = generator(train.vectorized.values, train.target.values, train[aux_columns].values, batch_size=BATCHSIZE)
# The validation generator must take its auxiliary targets from `valid`, not
# `train`; otherwise each validation comment is scored against the auxiliary
# labels of an unrelated training row.
valid_gen = generator(valid.vectorized.values, valid.target.values, valid[aux_columns].values, batch_size=BATCHSIZE)

In [63]:
# Pull one batch to inspect it: x = padded sequences, y = main target, z = auxiliary targets.
# This advances train_gen, so training on the same generator object resumes
# from the following batch.
x, (y, z) = next(train_gen)

In [64]:
# Sanity check of the batch layout: (batch, MAXWORD), (batch,), (batch, number of aux columns).
x.shape, y.shape, z.shape

((256, 250), (256,), (256, 6))

In [70]:
# Model hyper-parameters.
# NOTE(review): NUM_MODELS, BATCH_SIZE and MAX_LEN are not referenced by the
# visible cells; the generators use BATCHSIZE (256) and the model uses
# MAXWORD (250) instead. Confirm whether these three can be removed.
NUM_MODELS = 2
BATCH_SIZE = 512
# Units per direction in each bidirectional LSTM layer.
LSTM_UNITS = 128
# Width of the dense residual blocks; equals the concatenated max+average
# pooling output (2 poolings x 2 directions x LSTM_UNITS).
DENSE_HIDDEN_UNITS = 4 * LSTM_UNITS
# Overrides the EPOCHS = 16 assigned in an earlier cell.
EPOCHS = 4
MAX_LEN = 220
# Width of the auxiliary output head; matches the six entries of aux_columns.
num_aux_targets = 6

# Create model layers.
def build_model(use_cudnn=True):
    """Build the two-headed bidirectional-LSTM classifier (uncompiled).

    Architecture: trainable embedding -> spatial dropout -> two stacked
    bidirectional LSTM layers -> concatenated global max/average pooling ->
    two residual dense blocks -> two sigmoid heads.

    Args:
        use_cudnn: when True (the default, matching the original behaviour)
            the recurrent layers are CuDNNLSTM, which has GPU kernels only and
            fails with "No OpKernel was registered to support Op 'CudnnRNN'"
            when no CUDA GPU is available. Pass False to build the same
            architecture from plain LSTM layers so it also runs on CPU.

    Returns:
        A keras ``Model`` that maps an int32 input of shape (MAXWORD,) to
        ``[result, aux_result]``: a 1-unit sigmoid output and a
        ``num_aux_targets``-unit sigmoid output.
    """
    def recurrent_layer():
        # One fresh sequence-returning LSTM per call (layers must not be shared).
        if use_cudnn:
            return CuDNNLSTM(LSTM_UNITS, return_sequences=True)
        # Imported lazily: only the CPU fallback needs the plain LSTM layer.
        from keras.layers import LSTM
        # A sigmoid recurrent activation mirrors the gate activation of the
        # cuDNN implementation (Keras' LSTM default is hard_sigmoid).
        return LSTM(LSTM_UNITS, return_sequences=True,
                    recurrent_activation='sigmoid')

    sequence_input = Input(shape=(MAXWORD,), dtype='int32')
    # Embeddings are learned from scratch: no pretrained weights are supplied.
    x = Embedding(NUMWORD,
                  EMBEDDINGS_DIMENSION,
                  input_length=MAXWORD,
                  trainable=True)(sequence_input)
    x = SpatialDropout1D(0.3)(x)
    x = Bidirectional(recurrent_layer())(x)
    x = Bidirectional(recurrent_layer())(x)

    # Summarise the sequence with both its max and its mean over time.
    hidden = concatenate([
        GlobalMaxPooling1D()(x),
        GlobalAveragePooling1D()(x),
    ])
    # Two residual dense blocks; DENSE_HIDDEN_UNITS must equal the pooled
    # width for the element-wise add to be valid.
    hidden = add([hidden, Dense(DENSE_HIDDEN_UNITS, activation='relu')(hidden)])
    hidden = add([hidden, Dense(DENSE_HIDDEN_UNITS, activation='relu')(hidden)])
    result = Dense(1, activation='sigmoid')(hidden)
    aux_result = Dense(num_aux_targets, activation='sigmoid')(hidden)
    model = Model(inputs=sequence_input, outputs=[result, aux_result])

    return model

In [71]:
# build model
model = build_model()
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 250)          0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 250, 300)     16034700    input_3[0][0]                    
__________________________________________________________________________________________________
spatial_dropout1d_3 (SpatialDro (None, 250, 300)     0           embedding_3[0][0]                
__________________________________________________________________________________________________
bidirectional_5 (Bidirectional) (None, 250, 256)     440320      spatial_dropout1d_3[0][0]        
__________________________________________________________________________________________________
bidirectio

In [72]:
# Compile model.
print('compiling model')
try:
    model = multi_gpu_model(model, gpus=2)
    print('used multi-gpu model')
except:
    pass
model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(clipnorm=0.1),
        metrics=['accuracy'])

compiling model


In [73]:
# Both callbacks watch the validation loss, which Keras logs as 'val_loss'
# because the training call supplies validation data. The previous monitor
# name ('binary_crossentropy') is not a logged quantity, and mode='max' would
# have treated a rising loss as an improvement. A loss must be minimised,
# hence mode='min'.
callbacks = [
    # Multiply the learning rate by 0.75 after 3 epochs without a val_loss
    # improvement of at least 0.001, never going below 1e-5.
    ReduceLROnPlateau(monitor='val_loss', factor=0.75, patience=3, min_delta=0.001,
                          mode='min', min_lr=1e-5, verbose=1),
    # Keep only the weights of the epoch with the lowest val_loss.
    ModelCheckpoint(model_h5_filename, monitor='val_loss', mode='min', save_best_only=True,
                    save_weights_only=True),
]

In [74]:
# Train from the infinite generators: STEPS batches make up one epoch, and
# each epoch is validated on 100 batches drawn from valid_gen.
# NOTE(review): valid_gen keeps its position between epochs, so each epoch is
# validated on a different 100-batch window -- confirm this is intended.
hist = model.fit_generator(
    train_gen, steps_per_epoch=STEPS, epochs=EPOCHS, verbose=1,
    validation_data=valid_gen, validation_steps=100,
    callbacks = callbacks
)

Epoch 1/4


InvalidArgumentError: No OpKernel was registered to support Op 'CudnnRNN' with these attrs.  Registered devices: [CPU,XLA_CPU,XLA_GPU], Registered kernels:
  device='GPU'; T in [DT_DOUBLE]
  device='GPU'; T in [DT_FLOAT]
  device='GPU'; T in [DT_HALF]

	 [[node bidirectional_1/CudnnRNN (defined at /usr/local/lib/python3.5/dist-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py:922)  = CudnnRNN[T=DT_FLOAT, direction="unidirectional", dropout=0, input_mode="linear_input", is_training=true, rnn_mode="lstm", seed=87654321, seed2=0](bidirectional_1/transpose, bidirectional_1/ExpandDims_1, bidirectional_1/ExpandDims_2, bidirectional_1/concat)]]

Caused by op 'bidirectional_1/CudnnRNN', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.5/dist-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 345, in run_forever
    self._run_once()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 1312, in _run_once
    handle._run()
  File "/usr/lib/python3.5/asyncio/events.py", line 125, in _run
    self._callback(*self._args)
  File "/usr/local/lib/python3.5/dist-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-0fb854363fae>", line 2, in <module>
    model = build_model()
  File "<ipython-input-46-386cb74b0046>", line 30, in build_model
    x = Bidirectional(CuDNNLSTM(LSTM_UNITS, return_sequences=True))(x)
  File "/usr/local/lib/python3.5/dist-packages/keras/layers/wrappers.py", line 427, in __call__
    return super(Bidirectional, self).__call__(inputs, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/base_layer.py", line 457, in __call__
    output = self.call(inputs, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/keras/layers/wrappers.py", line 522, in call
    y = self.forward_layer.call(inputs, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/keras/layers/cudnn_recurrent.py", line 90, in call
    output, states = self._process_batch(inputs, initial_state)
  File "/usr/local/lib/python3.5/dist-packages/keras/layers/cudnn_recurrent.py", line 517, in _process_batch
    is_training=True)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1544, in __call__
    input_data, input_h, input_c, params, is_training=is_training)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1435, in __call__
    seed=self._seed)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 922, in _cudnn_rnn
    outputs, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn(**args)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py", line 116, in cudnn_rnn
    is_training=is_training, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): No OpKernel was registered to support Op 'CudnnRNN' with these attrs.  Registered devices: [CPU,XLA_CPU,XLA_GPU], Registered kernels:
  device='GPU'; T in [DT_DOUBLE]
  device='GPU'; T in [DT_FLOAT]
  device='GPU'; T in [DT_HALF]

	 [[node bidirectional_1/CudnnRNN (defined at /usr/local/lib/python3.5/dist-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py:922)  = CudnnRNN[T=DT_FLOAT, direction="unidirectional", dropout=0, input_mode="linear_input", is_training=true, rnn_mode="lstm", seed=87654321, seed2=0](bidirectional_1/transpose, bidirectional_1/ExpandDims_1, bidirectional_1/ExpandDims_2, bidirectional_1/concat)]]
