In [1]:
# Use autoreload so edits to the underlying project modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [13]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf

from toxic_text.models.keras.rnn import UnifiedAbuseRNN
from toxic_text.train.experiment import setup_training_data, create_embedding_matrix

In [3]:
from tensorflow.python.client import device_lib

def get_available_devices():
    """Return the ``name`` of every device reported by ``device_lib.list_local_devices()``.

    Returns:
        list: one device name per entry, in the order ``device_lib`` reports them.
    """
    device_names = []
    for device in device_lib.list_local_devices():
        device_names.append(device.name)
    return device_names

In [4]:
# Show which devices TensorFlow reports on this machine (the recorded output follows).
get_available_devices()

['/device:CPU:0', '/device:GPU:0']

In [56]:
# Experiment configuration; passed to setup_training_data() and
# create_embedding_matrix() further down.
args = {
    # Also used as the model's `input_length` when the model is built.
    'seq_length': 50,
    # NOTE(review): presumably the vocabulary cap applied by the tokenizer — confirm
    # against setup_training_data.
    'max_words': 25000,
    # Training CSV, relative to the notebook's working directory.
    'train': '../dataset/preprocessed_train.csv',
    # Word-vector file. The default is a machine-specific absolute path, so it can be
    # overridden by setting the GLOVE_VECTORS_PATH environment variable before
    # launching the kernel; when the variable is unset the original path is used.
    'word2vec': os.environ.get(
        'GLOVE_VECTORS_PATH',
        "/home/eric/Documents/word_vectors/GloVe/glove_6B_300d.vec",
    ),
}

# Load Data

In [57]:
# Load the metadata CSV as a plain NumPy array; its column count is later passed to
# the model as `nb_metadata_features`.
# `.values` replaces `DataFrame.as_matrix()`, which is deprecated and was removed in
# pandas 1.0; with no arguments the two return the same array.
meta_train = pd.read_csv('../dataset/meta_train.csv').values

In [58]:
# Unpack the four values setup_training_data() returns for the configuration in `args`.
# x_train / y_train are passed to model.train() later; tokenizer / vocab_size are passed
# to create_embedding_matrix().
x_train, y_train, tokenizer, vocab_size = setup_training_data(args)

In [80]:
# Build the embedding weights (and their dimensionality) from the word vectors
# configured in `args`; both results are passed to the model constructor.
embedding_matrix, embedding_dim = create_embedding_matrix(
    args,
    tokenizer=tokenizer,
    vocab_size=vocab_size
)

Loading word vectors at /home/eric/Documents/word_vectors/GloVe/glove_6B_300d.vec...


# Create Model

In [81]:
# Construct the model inside a '/gpu:0' device scope ('/device:GPU:0' is among the
# devices listed by get_available_devices()).
# NOTE(review): presumably this fails or falls back on a CPU-only machine — confirm.
with tf.device('/gpu:0'):
    model = UnifiedAbuseRNN(
        input_length=args['seq_length'],            # same sequence length used for the training data config
        vocab_size=vocab_size,                      # from setup_training_data()
        nb_metadata_features=meta_train.shape[1],   # number of columns in the metadata array
        embedding_weights=embedding_matrix,         # from create_embedding_matrix()
        embedding_dim=embedding_dim
    )

In [82]:
# Print the layer-by-layer summary of the `model_a` sub-model.
model.model_a.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
word_input (InputLayer)         (None, 50)           0                                            
__________________________________________________________________________________________________
embedding_12 (Embedding)        (None, 50, 300)      7500300     word_input[0][0]                 
__________________________________________________________________________________________________
bidirectional_11 (Bidirectional (None, 50, 100)      105600      embedding_12[0][0]               
__________________________________________________________________________________________________
global_max_pooling1d_11 (Global (None, 100)          0           bidirectional_11[0][0]           
__________________________________________________________________________________________________
global_ave

In [83]:
# Print the layer-by-layer summary of the `model_b` sub-model.
model.model_b.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
word_input (InputLayer)         (None, 50)           0                                            
__________________________________________________________________________________________________
embedding_13 (Embedding)        (None, 50, 300)      7500300     word_input[0][0]                 
__________________________________________________________________________________________________
bidirectional_12 (Bidirectional (None, 50, 100)      105600      embedding_13[0][0]               
__________________________________________________________________________________________________
global_max_pooling1d_12 (Global (None, 100)          0           bidirectional_12[0][0]           
__________________________________________________________________________________________________
global_ave

In [None]:
# Train on the text sequences and the metadata features together.
# NOTE(review): the positional 10 and 128 are presumably the epoch count and batch size
# (the recorded shapes have a leading dimension of 128) — confirm against
# UnifiedAbuseRNN.train and consider passing them by keyword.
model.train([x_train, meta_train], y_train, 10, 128, learning_rate=0.001)

[(128, 50), (128, 6)]


In [None]:
# Inspect the weights of model_a's embedding layer.
# Keras auto-numbers layer names ('embedding_4', 'embedding_12', ...) and the number
# changes each time a model is rebuilt in the same kernel, so looking the layer up by a
# hard-coded name breaks on re-run. Find it by class instead; the summary lists it as
# type `Embedding`.
embedding_layer = next(
    layer for layer in model.model_a.layers
    if type(layer).__name__ == 'Embedding'
)
embedding_layer.get_weights()