# CapsNetS2I - Capsule Neural Network Architecture for Joint Intent Detection and Slot Filling

<img src="high-level-arch.png" width="50%" height="50%">

### Import necessary modules and libraries

In [1]:
import model
import data_loader
import flags
import tensorflow as tf
import os
import warnings
# Keep the notebook output readable: ignore Python warnings and let
# TensorFlow log errors only.
warnings.filterwarnings('ignore')
tf.logging.set_verbosity(tf.logging.ERROR)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



### Load data
First, we define the paths to the pre-trained word embeddings and to the training and test datasets.

In [2]:
# Scenario 0 datasets (test and training split).
test_data_path = '../data-capsnets/scenario0/test.txt'
training_data_path = '../data-capsnets/scenario0/train.txt'
# Pre-trained word vectors in text (.vec) format.
word2vec_path = '../data-capsnets/word-vec/cc.ro.300.vec'

#### Load pre-trained word embeddings
The embedding file contains a very large number of word vectors, so loading them takes a while (~10 minutes).

In [3]:
# Load the pre-trained word vectors from `word2vec_path` into `w2v`.
# This is the slow step of the notebook; the two banners mark its start and
# end in the cell output.
print('------------------load word2vec begin-------------------')
w2v = data_loader.load_w2v(word2vec_path)
print('------------------load word2vec end---------------------')

#### Load train and test datasets

In [4]:
# Build the `data` dictionary (label dictionaries, padded samples, embedding
# matrix) that every later cell reads from.
data = data_loader.read_input_data(
    w2v,
    training_data=training_data_path,
    test_data=test_data_path,
    test=True,
)

------------------read datasets begin-------------------
------------------read datasets end---------------------


#### Visualize the contents of the data dictionary

`intents_dict` - maps ids to intent labels
<br>
`slots_dict` - maps ids to slot labels
<br>
For slots we use the **IOB** (Inside, Outside, Beginning) notation, which is useful for slots that span multiple words (e.g. _"douazeci de grade"_ --> B-grade I-grade I-grade).

In [28]:
import pprint

pp = pprint.PrettyPrinter(indent=4)

# Show both id -> label dictionaries, then the padded sentence length.
for heading, dict_key in (('Intent class dictionary', 'intents_dict'),
                          ('Slots class dictionary', 'slots_dict')):
    print(heading)
    pp.pprint(data[dict_key])
print('Max sentence length: %d words\n' % data['max_len'])

# Inspect one test example: its tokens, its labels and its embedding indices.
test_sample_idx = 110
sample_words = data['x_text_te'][test_sample_idx]
sample_intent = data['y_intents_te'][test_sample_idx]
sample_slots = data['y_slots_te'][test_sample_idx]
sample_indices = data['x_te'][test_sample_idx]

print('Test sample')
print(sample_words)
print('Intent: %s' % sample_intent)
print('Slots: %s' % sample_slots)
print('Sample containing word embedding indices:')
print(sample_indices)

Intent class dictionary
{   0: 'stingeLumina',
    1: 'seteazaTemperatura',
    2: 'scadeIntensitateLumina',
    3: 'schimbaCanalTV',
    4: 'schimbaIntensitateMuzica',
    5: 'opresteMuzica',
    6: 'opresteTV',
    7: 'cresteIntensitateLumina',
    8: 'pornesteTV',
    9: 'scadeTemperatura',
    10: 'puneMuzica',
    11: 'cresteTemperatura',
    12: 'aprindeLumina'}
Slots class dictionary
{   0: 'O',
    1: 'B-loc',
    2: 'B-grade',
    3: 'I-grade',
    4: 'B-nivel',
    5: 'B-canal',
    6: 'B-artist',
    7: 'I-loc'}
Max sentence length: 15 words

Test sample
['Salut' 'as' 'vrea' 'sa' 'maresti' 'intensitatea' 'luminii' 'in'
 'pivnita' '0' '0' '0' '0' '0' '0']
Intent: 7
Slots: [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
Sample containing word embedding indices:
[ 13871   1949    540     59 440964  10840   5334     50 215040      0
      0      0      0      0      0]


## Set TensorFlow application flags
These flags contain application-wide information such as the scenario, the checkpoint directory containing the saved model, as well as the hyperparameters of the model: learning rate, batch size, number of epochs, the dimensionality of the prediction and 
output vectors of the capsule neural network model, etc.

In [19]:
# Sizes derived from the loaded data; they become flag defaults below.
vocab_size, word_emb_size = data['embedding'].shape
_, max_sentence_length = data['x_tr'].shape
intents_number = len(data['intents_dict'])
slots_number = len(data['slots_dict'])
hidden_size = 64

FLAGS = tf.app.flags.FLAGS
tf_flags = tf.app.flags

# One row per flag: (define function, name, default value, help text).
flag_definitions = [
    (tf_flags.DEFINE_string, 'ckpt_dir', './saved_models/', 'check point dir'),
    (tf_flags.DEFINE_string, 'scenario_num', '0', 'Scenario number'),
    (tf_flags.DEFINE_string, 'errors_dir', './errors/', 'Errors dir'),
    (tf_flags.DEFINE_float, 'keep_prob', 0.8, 'embedding dropout keep rate for training'),
    (tf_flags.DEFINE_integer, 'hidden_size', hidden_size, 'embedding vector size'),
    (tf_flags.DEFINE_integer, 'batch_size', 32, 'batch size'),
    (tf_flags.DEFINE_integer, 'num_epochs', 20, 'num of epochs'),
    (tf_flags.DEFINE_integer, 'vocab_size', vocab_size, 'vocab size of word vectors'),
    (tf_flags.DEFINE_integer, 'max_sentence_length', max_sentence_length,
     'max number of words in one sentence'),
    (tf_flags.DEFINE_integer, 'intents_nr', intents_number, 'intents_number'),
    (tf_flags.DEFINE_integer, 'slots_nr', slots_number, 'slots_number'),
    (tf_flags.DEFINE_integer, 'word_emb_size', word_emb_size, 'embedding size of word vectors'),
    (tf_flags.DEFINE_boolean, 'use_embedding', True, 'whether to use embedding or not.'),
    (tf_flags.DEFINE_float, 'learning_rate', 0.01, 'learning rate'),
    (tf_flags.DEFINE_integer, 'slot_routing_num', 2, 'slot routing num'),
    (tf_flags.DEFINE_integer, 'intent_routing_num', 3, 'intent routing num'),
    (tf_flags.DEFINE_integer, 'intent_output_dim', 16, 'intent output dimension'),
    (tf_flags.DEFINE_integer, 'slot_output_dim', 2 * hidden_size, 'slot output dimension'),
    (tf_flags.DEFINE_integer, 'd_a', 20, 'self attention weight hidden units number'),
    (tf_flags.DEFINE_integer, 'r', 5, 'number of self attention heads'),
    (tf_flags.DEFINE_float, 'alpha', 0.0001, 'coefficient for self attention loss'),
    (tf_flags.DEFINE_integer, 'n_splits', 3, 'Number of cross-validation splits'),
    # Dummy flag; presumably absorbs the `-f <connection file>` argument that
    # Jupyter passes to the kernel, so that flag parsing does not fail.
    (tf_flags.DEFINE_string, 'f', '', 'kernel'),
]

# A flag name can only be defined once, so re-running this cell would fail on
# the first DEFINE_* call. Drop any earlier definition of *our own* flags
# first; flags registered by other libraries are left untouched.
already_defined = set(FLAGS)
for define_flag, flag_name, default_value, description in flag_definitions:
    if flag_name in already_defined:
        delattr(FLAGS, flag_name)
    define_flag(flag_name, default_value, description)

#### Reset flags
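Run this cell only when you want to change a flag: redefining an existing flag is not possible, so the existing flags have to be deleted first. After resetting, re-run the flag-definition cell above before continuing, because the cells below read their settings from `FLAGS`.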

In [10]:
# Deleting the flags is destructive: the cells below read FLAGS (e.g.
# FLAGS.ckpt_dir), so wiping them during a plain top-to-bottom run would break
# the rest of the notebook. The reset is therefore opt-in.
# To change a flag: set RESET_FLAGS = True, run this cell, then re-run the
# flag-definition cell above.
RESET_FLAGS = False

if RESET_FLAGS:
    # Copy the names first; the flag registry is modified while we iterate.
    for name in list(FLAGS):
        delattr(FLAGS, name)

## Load trained model from checkpoint directory

In [20]:
# Start from an empty graph so re-running this cell does not pile a second
# copy of the model on top of the first one.
tf.reset_default_graph()
config = tf.ConfigProto()

sess = tf.Session(config=config)

# Instantiate Model
capsnet = model.CapsNet(FLAGS)

# Checkpoints live in one sub-directory per scenario, e.g. ./saved_models/scenario0/
ckpt_dir = FLAGS.ckpt_dir + 'scenario' + FLAGS.scenario_num + '/'

# latest_checkpoint() returns None when the directory contains no checkpoint.
# Resolve the path up front so an existing-but-empty directory is reported the
# same way as a missing one, instead of failing inside saver.restore().
latest_ckpt = tf.train.latest_checkpoint(ckpt_dir) if os.path.exists(ckpt_dir) else None

if latest_ckpt is not None:
    print('Restoring Variables from Checkpoint for testing')
    saver = tf.train.Saver()
    saver.restore(sess, latest_ckpt)
else:
    print('No trained model exists in checkpoint dir!')

Restoring Variables from Checkpoint for testing


In [59]:
import numpy as np

text = 'Buna Pepper as vrea sa aprinzi lumina in hol'

# split() without an argument collapses repeated / leading / trailing spaces,
# so stray whitespace never produces empty "words".
words = text.split()
text_len = np.asarray(len(words))
max_len = data['max_len']

if not words:
    raise ValueError('The input sentence is empty')
if text_len > max_len:
    # The sentence is padded up to max_len later on; a longer one cannot fit.
    raise ValueError('Sentence has %d words, but at most %d are supported'
                     % (text_len, max_len))

# Map words to their corresponding word embedding indices.
# Every word must be known: silently stopping at the first unknown word would
# leave text_vec shorter than text_len, and the padded input would then no
# longer have max_len entries.
unknown_words = [w for w in words if w not in w2v.vocab]
for w in unknown_words:
    print('Word %s not in vocabulary!' % w)
if unknown_words:
    raise ValueError('%d word(s) missing from the embedding vocabulary: %s'
                     % (len(unknown_words), ', '.join(unknown_words)))

text_vec = np.asarray([w2v.vocab[w].index for w in words])
print('Sentence with w2v indices:')
print(text_vec)

Sentence with w2v indices:
[ 6511 63018  1949   540    59 88692  1639    50 15079]


In [60]:
# Pad sentence with 0s so that it fills the maximum sentence length
# Right-pad the index vector with zeros until it is max_len entries long.
padding = np.zeros((max_len - text_len,), dtype=np.int64)
text_vec_pad = np.concatenate((text_vec, padding))

print('Padded sentence:')
print(text_vec_pad)

Padded sentence:
[ 6511 63018  1949   540    59 88692  1639    50 15079     0     0     0
     0     0     0]


Expand dimensions of the input data to fit the dimensions that the model expects:
<br>
`input_x` - shape (batch_size, max_len)
<br>
`sentence_len` - shape (batch_size,)

In [64]:
# Add a leading batch axis of size 1 to the padded sentence: (15,) -> (1, 15).
print('text_vec_pad shape: ' + str(text_vec_pad.shape))
text_vec_pad_dim = text_vec_pad[np.newaxis, :]
print('text_vec_pad_dim shape: ' + str(text_vec_pad_dim.shape))

# Same for the sentence length, which starts out as a 0-d array: () -> (1,).
print('text_len shape: ' + str(text_len.shape))
text_len_dim = text_len[np.newaxis]
print('text_len_dim shape: ' + str(text_len_dim.shape))

text_vec_pad shape: (15,)
text_vec_pad_dim shape: (1, 15)
text_len shape: ()
text_len_dim shape: (1,)


## Obtain prediction

In [71]:
import util

# Feed the single-sentence batch through the model. keep_prob is fed as 1.0,
# i.e. nothing is dropped at prediction time.
model_feed = {
    capsnet.input_x: text_vec_pad_dim,
    capsnet.sentences_length: text_len_dim,
    capsnet.keep_prob: 1.0,
}
model_fetches = [
    capsnet.intent_output_vectors,
    capsnet.slot_output_vectors,
    capsnet.slot_weights_c,
]
intent_outputs, slots_outputs, slot_weights_c = sess.run(model_fetches, feed_dict=model_feed)

# Post-process the fetched arrays: drop the singleton axes and reduce each
# intent capsule vector with util.safe_norm.
# NOTE(review): these tf.* calls create new graph ops every time the cell is
# run, which is why a second sess.run is needed to evaluate them.
intent_outputs_reduced_dim = tf.squeeze(intent_outputs, axis=[1, 4])
intent_outputs_norm = util.safe_norm(intent_outputs_reduced_dim)
slot_weights_c_reduced_dim = tf.squeeze(slot_weights_c, axis=[3, 4])

post_fetches = [intent_outputs_norm, slot_weights_c_reduced_dim]
intent_predictions, slot_predictions = sess.run(post_fetches)

Each of the **13 IntentCaps** outputs a **16-dimensional vector**.
<br>
For slot filling, we are interested in the **routing weights** between **WordCaps** and **SlotCaps**, because they let us predict the slot type of each individual word. There is one routing weight for each word capsule – slot capsule pair (15 x 8 in total).

In [73]:
# Report the raw (un-squeezed) shapes of the two arrays fetched from the model.
for label, fetched in (('IntentCaps output shape: ', intent_outputs),
                       ('WordCaps - SlotCaps routing weights: ', slot_weights_c)):
    print(label + str(fetched.shape))

IntentCaps output shape: (1, 1, 13, 16, 1)
WordCaps - SlotCaps routing weights: (1, 15, 8, 1, 1)


In [83]:
# Pick the best-scoring intent for the sentence and the best-scoring slot
# class for every word position.
intent_pred = intent_predictions.argmax(axis=1)
slots_pred = slot_predictions.argmax(axis=2)

# Translate class ids back to label names (the batch holds one sentence).
intent_label = data['intents_dict'][intent_pred[0]]
slot_labels = []
for slot_id in slots_pred[0]:
    slot_labels.append(data['slots_dict'][slot_id])

print(text)
print('Intent prediction: ' + intent_label)
# Only the first text_len positions are real words; the rest is padding.
print('Slots prediction: ' + str(slot_labels[:text_len]))

Buna Pepper as vrea sa aprinzi lumina in hol
Intent prediction: aprindeLumina
Slots prediction: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-loc']


## More predictions

In [104]:
text = 'Da drumul la lumina in hol'


def predict_intent_and_slots(sentence):
    """Predict the intent and the per-word slot labels of one sentence.

    Uses the notebook-level `w2v`, `data`, `sess` and `capsnet` objects.

    Args:
        sentence: whitespace-separated words.

    Returns:
        A tuple `(intent_label, slot_labels)`, where `slot_labels` holds one
        label per word of `sentence` (padding positions are left out).

    Raises:
        ValueError: if the sentence is empty, is longer than
            `data['max_len']`, or contains a word that is missing from the
            embedding vocabulary.
    """
    words = sentence.split()
    max_len = data['max_len']
    if not words:
        raise ValueError('The input sentence is empty')
    if len(words) > max_len:
        raise ValueError('Sentence has %d words, but at most %d are supported'
                         % (len(words), max_len))

    # Every word must be known; dropping words would shift the remaining ones
    # and desynchronise the index vector from the sentence length.
    unknown_words = [w for w in words if w not in w2v.vocab]
    for w in unknown_words:
        print('Word %s not in vocabulary!' % w)
    if unknown_words:
        raise ValueError('%d word(s) missing from the embedding vocabulary: %s'
                         % (len(unknown_words), ', '.join(unknown_words)))

    # Map words to embedding indices and right-pad with zeros up to max_len.
    word_ids = np.asarray([w2v.vocab[w].index for w in words], dtype=np.int64)
    padded_ids = np.concatenate(
        (word_ids, np.zeros((max_len - len(words),), dtype=np.int64)))

    # Feed a batch of one sentence; keep_prob 1.0 means nothing is dropped.
    intent_vectors, routing_weights = sess.run(
        [capsnet.intent_output_vectors, capsnet.slot_weights_c],
        feed_dict={capsnet.input_x: np.expand_dims(padded_ids, axis=0),
                   capsnet.sentences_length: np.asarray([len(words)]),
                   capsnet.keep_prob: 1.0})

    # Drop the singleton axes and reduce each intent capsule with safe_norm.
    # NOTE(review): these tf.* calls add new ops to the graph on every call.
    intent_scores, slot_scores = sess.run([
        util.safe_norm(tf.squeeze(intent_vectors, axis=[1, 4])),
        tf.squeeze(routing_weights, axis=[3, 4])])

    intent_id = np.argmax(intent_scores, axis=1)[0]
    slot_ids = np.argmax(slot_scores, axis=2)[0]

    intent = data['intents_dict'][intent_id]
    # Keep only the positions that belong to real words, not to padding.
    slots = [data['slots_dict'][slot_id] for slot_id in slot_ids[:len(words)]]
    return intent, slots


intent_label, slot_labels = predict_intent_and_slots(text)

print(text)
print('Intent prediction: ' + intent_label)
print('Slots prediction: ' + str(slot_labels))

Da drumul la lumina in hol
Intent prediction: cresteIntensitateLumina
Slots prediction: ['O', 'O', 'O', 'O', 'O', 'B-loc']
