# Gradient Based Constrained Decoding Demo

Licensed under the Apache License, Version 2.0.

This method is based upon [Gradient-based Inference for Networks
with Output Constraints](https://arxiv.org/pdf/1707.08608.pdf) by Lee et al.

In [20]:
import json
import numpy as np
import random
import tensorflow as tf

import inference.gradient_decoding as gradient_decoding
import scripts.multiwoz_synthetic_data_util as data_util
import scripts.multiwoz_synthetic_gradient_decoding_util as gradient_decoding_util
import scripts.util as util
import models.multiwoz_synthetic.psl_model as psl_model

# Dataset and Task

We study constrained decoding through the task of dialog structure prediction. Dialog structure is the high level representation of the flow of a dialog, where nodes represent abstract topics or dialog acts that statements would fit into and edges represent topic changes.

To verify our method we ideally would like to test it over a multi-goal oriented dialog corpus such as [MultiWoZ 2.0](https://arxiv.org/pdf/1907.01669.pdf), created by Mihail Eric et. al. Unfortunately, this corpus does not have a ground truth dialog structure, therefore, we use a [Synthetic Multi-WoZ](https://almond-static.stanford.edu/papers/multiwoz-acl2020.pdf) dataset created by Giovanni Campagna et. al.

In [16]:
# ========================================================================
# Constants
# ========================================================================
DATA_PATH = ''
RULE_WEIGHTS = gradient_decoding_util.RULE_WEIGHTS
RULE_NAMES = gradient_decoding_util.RULE_NAMES

ALPHAS = [0.1]
GRAD_STEPS = [10, 50, 100, 500]
LEARNING_RATES = [0.0001, 0.0005, 0.001, 0.01]

# ========================================================================
# Seed Data
# ========================================================================
SEED = random.randint(-10000000, 10000000)
print("Seed: %d" % SEED)
tf.random.set_seed(SEED)

# ========================================================================
# Load Data and Config
# ========================================================================
DATA = data_util.load_json(DATA_PATH)
CONFIG = gradient_decoding_util.CONFIG

Seed: 2977202


# Neural Model

Below is a simple neural model for supervised structure prediction.

In [11]:
#@title Create Neural Model
def build_model(input_size, learning_rate=0.001):
    """Build simple neural model for class prediction."""
    input_layer = tf.keras.layers.Input(input_size)
    hidden_layer_1 = tf.keras.layers.Dense(1024)(input_layer)
    hidden_layer_2 = tf.keras.layers.Dense(
      512, activation='sigmoid')(
          hidden_layer_1)
    output = tf.keras.layers.Dense(
      9, activation='softmax',
      kernel_regularizer=tf.keras.regularizers.l2(1.0))(
          hidden_layer_2)

    model = tf.keras.Model(input_layer, output)

    model.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
      loss='categorical_crossentropy',
      metrics=['accuracy'])

    return model

In [12]:
train_ds, test_ds = gradient_decoding_util.prepare_dataset(DATA, CONFIG)

2021-11-12 11:39:29.128471: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-12 11:39:29.129262: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-11-12 11:39:29.129312: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2021-11-12 11:39:29.129354: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2021-11-12 11:39:29.129395: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

In [14]:
def run_non_constrained(train_ds, test_ds, test_labels, config, learning_rate):
    model = build_model([config['max_dialog_size'], config['max_utterance_size']], learning_rate=learning_rate)
    model.fit(train_ds, epochs=config['train_epochs'])

    logits = model.predict(test_ds)
    predictions = tf.math.argmax(logits, axis=-1)

    confusion_matrix = util.class_confusion_matrix(predictions, test_labels, config)
    metrics, cat_accuracy = util.print_metrics(confusion_matrix)

    return model, metrics, cat_accuracy

test_model, metrics, cat_accuracy = run_non_constrained(train_ds, test_ds, DATA['train_truth_dialog'], CONFIG, 0.0001)

Categorical Accuracy: 0.4948
Class: accept          Precision: 0.0000  Recall: 0.0000  F1: 0.0000
Class: cancel          Precision: 0.0114  Recall: 0.1176  F1: 0.0207
Class: end             Precision: 0.0194  Recall: 0.1552  F1: 0.0344
Class: greet           Precision: 0.0282  Recall: 0.0217  F1: 0.0245
Class: info_question   Precision: 0.0043  Recall: 0.0909  F1: 0.0082
Class: init_request    Precision: 0.7613  Recall: 0.8632  F1: 0.8091
Class: insist          Precision: 0.5226  Recall: 0.0548  F1: 0.0992
Class: second_request  Precision: 0.5995  Recall: 0.6464  F1: 0.6220
Class: slot_question   Precision: 0.0000  Recall: 0.0000  F1: 0.0000


# Gradient Based Constraint Decoding

Rules:

1. !FirstStatement(S) -> !State(S, 'greet')
2. FirstStatement(S) & HasGreetWord(S) -> State(S, 'greet')
3. FirstStatement(S) & !HasGreetWord(S) -> State(S, 'init_request')
4. PreviousStatement(S1, S2) & State(S2, 'init_request') -> State(S1, 'second_request')
5. PreviousStatement(S1, S2) & !State(S2, 'greet') -> !State(S1, 'init_request')
6. PreviousStatement(S1, S2) & State(S2, 'greet') -> State(S1, 'init_request')
7. LastStatement(S) & HasEndWord(S) -> State(S, 'end')
8. LastStatement(S) & HasAcceptWord(S) -> State(S, 'accept')
9. PreviousStatement(S1, S2) & State(S1, 'end') & HasCancelWord(S2) -> State(S2, 'cancel')
10. PreviousStatement(S1, S2) & State(S2, 'second_request') & HasInfoQuestionWord(S1) -> State(S1, 'info_question')
11. LastStatement(S) & HasInsistWord(S) -> State(S, 'insist')
12. PreviousStatement(S1, S2) & State(S2, 'second_request') & HasSlotQuestionWord(S1) -> State(S1, 'slot_question')

In [22]:
def run_constrained(test_model, rule_weights, rule_names, test_ds, test_labels, config, alpha, grad_step):
    psl_constraints = psl_model.PSLModelMultiWoZ(rule_weights, rule_names, config=config)
    logits = gradient_decoding.evaluate_constrained_model(test_model, test_ds, psl_constraints, grad_steps=grad_step, alpha=alpha)
    predictions = tf.math.argmax(tf.concat(logits, axis=0), axis=-1)

    confusion_matrix = util.class_confusion_matrix(predictions, test_labels, config)
    metrics, cat_accuracy = util.print_metrics(confusion_matrix)

    return predictions, metrics, cat_accuracy

predictions, metrics, cat_accuracy = run_constrained(test_model, RULE_WEIGHTS, RULE_NAMES, test_ds, DATA['test_truth_dialog'], CONFIG, 0.1, 5)

<keras.metrics.Mean object at 0x7fd1c9c63850> 12.6262856
<keras.metrics.MeanMetricWrapper object at 0x7fd1c9b74d90> 0.548742831
Categorical Accuracy: 0.5753
Class: accept          Precision: 0.7939  Recall: 0.0882  F1: 0.1587
Class: cancel          Precision: 0.0000  Recall: 0.0000  F1: 0.0000
Class: end             Precision: 0.7763  Recall: 0.4925  F1: 0.6027
Class: greet           Precision: 1.0000  Recall: 0.9167  F1: 0.9565
Class: info_question   Precision: 0.0000  Recall: 0.0000  F1: 0.0000
Class: init_request    Precision: 0.7460  Recall: 0.9286  F1: 0.8274
Class: insist          Precision: 0.8316  Recall: 0.3468  F1: 0.4895
Class: second_request  Precision: 0.5551  Recall: 0.8269  F1: 0.6642
Class: slot_question   Precision: 0.1990  Recall: 0.2123  F1: 0.2054


# Qualitative Analysis

In [27]:
def recover_utterances(dialog, vocab_map):
    sentences = []
    for utterance in dialog:
        sentence = ''

    for word in utterance:
        if word in [0, -1, -2, -3]:
            continue
        sentence += ' ' + vocab_map[word]

    if sentence != '':
        sentences.append(sentence)

    return sentences

def print_dialog(dialog_index, vocab_map, class_map, data, predictions):
    vocab_map = {v: k for k, v in vocab_map.items()}
    class_map = {v: k for k, v in class_map.items()}
    utterances = recover_utterances(test_data[0][dialog_index], vocab_map)

    for utterance_index in range(len(utterances)):
        key = predictions[dialog_index][utterance_index]
        print("Prediction: %s Utterance: %s" % (class_map[int(key)].ljust(15), utterances[utterance_index]))

def run_analysis(test_data, predictions):
    print("\nDialog Greet")
    print('-' * 50)
    print_dialog(27, DATA['vocab_mapping'], CONFIG['class_map'], test_data, predictions)
    print("\nDialog End")
    print('-' * 50)
    print_dialog(6, DATA['vocab_mapping'], CONFIG['class_map'], test_data, predictions)

# Run Hyperparameter Grid

In [28]:
def run_grid(train_ds, test_ds, test_data, test_labels, rule_weights, rule_names, vocab_mapping, config, alphas, grad_steps, learning_rates):
    character_size = 80

    constrained_metrics = []
    non_constrained_metrics = []
    constrained_cat_accuracies = []
    non_constrained_cat_accuracies = []

    for alpha in alphas:
        for grad_step in grad_steps:
            for learning_rate in learning_rates:
                print('\n' + '=' * character_size)
                print("Running: Alpha - %0.5f   Gradient Steps - %d   Learning Rate - %0.5f" % (alpha, grad_step, learning_rate))
                print('=' * character_size)

                print('\nNon-Constrained')
                print('-' * character_size)
                test_model, metrics, cat_accuracy = run_non_constrained(train_ds, test_ds, DATA['test_truth_dialog'], config, learning_rate=learning_rate)
                non_constrained_metrics.append(metrics)
                non_constrained_cat_accuracies.append(cat_accuracy)

                print('\nConstrained')
                print('-' * character_size)
                predictions, metrics, cat_accuracy = run_constrained(test_model, rule_weights, rule_names, test_ds, DATA['test_truth_dialog'], config, alpha=alpha, grad_step=grad_step)
                constrained_metrics.append(metrics)
                constrained_cat_accuracies.append(cat_accuracy)

                print("\nDialog Greet")
                print('-' * 50)
                print_dialog(11, DATA['vocab_mapping'], config['class_map'], test_data, predictions)
                print("\nDialog End")
                print('-' * 50)
                print_dialog(6, DATA['vocab_mapping'], config['class_map'], test_data, predictions)

    return non_constrained_metrics, constrained_metrics, non_constrained_cat_accuracies, constrained_cat_accuracies