In [2]:
# picking the most free GPU resource as cuda device
import subprocess
import sys
import os

import torch
import pandas as pd

if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO

torch.cuda.empty_cache()

def get_free_gpu():
    gpu_stats = subprocess.check_output(
        ["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"]
    )
    gpu_df = pd.read_csv(
        StringIO(gpu_stats.decode("utf-8")),
        names=["memory.used", "memory.free"],
        skiprows=1,
    )
    print("GPU usage:\n{}".format(gpu_df))
    gpu_df["memory.free"] = gpu_df["memory.free"].map(
        lambda x: int(x.rstrip(" MiB"))
    )
    idx = gpu_df["memory.free"].idxmax()
    print(
        "Returning GPU{} with {} free MiB".format(
            idx, gpu_df.iloc[idx]["memory.free"]
        )
    )
    return idx


cmd = "export CUDA_VISIBLE_DEVICES=1,2,3,4,5,6"
os.popen(cmd)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"using {device}")
if torch.cuda.is_available():
    free_gpu_id = get_free_gpu()
    print(f"using GPU id: {free_gpu_id}")
    torch.cuda.set_device(free_gpu_id)


using cpu


In [3]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm


# Temporarily leave PositionalEncoding module here. Will be moved somewhere else.
class PositionalEncoding(nn.Module):
    r"""Inject some information about the relative or absolute position of the tokens
        in the sequence. The positional encodings have the same dimension as
        the embeddings, so that the two can be summed. Here, we use sine and cosine
        functions of different frequencies.
    .. math::
        \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model))
        \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model))
        \text{where pos is the word position and i is the embed idx)
    Args:
        d_model: the embed dim (required).
        dropout: the dropout value (default=0.1).
        max_len: the max. length of the incoming sequence (default=5000).
    Examples:
        >>> pos_encoder = PositionalEncoding(d_model)
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        r"""Inputs of forward function
        Args:
            x: the sequence fed to the positional encoder model (required).
        Shape:
            x: [sequence length, batch size, embed dim]
            output: [sequence length, batch size, embed dim]
        Examples:
            >>> output = pos_encoder(x)
        """

        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

class TransformerModel(nn.Module):
    """Container module with an encoder, a recurrent or transformer module, and a decoder."""

    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
        super(TransformerModel, self).__init__()
        try:
            from torch.nn import TransformerEncoder, TransformerEncoderLayer
        except:
            raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or lower.')
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.ninp = ninp
        self.decoder = nn.Linear(ninp, ntoken)

        self.init_weights()

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        nn.init.uniform_(self.encoder.weight, -initrange, initrange)
        nn.init.zeros_(self.decoder.weight)
        nn.init.uniform_(self.decoder.weight, -initrange, initrange)

    def forward(self, src, has_mask=True):
        if has_mask:
            device = src.device
            if self.src_mask is None or self.src_mask.size(0) != len(src):
                mask = self._generate_square_subsequent_mask(len(src)).to(device)
                self.src_mask = mask
        else:
            self.src_mask = None

        src = self.encoder(src) * math.sqrt(self.ninp)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        output = self.decoder(output)
        return F.log_softmax(output, dim=-1)

In [4]:
import torchtext

with open('../data/train.history_belief') as fp:
    raw_train_data = [line.split() for line in fp.read().split('\n')]
    
with open('../data/val.history_belief') as fp:
    raw_val_data = [line.split() for line in fp.read().split('\n')][:5]

In [5]:
train_vocab = torchtext.vocab.build_vocab_from_iterator(raw_train_data, specials=["<unk>", "<pad>"])

In [6]:
train_vocab.set_default_index(train_vocab["<unk>"])

In [7]:
from torch.nn.utils.rnn import pad_sequence

In [8]:
train_data = [torch.tensor(train_vocab(sent)[::-1], dtype=torch.long) for sent in raw_train_data]
val_data = [torch.tensor(train_vocab(sent)[::-1], dtype=torch.long) for sent in raw_val_data]

In [9]:
train_data = pad_sequence(train_data, padding_value=train_vocab['<pad>'])
val_data = pad_sequence(val_data, padding_value=train_vocab['<pad>'])

In [10]:
train_data = torch.flip(train_data, (0,))
val_data = torch.flip(val_data, (0,))

In [11]:
model = TransformerModel(
    ntoken=len(train_vocab),
    ninp=50,
    nhead=2,
    nhid=50,
    nlayers=2
)

In [357]:
import time
import math

criterion = nn.NLLLoss()
lr = 20
batch_size = 64

n_epochs = 2

total_loss = 0.
start_time = time.time()
ntokens = len(train_vocab)
min_val_loss = math.inf
for _ in tqdm(range(n_epochs), total=n_epochs):
    model.train()
    for batch_idx in range(0, len(train_data), batch_size):
        data = train_data[:, batch_idx*batch_size:(batch_idx+1)*batch_size]
        data, targets = data[:-1, :].T, data[1:, :].T
        model.zero_grad()
        output = model(data)
        loss = criterion(output.transpose(2, 1), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
        for p in model.parameters():
            p.data.add_(p.grad, alpha=-lr)
        total_loss += loss.item()
        if batch_idx % 100 == 0 and batch_idx > 0:
            cur_loss = total_loss / 10
            elapsed = time.time() - start_time
            print('ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                batch,
                elapsed * 1000 / 10, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
    model.eval()
    out = model(val_data[:-1, :].T)
    val_loss = criterion(out.transpose(2, 1), val_data[1:, :].T)
    if val_loss < min_val_loss:
        min_val_loss = val_loss
        lr /= 4

  0%|          | 0/2 [01:52<?, ?it/s]


KeyboardInterrupt: 

In [15]:
with open('../data/test.history_belief') as fp:
    raw_test_data = [line.split() for line in fp.read().split('\n')][:5]

In [16]:
EOT = '<|endoftext|>'

def translate(indexes):
    indexes = list(indexes)
    try:
        found = indexes.index(EOT)
    except ValueError:
        pass
    else:
        indexes = indexes[:found+1]
    return train_vocab.lookup_tokens(list(indexes))

In [17]:
INPUT_SOS = '<|context|>'
INPUT_EOS = '<|endofcontext|>'
OUTPUT_SOS = '<|belief|>'
OUTPUT_EOS = '<|endofbelief|>'

def belief_to_state_list(belief):
    belief_list = [token for token in belief if token not in [OUTPUT_SOS, OUTPUT_EOS]]
    belief_list = [slot.split() for slot in ' '.join(belief_list).split(',')]
    return belief_list

def belief_to_state_dict(belief):
    belief_list = belief_to_state_list(belief)
    state_dict = {}
    for state in belief_list:
        if len(state) < 3: continue
        domain = state[0]
        slot = state[1]
        sub_slot = None
        rest = state[2:]
        if slot == 'book':
            sub_slot = state[2]
            rest = state[3:]
        value = ' '.join(rest)
        d = state_dict.get(domain, {})
        if sub_slot:
            ss = d.get(slot, {})
            ss.update({
                sub_slot: value
            })
            d.update({slot: ss})
        else:
            d.update({slot: value})
        state_dict.update({domain: d})
    return state_dict


def match_slot(true, pred):
    pred_state = belief_to_state_dict(pred)
    true_list = belief_to_state_list(true)
    slot_matches = []
    for i, state in enumerate(true_list):
        if len(state) < 3: continue
        slot_matches.append(False)
        domain = state[0]
        if domain not in pred_state.keys(): continue
        
        slot = state[1]
        if slot not in pred_state[domain].keys(): continue
        
        if slot != 'book':
            true_value = " ".join(state[2:])
            pred_value = pred_state[domain][slot]
        else:
            sub_slot = state[2]
            if sub_slot not in pred_state[domain][slot]: continue
            true_value = " ".join(state[3:])
            pred_value = pred_state[domain][slot][sub_slot]
        
        if true_value != pred_value: continue
        slot_matches[i] = True
            
    all_match = sum(slot_matches) == len(true_list)
    
    return all_match, slot_matches

def get_accuracy(results):
    total_states = len(results)
    total_slots = sum([len(result[1]) for result in results])
    total_correct_states = sum([result[0] for result in results])
    total_correct_slots = sum([sum(result[1]) for result in results])
    return {
        'joint_accuracy': total_correct_states / total_states,
        'slot_accuracy': total_correct_slots / total_slots
    }

In [18]:
from tqdm import tqdm

In [19]:
BELIEF = '<|belief|>'

test_data = []
for raw_sent in raw_test_data[:-1]:
    indexes = train_vocab(raw_sent)
    belief_idx = indexes.index(train_vocab[BELIEF])
    data, target = indexes[:belief_idx][::-1], indexes[belief_idx:]
    test_data.append((torch.tensor(data, dtype=torch.long),
                     torch.tensor(target, dtype=torch.long)))

In [20]:
test_inputs = pad_sequence([t[0] for t in test_data], padding_value=train_vocab['<pad>'])
test_inputs = torch.flip(test_inputs, dims=(0,)).to(device)

In [21]:
test_beliefs = [t[1] for t in test_data]
max_test_target_len = max(len(t[1]) for t in test_data)

In [22]:
predictions = [torch.ones(test_inputs.shape[1], dtype=torch.long) * train_vocab[BELIEF]]
input_data = torch.cat([test_inputs, torch.ones(1, test_inputs.shape[1], dtype=torch.long) * train_vocab[BELIEF]]).to(device)
model.eval()
for _ in tqdm(range(max_test_target_len), total=max_test_target_len):
    out = model(input_data)
    predictions.append(torch.argmax(out[:, -1, :], axis=1))
    input_data = torch.cat([input_data, predictions[-1].unsqueeze(dim=1)], dim=1).to(device)

100%|██████████| 24/24 [00:03<00:00,  6.67it/s]


In [23]:
predictions = torch.stack(predictions)

RuntimeError: stack expects each tensor to be equal size, but got [4] at entry 0 and [103] at entry 1

In [31]:
test_inputs.shape

torch.Size([102, 4])

In [32]:
input_data.shape

torch.Size([103, 28])

In [34]:
predictions[1]

tensor([11463,  9129,  6649,  1465, 11496,  2222,  6903, 10668,  8855, 12591,
         6196,  2544,  4427, 15523, 13777,  5669,  4628,  5228,   207,  6762,
         9129,  4154,  3011,  7340,   998,   998, 11208,  5522, 15210,  6268,
        15582, 15582,  8719, 11208, 12248, 13677,  7950,  7745, 11208, 12473,
         1875,  1819,  1931,  2222,   334,  1931,  1931, 11347,  1931, 11017,
         1931,  8945,  5033, 13411,   206, 15228,  4215, 11496,   443,  3471,
         1022, 13677, 13777,  7711,  1022, 13777, 12694, 13777,  7021, 15920,
          334,  7021, 13677, 13442,  1192,   334,   803,  9814,   334,   334,
        10814, 10814,  9239, 15920, 11016, 11507,  7443, 15847, 11496,  8405,
         8405,  7207,  9916,  9916,   334,  9916,  8182,  7021, 10207,   334,
        13442,  4154, 15210])

In [35]:
torch.cat([input_data, predictions[-1].unsqueeze(dim=1)], dim=1).shape

torch.Size([103, 29])

In [25]:
out.shape

torch.Size([103, 27, 16070])

In [41]:
torch.argmax(out[:, -1, :], axis=1).shape

torch.Size([103])

In [46]:
input_data = torch.cat([test_inputs, torch.ones(1, test_inputs.shape[1], dtype=torch.long) * train_vocab[BELIEF]])

In [47]:
input_data.shape

torch.Size([103, 4])

In [48]:
model(input_data).shape

torch.Size([103, 4, 16070])

In [360]:
results = []
for pred, target in zip(predictions.T, test_beliefs):
    result = match_slot(translate(target), translate(pred))
    results.append(result)

In [361]:
get_accuracy(results)

{'joint_accuracy': 0.0, 'slot_accuracy': 0.0}