# 2 - NestedField, CharCNN and Inference

In [1]:
import torch
from torchtext import data

SEED = 1234

torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

WORDS = data.Field()
CHAR_NESTING = data.Field(tokenize=list)
CHARS = data.NestedField(CHAR_NESTING)
UD_TAGS = data.Field(unk_token=None)
PTB_TAGS = data.Field(unk_token=None)

fields = [(("words", "chars"), (WORDS, CHARS)), ("udtags", UD_TAGS), ("ptbtags", PTB_TAGS)]

In [2]:
from torchtext import datasets

train_data, valid_data, test_data = datasets.UDPOS.splits(fields)

In [3]:
print(f"Number of training examples: {len(train_data)}")
print(f"Number of validation examples: {len(valid_data)}")
print(f"Number of testing examples: {len(test_data)}")

Number of training examples: 12543
Number of validation examples: 2002
Number of testing examples: 2077


In [4]:
print(vars(train_data.examples[0]))

{'words': ['Al', '-', 'Zaman', ':', 'American', 'forces', 'killed', 'Shaikh', 'Abdullah', 'al', '-', 'Ani', ',', 'the', 'preacher', 'at', 'the', 'mosque', 'in', 'the', 'town', 'of', 'Qaim', ',', 'near', 'the', 'Syrian', 'border', '.'], 'chars': [['A', 'l'], ['-'], ['Z', 'a', 'm', 'a', 'n'], [':'], ['A', 'm', 'e', 'r', 'i', 'c', 'a', 'n'], ['f', 'o', 'r', 'c', 'e', 's'], ['k', 'i', 'l', 'l', 'e', 'd'], ['S', 'h', 'a', 'i', 'k', 'h'], ['A', 'b', 'd', 'u', 'l', 'l', 'a', 'h'], ['a', 'l'], ['-'], ['A', 'n', 'i'], [','], ['t', 'h', 'e'], ['p', 'r', 'e', 'a', 'c', 'h', 'e', 'r'], ['a', 't'], ['t', 'h', 'e'], ['m', 'o', 's', 'q', 'u', 'e'], ['i', 'n'], ['t', 'h', 'e'], ['t', 'o', 'w', 'n'], ['o', 'f'], ['Q', 'a', 'i', 'm'], [','], ['n', 'e', 'a', 'r'], ['t', 'h', 'e'], ['S', 'y', 'r', 'i', 'a', 'n'], ['b', 'o', 'r', 'd', 'e', 'r'], ['.']], 'udtags': ['PROPN', 'PUNCT', 'PROPN', 'PUNCT', 'ADJ', 'NOUN', 'VERB', 'PROPN', 'PROPN', 'PROPN', 'PUNCT', 'PROPN', 'PUNCT', 'DET', 'NOUN', 'ADP', 'DET', 'N

In [5]:
print(vars(train_data.examples[0])['words'])

['Al', '-', 'Zaman', ':', 'American', 'forces', 'killed', 'Shaikh', 'Abdullah', 'al', '-', 'Ani', ',', 'the', 'preacher', 'at', 'the', 'mosque', 'in', 'the', 'town', 'of', 'Qaim', ',', 'near', 'the', 'Syrian', 'border', '.']


In [6]:
print(vars(train_data.examples[0])['chars'])

[['A', 'l'], ['-'], ['Z', 'a', 'm', 'a', 'n'], [':'], ['A', 'm', 'e', 'r', 'i', 'c', 'a', 'n'], ['f', 'o', 'r', 'c', 'e', 's'], ['k', 'i', 'l', 'l', 'e', 'd'], ['S', 'h', 'a', 'i', 'k', 'h'], ['A', 'b', 'd', 'u', 'l', 'l', 'a', 'h'], ['a', 'l'], ['-'], ['A', 'n', 'i'], [','], ['t', 'h', 'e'], ['p', 'r', 'e', 'a', 'c', 'h', 'e', 'r'], ['a', 't'], ['t', 'h', 'e'], ['m', 'o', 's', 'q', 'u', 'e'], ['i', 'n'], ['t', 'h', 'e'], ['t', 'o', 'w', 'n'], ['o', 'f'], ['Q', 'a', 'i', 'm'], [','], ['n', 'e', 'a', 'r'], ['t', 'h', 'e'], ['S', 'y', 'r', 'i', 'a', 'n'], ['b', 'o', 'r', 'd', 'e', 'r'], ['.']]


In [7]:
print(vars(train_data.examples[0])['udtags'])

['PROPN', 'PUNCT', 'PROPN', 'PUNCT', 'ADJ', 'NOUN', 'VERB', 'PROPN', 'PROPN', 'PROPN', 'PUNCT', 'PROPN', 'PUNCT', 'DET', 'NOUN', 'ADP', 'DET', 'NOUN', 'ADP', 'DET', 'NOUN', 'ADP', 'PROPN', 'PUNCT', 'ADP', 'DET', 'ADJ', 'NOUN', 'PUNCT']


In [8]:
print(vars(train_data.examples[0])['ptbtags'])

['NNP', 'HYPH', 'NNP', ':', 'JJ', 'NNS', 'VBD', 'NNP', 'NNP', 'NNP', 'HYPH', 'NNP', ',', 'DT', 'NN', 'IN', 'DT', 'NN', 'IN', 'DT', 'NN', 'IN', 'NNP', ',', 'IN', 'DT', 'JJ', 'NN', '.']


In [9]:
MIN_FREQ = 2

WORDS.build_vocab(train_data, 
                 min_freq = MIN_FREQ,
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)

CHARS.build_vocab(train_data,
                  min_freq = MIN_FREQ,
                  vectors = "glove.6B.50d",
                  unk_init = torch.Tensor.normal_)

UD_TAGS.build_vocab(train_data)
PTB_TAGS.build_vocab(train_data)

In [10]:
print(f"Unique tokens in WORDS vocabulary: {len(WORDS.vocab)}")
print(f"Unique tokens in CHARS vocabulary: {len(CHARS.vocab)}")
print(f"Unique tokens in UD_TAG vocabulary: {len(UD_TAGS.vocab)}")
print(f"Unique tokens in PTB_TAG vocabulary: {len(PTB_TAGS.vocab)}")

Unique tokens in WORDS vocabulary: 9875
Unique tokens in CHARS vocabulary: 104
Unique tokens in UD_TAG vocabulary: 18
Unique tokens in PTB_TAG vocabulary: 51


In [11]:
print(CHARS.vocab.freqs.most_common())

[('e', 93412), ('t', 67775), ('a', 63699), ('o', 58826), ('n', 53490), ('i', 52122), ('s', 45756), ('r', 45690), ('h', 36688), ('l', 32140), ('d', 28818), ('u', 22188), ('c', 20832), ('m', 18554), ('y', 16337), ('f', 15411), ('g', 15110), ('w', 14414), ('p', 14291), ('.', 11491), ('b', 10806), ('v', 8175), (',', 7155), ('k', 6636), ('I', 5512), ('-', 3768), ('T', 3501), ('A', 3135), ('0', 3035), ('S', 3016), ("'", 2558), ('C', 2020), ('E', 1929), ('1', 1929), ('M', 1828), ('P', 1705), ('2', 1583), ('x', 1560), ('N', 1539), ('B', 1513), ('W', 1400), ('H', 1374), ('O', 1355), ('"', 1298), ('D', 1284), ('R', 1258), ('!', 1221), ('L', 1160), ('/', 1140), (':', 1092), ('3', 1048), ('j', 976), ('F', 944), (')', 938), ('?', 937), ('G', 930), ('q', 921), ('(', 866), ('5', 855), ('U', 763), ('4', 717), ('9', 702), ('J', 676), ('Y', 657), ('z', 608), ('6', 598), ('7', 597), ('8', 547), ('_', 539), ('K', 514), ('V', 392), ('=', 369), ('*', 310), ('$', 270), ('@', 177), ('&', 158), ('>', 151), ('Q

In [12]:
BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE,
    device = device)

In [13]:
import torch.nn as nn
import torch.nn.functional as F

class RNNPOSTagger(nn.Module):
    def __init__(self, 
                 word_vocab_size,
                 char_vocab_size,
                 word_embedding_dim, 
                 char_embedding_dim,
                 char_cnn_filter_size,
                 hidden_dim, 
                 output_dim, 
                 n_layers, 
                 bidirectional, 
                 dropout, 
                 pad_idx):
        
        super().__init__()
        
        assert char_cnn_filter_size % 2 == 1, "Kernel size must be odd!"
        
        self.word_embedding_dim = word_embedding_dim
        self.char_embedding_dim = char_embedding_dim
        
        self.word_embedding = nn.Embedding(word_vocab_size, word_embedding_dim, padding_idx = pad_idx)
        self.char_embedding = nn.Embedding(char_vocab_size, char_embedding_dim, padding_idx = pad_idx)
        
        self.char_cnn = nn.Conv1d(char_embedding_dim,
                                  word_embedding_dim,
                                  kernel_size = char_cnn_filter_size,
                                  padding = (char_cnn_filter_size - 1) // 2)
        
        self.rnn = nn.LSTM(word_embedding_dim * 2, 
                           hidden_dim, 
                           num_layers = n_layers, 
                           bidirectional = bidirectional)
        
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def embed_chars(self, chars):
        
        batch_size = chars.shape[0]
        sent_len = chars.shape[1]
        word_len = chars.shape[2]
        
        embedded = self.char_embedding(chars)
        
        #chars_embedded = [batch size, sent len, char emb dim, word len]
        
        embedded = embedded.view(-1, word_len, self.char_embedding_dim)
        embedded = embedded.permute(0, 2, 1)
        
        #embedded = [batch size * sent len, char emb dim, word len]
        
        embedded = self.char_cnn(embedded)
                
        #embedded = [batch size * sent len, word emb dim, word len]
        
        embedded = embedded.view(batch_size, sent_len, self.word_embedding_dim, word_len)
                
        #embedded = [batch size, sent len, word emb dim, word len]
        
        embedded = torch.max(embedded, dim = -1).values
                
        #embeded = [batch size, sent len, word emb dim]
        
        embedded = embedded.permute(1, 0, 2)
        
        #embedded = [sent len, batch size, word emb dim]
        
        return embedded
        
    def forward(self, words, chars):

        #words = [sent len, batch size]
        #chars = [batch size, sent len, word len]
        
        words_embedded = self.dropout(self.word_embedding(words))
        chars_embedded = self.dropout(self.embed_chars(chars))
        
        #words_embedded = [sent len, batch size, word emb dim]
        #chars_embeded = [sent len, batch size, word emb dim]
        
        embedded = F.relu(torch.cat((chars_embedded, words_embedded), dim = -1))
        
        #embedded = [sent len, batch size, word emb dim * 2]
                
        outputs, (hidden, cell) = self.rnn(embedded)
        
        #output = [sent len, batch size, hid dim * n directions]
        #hidden/cell = [n layers * n directions, batch size, hid dim]
        
        predictions = self.fc(self.dropout(outputs))
        
        #predictions = [sent len, batch size, output dim]
        
        return predictions

In [14]:
WORD_INPUT_DIM = len(WORDS.vocab)
CHAR_INPUT_DIM = len(CHARS.vocab)
WORD_EMBEDDING_DIM = 100
CHAR_EMBEDDING_DIM = 50
CHAR_CNN_FILTER_SIZE = 3
HIDDEN_DIM = 128
OUTPUT_DIM = len(UD_TAGS.vocab)
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.25
PAD_IDX = WORDS.vocab.stoi[WORDS.pad_token]

model = RNNPOSTagger(WORD_INPUT_DIM, 
                     CHAR_INPUT_DIM,
                     WORD_EMBEDDING_DIM, 
                     CHAR_EMBEDDING_DIM,
                     CHAR_CNN_FILTER_SIZE,
                     HIDDEN_DIM, 
                     OUTPUT_DIM, 
                     N_LAYERS, 
                     BIDIRECTIONAL, 
                     DROPOUT, 
                     PAD_IDX)

In [15]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.normal_(param.data, mean=0, std=0.1)
        
model.apply(init_weights)

RNNPOSTagger(
  (word_embedding): Embedding(9875, 100, padding_idx=1)
  (char_embedding): Embedding(104, 50, padding_idx=1)
  (char_cnn): Conv1d(50, 100, kernel_size=(3,), stride=(1,), padding=(1,))
  (rnn): LSTM(200, 128, num_layers=2, bidirectional=True)
  (fc): Linear(in_features=256, out_features=18, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
)

In [16]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 1,745,610 trainable parameters


In [17]:
pretrained_word_embeddings = WORDS.vocab.vectors

print(pretrained_word_embeddings.shape)

torch.Size([9875, 100])


In [18]:
pretrained_char_embeddings = CHARS.vocab.vectors

print(pretrained_char_embeddings.shape)

torch.Size([104, 50])


In [19]:
model.word_embedding.weight.data.copy_(pretrained_word_embeddings)
model.char_embedding.weight.data.copy_(pretrained_char_embeddings)

tensor([[-0.2944, -0.5460, -0.0052,  ..., -1.3058, -0.5555,  2.0033],
        [-0.1977,  0.9000, -1.3029,  ...,  0.8878,  0.6009,  0.7532],
        [ 0.7383,  0.6545,  1.0873,  ..., -0.1680,  0.6562,  1.1014],
        ...,
        [-0.4286,  1.0551,  0.6042,  ..., -0.0753, -0.1357,  0.6105],
        [-0.6707,  0.6986,  0.6963,  ...,  0.0801,  0.1009,  0.9292],
        [-0.1443,  0.1088, -0.5041,  ...,  0.2690,  1.1543,  1.0493]])

In [20]:
UNK_IDX = WORDS.vocab.stoi[WORDS.unk_token]

model.word_embedding.weight.data[UNK_IDX] = torch.zeros(WORD_EMBEDDING_DIM)
model.word_embedding.weight.data[PAD_IDX] = torch.zeros(WORD_EMBEDDING_DIM)

print(model.word_embedding.weight.data)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.3398,  0.2094,  0.4635,  ..., -0.2339,  0.4730, -0.0288],
        ...,
        [-1.0205,  2.2411,  0.2981,  ...,  1.1904, -0.9438,  0.2878],
        [-0.5972,  0.0471, -0.2406,  ..., -0.9446, -0.1126, -0.2260],
        [ 0.3393, -0.6743, -0.2552,  ...,  0.1080,  1.0404,  1.3731]])


In [21]:
model.char_embedding.weight.data[UNK_IDX] = torch.zeros(CHAR_EMBEDDING_DIM)
model.char_embedding.weight.data[PAD_IDX] = torch.zeros(CHAR_EMBEDDING_DIM)

print(model.char_embedding.weight.data)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.7383,  0.6545,  1.0873,  ..., -0.1680,  0.6562,  1.1014],
        ...,
        [-0.4286,  1.0551,  0.6042,  ..., -0.0753, -0.1357,  0.6105],
        [-0.6707,  0.6986,  0.6963,  ...,  0.0801,  0.1009,  0.9292],
        [-0.1443,  0.1088, -0.5041,  ...,  0.2690,  1.1543,  1.0493]])


In [22]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters())

In [23]:
criterion = nn.CrossEntropyLoss()

In [24]:
model = model.to(device)
criterion = criterion.to(device)

In [25]:
def categorical_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
    correct = max_preds.squeeze(1).eq(y)
    return correct.sum() / torch.FloatTensor([y.shape[0]])

In [26]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        words = batch.words
        chars = batch.chars
        tags = batch.udtags
        
        optimizer.zero_grad()
        
        #words = [sent len, batch size]
        #chars = [batch size, sent len, word len]
        
        predictions = model(words, chars)
        
        #predictions = [sent len, batch size, output dim]
        #tags = [sent len, batch size]
        
        predictions = predictions.view(-1, predictions.shape[-1])
        tags = tags.view(-1)
        
        #predictions = [sent len * batch size, output dim]
        #tags = [sent len * batch size]
        
        loss = criterion(predictions, tags)
                
        acc = categorical_accuracy(predictions, tags)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [27]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            words = batch.words
            chars = batch.chars
            tags = batch.udtags
            
            predictions = model(words, chars)
            
            predictions = predictions.view(-1, predictions.shape[-1])
            tags = tags.view(-1)
            
            loss = criterion(predictions, tags)
            
            acc = categorical_accuracy(predictions, tags)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [28]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [29]:
N_EPOCHS = 10

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 9s
	Train Loss: 0.387 | Train Acc: 88.64%
	 Val. Loss: 0.628 |  Val. Acc: 81.18%
Epoch: 02 | Epoch Time: 0m 9s
	Train Loss: 0.119 | Train Acc: 96.24%
	 Val. Loss: 0.450 |  Val. Acc: 86.73%
Epoch: 03 | Epoch Time: 0m 9s
	Train Loss: 0.088 | Train Acc: 97.22%
	 Val. Loss: 0.390 |  Val. Acc: 87.54%
Epoch: 04 | Epoch Time: 0m 9s
	Train Loss: 0.072 | Train Acc: 97.70%
	 Val. Loss: 0.351 |  Val. Acc: 88.92%
Epoch: 05 | Epoch Time: 0m 9s
	Train Loss: 0.063 | Train Acc: 97.99%
	 Val. Loss: 0.329 |  Val. Acc: 89.84%
Epoch: 06 | Epoch Time: 0m 9s
	Train Loss: 0.056 | Train Acc: 98.19%
	 Val. Loss: 0.320 |  Val. Acc: 89.41%
Epoch: 07 | Epoch Time: 0m 9s
	Train Loss: 0.051 | Train Acc: 98.35%
	 Val. Loss: 0.303 |  Val. Acc: 90.58%
Epoch: 08 | Epoch Time: 0m 9s
	Train Loss: 0.045 | Train Acc: 98.54%
	 Val. Loss: 0.284 |  Val. Acc: 91.25%
Epoch: 09 | Epoch Time: 0m 9s
	Train Loss: 0.043 | Train Acc: 98.61%
	 Val. Loss: 0.287 |  Val. Acc: 90.44%
Epoch: 10 | Epoch Time: 0m 9

In [30]:
import spacy

def tag_sentence(model, device, sentence, word_vocab, char_vocab, tag_vocab):
    
    if isinstance(sentence, str):
        nlp = spacy.load('en')
        tokens = [token.text for token in nlp(sentence)]
    else:
        tokens = sentence

    numericalized_words = [word_vocab.stoi[t] for t in tokens]

    unks = [t for t, n in zip(tokens, numericalized_words) if n == 0]
        
    chars = [list(t) for t in tokens]
    char_len = max([len(c) for c in chars])
    chars = [c + ['<pad>'] * (char_len - len(c)) for c in chars]
    

    numericalized_chars = [[char_vocab.stoi[i] for i in c] for c in chars]
    
    word_tensor = torch.LongTensor(numericalized_words)
    char_tensor = torch.LongTensor(numericalized_chars)
    
    word_tensor = word_tensor.unsqueeze(-1).to(device)
    char_tensor = char_tensor.unsqueeze(0).to(device)
    
    model.eval()
     
    predictions = model(word_tensor, char_tensor)
    
    top_predictions = predictions.argmax(-1)
    
    predicted_tags = [tag_vocab.itos[t.item()] for t in top_predictions]
    
    return tokens, predicted_tags, unks

In [31]:
example_index = 4

sentence = vars(train_data.examples[example_index])['words']
actual_tags = vars(train_data.examples[example_index])['udtags']

print(sentence)

['The', 'MoI', 'in', 'Iraq', 'is', 'equivalent', 'to', 'the', 'US', 'FBI', ',', 'so', 'this', 'would', 'be', 'like', 'having', 'J.', 'Edgar', 'Hoover', 'unwittingly', 'employ', 'at', 'a', 'high', 'level', 'members', 'of', 'the', 'Weathermen', 'bombers', 'back', 'in', 'the', '1960s', '.']


In [32]:
tokens, pred_tags, unks = tag_sentence(model, device, sentence, WORDS.vocab, CHARS.vocab, UD_TAGS.vocab)

print(unks)

['MoI', 'Edgar', 'Hoover', 'unwittingly', 'Weathermen']


In [33]:
print("Pred. Tag\tActual Tag\tCorrect?\tToken\n")

for token, pred_tag, actual_tag in zip(tokens, pred_tags, actual_tags):
    correct = '✔' if pred_tag == actual_tag else '✘'
    print(f"{pred_tag}\t\t{actual_tag}\t\t{correct}\t\t{token}")

Pred. Tag	Actual Tag	Correct?	Token

DET		DET		✔		The
PROPN		PROPN		✔		MoI
ADP		ADP		✔		in
PROPN		PROPN		✔		Iraq
AUX		AUX		✔		is
ADJ		ADJ		✔		equivalent
ADP		ADP		✔		to
DET		DET		✔		the
PROPN		PROPN		✔		US
PROPN		PROPN		✔		FBI
PUNCT		PUNCT		✔		,
ADV		ADV		✔		so
PRON		PRON		✔		this
AUX		AUX		✔		would
AUX		VERB		✘		be
SCONJ		SCONJ		✔		like
VERB		VERB		✔		having
PROPN		PROPN		✔		J.
PROPN		PROPN		✔		Edgar
PROPN		PROPN		✔		Hoover
ADV		ADV		✔		unwittingly
VERB		VERB		✔		employ
ADP		ADP		✔		at
DET		DET		✔		a
ADJ		ADJ		✔		high
NOUN		NOUN		✔		level
NOUN		NOUN		✔		members
ADP		ADP		✔		of
DET		DET		✔		the
PROPN		PROPN		✔		Weathermen
NOUN		NOUN		✔		bombers
ADV		ADV		✔		back
ADP		ADP		✔		in
DET		DET		✔		the
NOUN		NOUN		✔		1960s
PUNCT		PUNCT		✔		.


In [34]:
sentence = 'The Queen will deliver a speech about the conflict in North Korea at 1pm tomorrow.'

tokens, tags, unks = tag_sentence(model, device, sentence, WORDS.vocab, CHARS.vocab, UD_TAGS.vocab)

print(unks)

['Queen']


In [35]:
print("Pred. Tag\tToken\n")

for token, tag in zip(tokens, tags):
    print(f"{tag}\t\t{token}")

Pred. Tag	Token

DET		The
PROPN		Queen
AUX		will
VERB		deliver
DET		a
NOUN		speech
ADP		about
DET		the
NOUN		conflict
ADP		in
PROPN		North
PROPN		Korea
ADP		at
NUM		1
NOUN		pm
NOUN		tomorrow
PUNCT		.
