<a id="Bi-LSTM CRF"></a>

# Bi-LSTM CRF Model

We will leverage a Bi-LSTM CRF model as the baseline tagger.

This notebook explores the construction of the tagger.

In [1]:
from pathlib import Path
import pandas as pd

In [2]:
ROOT_DIR = Path('notebooks/eda.ipynb').resolve().parents[2]
DATA_DIR = ROOT_DIR / "data"
PREPARED_DIR = DATA_DIR / "prepared"

In [3]:
from ast import literal_eval


df = pd.read_csv(PREPARED_DIR / "master.csv")
df["tags"] = df["tags"].apply(literal_eval)
df["single_tag"] = df["tags"].apply(lambda x: x[0])
df.head()

Unnamed: 0,sentence_num,word,start_idx,end_idx,tags,single_tag,POS
0,0,This,0,4,[B-Temporal],B-Temporal,PRON
1,0,week,5,9,[I-Temporal],I-Temporal,NOUN
2,0,sees,10,14,[O],O,VERB
3,0,the,15,18,[O],O,PRON
4,0,start,19,24,[O],O,VERB


In [6]:
words = set(list(df['word'].values))
words.add('PADword')
n_words = len(words)
print(f"There are {n_words} unique words")

There are 4317 unique words


In [7]:
tags = list(set(df["single_tag"].values))
n_tags = len(tags)
print(f"There are {n_tags} unique tags")

There are 29 unique tags


In [8]:
from rlner.utils import SentenceGetter

2022-11-22 07:48:56.958626: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2022-11-22 07:48:56.958759: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-22 07:48:56.958873: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:41:00.0 name: NVIDIA GeForce RTX 3080 Ti computeCapability: 8.6
coreClock: 1.665GHz coreCount: 80 deviceMemorySize: 11.77GiB deviceMemoryBandwidth: 849.46GiB/s
2022-11-22 07:48:56.958969: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2022-11-22 07:48:56.959011: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic l

In [9]:
getter = SentenceGetter(df)
sentences = getter.sentences
print(f"There are {len(sentences)} total sentences")

There are 928 total sentences


In [10]:
# Make indices for ML modeling
words2index = {w:i for i,w in enumerate(words)}
tags2index = {t:i for i,t in enumerate(tags)}

In [11]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_len = 50
y = [[tags2index[w[-1]] for w in s] for s in sentences]

y = pad_sequences(maxlen=max_len, sequences=y, padding="post", value=tags2index["O"])

In [12]:
y.shape

(928, 50)

In [13]:
max_len = 50
X = [[words2index[w[0]] for w in s] for s in sentences]
X = pad_sequences(maxlen=max_len, sequences=X, padding="post", value=words2index["PADword"])

In [14]:
import tensorflow as tf

x_tensor = tf.convert_to_tensor(X)
y_tensor = tf.convert_to_tensor(y)

ds = tf.data.Dataset.from_tensor_slices((x_tensor, y_tensor))

BATCH_SIZE = 64

2022-11-22 07:53:17.033752: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2022-11-22 07:53:17.063837: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 3492885000 Hz
2022-11-22 07:53:17.065314: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f2188000b70 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-11-22 07:53:17.065356: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2022-11-22 07:53:17.067158: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix:
2022-11-22 07:53:17.067174: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108]      


In [15]:
def get_dataset_partitions_tf(
    ds, 
    ds_size, 
    train_split=0.8, 
    val_split=0.1, 
    test_split=0.1, 
    shuffle=True, 
    shuffle_size=1000
):
    assert (train_split + test_split + val_split) == 1

    if shuffle:
        ds = ds.shuffle(shuffle_size, seed=42)

    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    
    train_ds = ds.take(train_size)    
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)
    
    return train_ds.batch(BATCH_SIZE), val_ds.batch(BATCH_SIZE), test_ds.batch(BATCH_SIZE)

In [16]:
train_ds, val_ds, test_ds = get_dataset_partitions_tf(
    ds=ds,
    ds_size=X.shape[0],
)

In [17]:
import io
from pathlib import Path
import numpy as np
import tensorflow as tf

In [18]:
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
from tensorflow_addons.text import crf_log_likelihood, crf_decode

"""
Credit:
https://github.com/ngoquanghuy99/POS-Tagging-BiLSTM-CRF
"""

class CRF(L.Layer):
    def __init__(self,
                 output_dim,
                 sparse_target=True,
                 **kwargs):
        """    
        Args:
            output_dim (int): the number of labels to tag each temporal input.
            sparse_target (bool): whether the the ground-truth label represented in one-hot.
        Input shape:
            (batch_size, sentence length, output_dim)
        Output shape:
            (batch_size, sentence length, output_dim)
        """
        super(CRF, self).__init__(**kwargs)
        self.output_dim = int(output_dim) 
        self.sparse_target = sparse_target
        self.input_spec = L.InputSpec(min_ndim=3)
        self.supports_masking = False
        self.sequence_lengths = None
        self.transitions = None

    def build(self, input_shape):
        assert len(input_shape) == 3
        f_shape = tf.TensorShape(input_shape)
        input_spec = L.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})

        if f_shape[-1] is None:
            raise ValueError('The last dimension of the inputs to `CRF` '
                             'should be defined. Found `None`.')
        if f_shape[-1] != self.output_dim:
            raise ValueError('The last dimension of the input shape must be equal to output'
                             ' shape. Use a linear layer if needed.')
        self.input_spec = input_spec
        self.transitions = self.add_weight(name='transitions',
                                           shape=[self.output_dim, self.output_dim],
                                           initializer='glorot_uniform',
                                           trainable=True)
        self.built = True

    def compute_mask(self, inputs, mask=None):
        # Just pass the received mask from previous layer, to the next layer or
        # manipulate it if this layer changes the shape of the input
        return mask

    def call(self, inputs, sequence_lengths=None, training=None, **kwargs):
        sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
        if sequence_lengths is not None:
            assert len(sequence_lengths.shape) == 2
            assert tf.convert_to_tensor(sequence_lengths).dtype == 'int32'
            seq_len_shape = tf.convert_to_tensor(sequence_lengths).get_shape().as_list()
            assert seq_len_shape[1] == 1
            self.sequence_lengths = K.flatten(sequence_lengths)
        else:
            self.sequence_lengths = tf.ones(tf.shape(inputs)[0], dtype=tf.int32) * (
                tf.shape(inputs)[1]
            )

        viterbi_sequence, _ = crf_decode(sequences,
                                         self.transitions,
                                         self.sequence_lengths)
        output = K.one_hot(viterbi_sequence, self.output_dim)
        return K.in_train_phase(sequences, output)

    @property
    def loss(self):
        def crf_loss(y_true, y_pred):
            y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
            log_likelihood, self.transitions = crf_log_likelihood(
                y_pred,
                tf.cast(K.argmax(y_true), dtype=tf.int32) if self.sparse_target else y_true,
                self.sequence_lengths,
                transition_params=self.transitions,
            )
            return tf.reduce_mean(-log_likelihood)
        return crf_loss

    @property
    def accuracy(self):
        def viterbi_accuracy(y_true, y_pred):
            # -1e10 to avoid zero at sum(mask)
            mask = K.cast(
                K.all(K.greater(y_pred, -1e10), axis=2), K.floatx())
            shape = tf.shape(y_pred)
            sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
            y_pred, _ = crf_decode(y_pred, self.transitions, sequence_lengths)
            if self.sparse_target:
                y_true = K.argmax(y_true, 2)
            y_pred = K.cast(y_pred, 'int32')
            y_true = K.cast(y_true, 'int32')
            corrects = K.cast(K.equal(y_true, y_pred), K.floatx())
            return K.sum(corrects * mask) / K.sum(mask)
        return viterbi_accuracy

    @property
    def f1(self):
        def crf_f1(y_true, y_pred):
            shape = tf.shape(y_pred)
            sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
            y_pred, _ = crf_decode(y_pred, self.transitions, sequence_lengths)
            true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
            possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
            predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
            precision = true_positives / (predicted_positives + K.epsilon())
            recall = true_positives / (possible_positives + K.epsilon())
            f1_val = 2*(precision*recall)/(precision+recall+K.epsilon())
            return f1_val
        return crf_f1

    def compute_output_shape(self, input_shape):
        tf.TensorShape(input_shape).assert_has_rank(3)
        return input_shape[:2] + (self.output_dim,)

    def get_config(self):
        config = {
            'output_dim': self.output_dim,
            'sparse_target': self.sparse_target,
            'supports_masking': self.supports_masking,
            'transitions': K.eval(self.transitions)
        }
        base_config = super(CRF, self).get_config()
        return dict(base_config, **config)


In [19]:
EMBEDDING_DIR = DATA_DIR / "embeddings"

In [20]:
# standard libaries
import io
from pathlib import Path
from typing import Dict, Tuple

# third party libraries
import numpy as np
import tensorflow.keras.layers as layers
import tensorflow.keras.models as models


def create_model(
    vocab_size: int, max_length: int, embedding_dim: int, word_index: Dict[str, int], tag_index: Dict[str, int]
) -> Tuple[models.Model]:
    """Create Bi-LSTM CRF model in tensorflow.

    Model1 is the trainable model. Model2 is for predictions and returns:
    [predicted labels, LSTM hidden state (Forward and backward), LSTM cell state (forward and backward), embeddings]

    This is leveraged to build the REINFORCE states.

    Adapted from:
    https://github.com/ngoquanghuy99/POS-Tagging-BiLSTM-CRF

    Args:
        vocab_size (int): Size of vocabulary
        max_length (int): Max sequence length
        embedding_dim (int): Size of embedding. Make sure to match size of GloVe embedding.
        word_index (Dict[str, int]): Index mapping words to ints
        tag_index (Dict[str, int]): Index mapping tokens to ints

    Returns:
        Tuple[Model]: Compiled Model and Non-compiled Model
        with exposed LSTM and embedding layers
    """

    embeddings_index = {}
    with io.open(EMBEDDING_DIR / "glove.6B.100d.txt", "r", encoding="utf-8") as f:
        for line in f:
            values = line.strip().split()
            curr_word = values[0]
            coefs = np.asarray(values[1:], dtype="float64")
            embeddings_index[curr_word] = coefs
        embeddings_matrix = np.zeros((vocab_size, embedding_dim))
        for word, i in word_index.items():
            if i > vocab_size:
                continue
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embeddings_matrix[i] = embedding_vector

    inputs = layers.Input(shape=(max_length, ))

    embeddings = layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        input_length=max_length,
        weights=[embeddings_matrix],
        mask_zero=True
    )(inputs)

    lstm_out, sh_fw, sc_fw, sh_bw, sc_bw = layers.Bidirectional(
        layers.LSTM(
            units=embedding_dim, return_sequences=True, return_state=True, recurrent_dropout=0.01
        )
    )(embeddings)

    time_dist = layers.TimeDistributed(layers.Dense(len(tag_index)))(lstm_out)
    
    crf = CRF(len(tag_index), sparse_target=False)
    pred = crf(time_dist)

    model1 = models.Model(inputs=[inputs], outputs=[pred])
    model2 = models.Model(inputs=[inputs], outputs=[pred, lstm_out, sh_fw, sc_fw, sh_bw, sc_bw, embeddings])

    model1.compile(optimizer="adam", loss=crf.loss, metrics=[crf.accuracy])
    model1.summary()

    return model1, model2

In [21]:
import shutil

log_dir = ROOT_DIR / "models/logs/"

if any(log_dir.iterdir()):
    for i in log_dir.glob("**/*"):
        if i.is_dir():
            shutil.rmtree(i)
        else:
            i.unlink()

In [22]:
model1, model2 = create_model(
    vocab_size=len(words2index),
    max_length=50,
    embedding_dim=100,
    word_index=words2index,
    tag_index=tags2index
)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 50)]              0         
_________________________________________________________________
embedding (Embedding)        (None, 50, 100)           431700    
_________________________________________________________________
bidirectional (Bidirectional [(None, 50, 200), (None,  160800    
_________________________________________________________________
time_distributed (TimeDistri (None, 50, 29)            5829      
_________________________________________________________________
crf (CRF)                    (None, 50, 29)            841       
Total params: 599,170
Trainable params: 599,170
Non-trainable params: 0
_________________________________________________________________


In [23]:
callback = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=3)
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir, 
    histogram_freq=1,
)


history = model1.fit(
    train_ds,
    epochs=100, 
    verbose=1,
    validation_data=val_ds,
    callbacks=[callback, tensorboard_callback],
)

2022-11-22 07:53:43.499089: I tensorflow/core/profiler/lib/profiler_session.cc:159] Profiler session started.
2022-11-22 07:53:43.499164: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1363] Profiler found 1 GPUs
2022-11-22 07:53:43.499380: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcupti.so.10.1'; dlerror: libcupti.so.10.1: cannot open shared object file: No such file or directory
2022-11-22 07:53:43.499396: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1408] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI could not be loaded or symbol could not be found.
2022-11-22 07:53:43.499404: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1447] function cupti_interface_->ActivityRegisterCallbacks( AllocCuptiActivityBuffer, FreeCuptiActivityBuffer)failed with error CUPTI could not be loaded or symbol could not be found.
2022-11-22 07:53:43.499419: 

Epoch 1/100
Epoch 2/100

2022-11-22 07:53:52.035400: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1430] function cupti_interface_->EnableCallback( 0 , subscriber_, CUPTI_CB_DOMAIN_DRIVER_API, cbid)failed with error CUPTI could not be loaded or symbol could not be found.
2022-11-22 07:53:52.485864: I tensorflow/core/profiler/internal/gpu/device_tracer.cc:216]  GpuTracer has collected 0 callback api events and 0 activity events.
2022-11-22 07:53:55.122178: I tensorflow/core/profiler/rpc/client/save_profile.cc:168] Creating directory: /home/jdoe/repos/RLNER/models/logs/train/plugins/profile/2022_11_22_07_53_53
2022-11-22 07:53:57.526049: I tensorflow/core/profiler/rpc/client/save_profile.cc:174] Dumped gzipped tool data for trace.json.gz to /home/jdoe/repos/RLNER/models/logs/train/plugins/profile/2022_11_22_07_53_53/jdoe-X399-DESIGNARE-EX.trace.json.gz
2022-11-22 07:53:58.219537: E tensorflow/core/profiler/utils/hardware_type_utils.cc:60] Invalid GPU compute capability.
2022-11-22 07:53:58.446264: I te

Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100


In [26]:
history.history["val_loss"][-1]

115.02660369873047

The baseline Bi-LSTM CRF model has compiled, trained, and tested on the Re3d dataset successfully! Achieving ~99.00% Accuracy on the test set, which is pretty good, as only ~68% of the tags are "O" tags.

In [43]:
model1.evaluate(test_ds)



[125.740234375, 0.6221145391464233]

`create_model` creates two models, one which is trainable and one which is used for prediction. This model outputs:
* predictions
* LSTM hidden state forward pass
* LSTM cell state forward pass
* LSTM hidden state backward pass
* LSTM cell state backward pass
* Embeddings

This is leveraged in construction of the REINFORCE states.

In [44]:
preds = model2.predict(test_ds)

In [45]:
len(preds)

7

In [46]:
# The LSTM hidden state forward pass of the first test sequence
preds[1][0]

array([[-0.37701362, -0.06361421,  0.13121796, ...,  0.7053516 ,
         0.81849736,  0.3800589 ],
       [-0.49132574, -0.10563394,  0.28375712, ...,  0.06301506,
         0.6918716 ,  0.69020206],
       [-0.8381783 , -0.30090937,  0.3310538 , ...,  0.9143059 ,
         0.773429  ,  0.20847169],
       ...,
       [-0.9551602 , -0.397706  ,  0.87192845, ...,  0.28784758,
         0.21293767,  0.15012676],
       [-0.9551803 , -0.3977052 ,  0.871945  , ...,  0.15740854,
         0.1107231 ,  0.07684665],
       [-0.95519596, -0.3977045 ,  0.871959  , ...,  0.06573694,
         0.0415291 ,  0.02455554]], dtype=float32)

In [47]:
pred = model2.predict(tf.convert_to_tensor(X[0:1]))

In [60]:
pred[1][0].shape

(50, 200)

In [28]:
pred[0][0]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [29]:
X[0]

array([3033, 1946, 2120, 2021,  571, 3006, 4284, 3935,  542, 3006,    8,
        844, 2021, 2674, 3006, 2021,  350, 3587, 2313,  185, 3457, 3791,
       2021, 3878, 3006, 4065, 2709, 1635,  930,  921, 1069, 1025, 2149,
       4135, 2111,   24, 2944,  604,  522, 4011, 2021, 1825, 4141, 2634,
       1316, 1316, 1316, 1316, 1316, 1316], dtype=int32)

In [30]:
index2word = {idx: word for word, idx in words2index.items()}
index2tag = {idx: tag for tag, idx in tags2index.items()}

In [31]:
for pred, word_idx in zip(pred[0][0], X[0]):
    word = index2word[word_idx]
    tag_idx = np.argmax(pred)
    tag = index2tag[tag_idx]
    print(f"Word: {word} \tTag: {tag}")

Word: This 	Tag: B-Temporal
Word: week 	Tag: I-Temporal
Word: sees 	Tag: O
Word: the 	Tag: O
Word: start 	Tag: O
Word: of 	Tag: O
Word: a 	Tag: O
Word: second 	Tag: O
Word: round 	Tag: O
Word: of 	Tag: O
Word: talks 	Tag: O
Word: in 	Tag: O
Word: the 	Tag: O
Word: framework 	Tag: O
Word: of 	Tag: O
Word: the 	Tag: O
Word: European 	Tag: B-Organisation
Word: Union 	Tag: I-Organisation
Word: 's 	Tag: O
Word: Regional 	Tag: O
Word: Initiative 	Tag: O
Word: on 	Tag: O
Word: the 	Tag: O
Word: future 	Tag: O
Word: of 	Tag: O
Word: Syria 	Tag: B-Location
Word: , 	Tag: O
Word: High 	Tag: B-Person
Word: Representative 	Tag: I-Person
Word: Federica 	Tag: I-Person
Word: Mogherini 	Tag: I-Person
Word: ’s 	Tag: I-Person
Word: initiative 	Tag: O
Word: to 	Tag: O
Word: identify 	Tag: O
Word: post 	Tag: O
Word: - 	Tag: O
Word: conflict 	Tag: O
Word: arrangements 	Tag: O
Word: for 	Tag: O
Word: the 	Tag: B-Location
Word: country 	Tag: I-Location
Word: . 	Tag: O
Word: 
 	Tag: O
Word: PADword 	Tag: O
Wor

In [32]:
sentences[0]

[('This', 'PRON', 'B-Temporal'),
 ('week', 'NOUN', 'I-Temporal'),
 ('sees', 'VERB', 'O'),
 ('the', 'PRON', 'O'),
 ('start', 'VERB', 'O'),
 ('of', 'ADP', 'O'),
 ('a', 'PRON', 'O'),
 ('second', 'ADJ', 'O'),
 ('round', 'ADJ', 'O'),
 ('of', 'ADP', 'O'),
 ('talks', 'NOUN', 'O'),
 ('in', 'ADP', 'O'),
 ('the', 'PRON', 'O'),
 ('framework', 'NOUN', 'O'),
 ('of', 'ADP', 'O'),
 ('the', 'PRON', 'O'),
 ('European', 'ADJ', 'B-Organisation'),
 ('Union', 'NOUN', 'I-Organisation'),
 ("'s", 'AUX', 'O'),
 ('Regional', 'ADJ', 'O'),
 ('Initiative', 'NOUN', 'O'),
 ('on', 'ADP', 'O'),
 ('the', 'PRON', 'O'),
 ('future', 'NOUN', 'O'),
 ('of', 'ADP', 'O'),
 ('Syria', 'PROPN', 'B-Location'),
 (',', 'PUNCT', 'O'),
 ('High', 'ADJ', 'B-Person'),
 ('Representative', 'ADJ', 'I-Person'),
 ('Federica', 'PROPN', 'I-Person'),
 ('Mogherini', 'PROPN', 'I-Person'),
 ('’s', 'VERB', 'I-Person'),
 ('initiative', 'NOUN', 'O'),
 ('to', 'PART', 'O'),
 ('identify', 'VERB', 'O'),
 ('post', 'ADV', 'O'),
 ('-', 'PUNCT', 'O'),
 ('conf