In [1]:
import numpy as np
import os
import re

In [2]:
import pickle as pkl
import itertools
import codecs
import time
import copy
import random
import sys

In [3]:
START_TAG = '<START>'
STOP_TAG = '<STOP>'

def pad_seq(seq, max_length, PAD_token=0):
    
    seq += [PAD_token for i in range(max_length - len(seq))]
    return seq

def create_dico(item_list):
    """
    Create a dictionary of items from a list of list of items.
    """
    assert type(item_list) is list
    dico = {}
    for items in item_list:
        for item in items:
            if item not in dico:
                dico[item] = 1
            else:
                dico[item] += 1
    return dico

def create_mapping(dico):
    """
    Create a mapping (item to ID / ID to item) from a dictionary.
    Items are ordered by decreasing frequency.
    """
    sorted_items = sorted(dico.items(), key=lambda x: (-x[1], x[0]))
    id_to_item = {i: v[0] for i, v in enumerate(sorted_items)}
    item_to_id = {v: k for k, v in id_to_item.items()}
    return item_to_id, id_to_item

def tag_mapping(sentences):
    """
    Create a dictionary and a mapping of tags, sorted by frequency.
    """
    tags = [s[2] for s in sentences]
    dico = create_dico(tags)
    dico[START_TAG] = -1
    dico[STOP_TAG] = -2
    tag_to_id, id_to_tag = create_mapping(dico)
    print("Found %i unique named entity tags" % len(dico))
    return dico, tag_to_id, id_to_tag

def word_mapping(sentences):

    words = [[str(x).lower() for x in s[0]] for s in sentences]
    dico = create_dico(words)

    dico['<PAD>'] = 10000001
    dico['<UNK>'] = 10000000
    dico = {k:v for k,v in dico.items() if v>=3}
    word_to_id, id_to_word = create_mapping(dico)

    print("Found %i unique words (%i in total)" % (
        len(dico), sum(len(x) for x in words)
    ))
    return dico, word_to_id, id_to_word

def augment_with_pretrained(dictionary, ext_emb_path, words):
    """
    Augment the dictionary with words that have a pretrained embedding.
    If `words` is None, we add every word that has a pretrained embedding
    to the dictionary, otherwise, we only add the words that are given by
    `words` (typically the words in the development and test sets.)
    """
    print('Loading pretrained embeddings from %s...' % ext_emb_path)
    assert os.path.isfile(ext_emb_path)

    # Load pretrained embeddings from file
    pretrained = set([
        line.rstrip().split()[0].strip()
        for line in codecs.open(ext_emb_path, 'r', 'utf-8')
        if len(ext_emb_path) > 0
    ])
    
    if words is None:
        for word in pretrained:
            if word not in dictionary:
                dictionary[word] = 0
    else:
        for word in words:
            if any(x in pretrained for x in [
                word,
                word.lower(),
                re.sub('\d', '0', word.lower())
            ]) and word not in dictionary:
                dictionary[word] = 0

    word_to_id, id_to_word = create_mapping(dictionary)
    return dictionary, word_to_id, id_to_word

def cap_feature(s):
    """
    Capitalization feature:
    0 = low caps
    1 = all caps
    2 = first letter caps
    3 = one capital (not first letter)
    """
    if s.lower() == s:
        return 0
    elif s.upper() == s:
        return 1
    elif s[0].upper() == s[0]:
        return 2
    else:
        return 3

def prepare_dataset(sentences, word_to_id, tag_to_id):
    """
    Prepare the dataset. Return a list of lists of dictionaries containing:
        - word indexes
        - word char indexes
        - tag indexes
    """
    def f(x): return x.lower()
    data = []
    for s in sentences:
        str_words = [str(x) for x in s[0]]
        words = [word_to_id[f(w) if f(w) in word_to_id else '<UNK>']
                 for w in str_words]
        caps = [cap_feature(w) for w in str_words]
        tags = [tag_to_id[t] for t in s[2]]
        verbs = [v for v in s[1]]
        data.append({
            'str_words': str_words,
            'words': words,
            'caps': caps,
            'tags': tags,
            'verbs': verbs
        })
    return data

In [4]:
def load_consrl(train_data, val_data, pretrained='wordvectors/glove.6B.100d.txt', word_dim = 100):
    
    dico_words_train, _, _ = word_mapping(train_data)
    dico_tags, tag_to_id, id_to_tag = tag_mapping(train_data+val_data)
    
    dico_words, word_to_id, id_to_word = augment_with_pretrained(
                                         dico_words_train.copy(), pretrained,
                                         list(itertools.chain.from_iterable(
                                         [[str(x).lower() for x in s[0]] for s in val_data])))
    
    train_data = prepare_dataset(train_data, word_to_id, tag_to_id)
    dev_data = prepare_dataset(val_data, word_to_id, tag_to_id)
    
    print("%i / %i sentences in train / dev." % (len(train_data), len(dev_data)))
    
    all_word_embeds = {}
    for i, line in enumerate(codecs.open(pretrained, 'r', 'utf-8')):
        s = line.strip().split()
        if len(s) == word_dim + 1:
            all_word_embeds[s[0]] = np.array([float(i) for i in s[1:]])

    word_embeds = np.random.uniform(-np.sqrt(0.06), np.sqrt(0.06), (len(word_to_id), word_dim))

    for w in word_to_id:
        if w in all_word_embeds:
            word_embeds[word_to_id[w]] = all_word_embeds[w]
        elif w.lower() in all_word_embeds:
            word_embeds[word_to_id[w]] = all_word_embeds[w.lower()]

    print('Loaded %i pretrained embeddings.' % len(all_word_embeds))
    
    mappings = {
        'word_to_id': word_to_id,
        'id_to_word': id_to_word,
        'tag_to_id': tag_to_id,
        'id_to_tag': id_to_tag,
        'word_embeds': word_embeds
    }
        
    return train_data, dev_data, mappings

In [5]:
def log_sum_exp(vec, dim=-1, keepdim = False):
    max_score, _ = vec.max(dim, keepdim=keepdim)
    if keepdim:
        stable_vec = vec - max_score
    else:
        stable_vec = vec - max_score.unsqueeze(dim)
    output = max_score + (stable_vec.exp().sum(dim, keepdim=keepdim)).log()
    return output

def create_batches(dataset, batch_size, order='keep', str_words=False, tag_padded= True):

    newdata = copy.deepcopy(dataset)
    if order=='sort':
        newdata.sort(key = lambda x:len(x['words']))
    elif order=='random':
        random.shuffle(newdata)

    newdata = np.array(newdata)  
    batches = []
    num_batches = np.ceil(len(dataset)/float(batch_size)).astype('int')

    for i in range(num_batches):
        batch_data = newdata[(i*batch_size):min(len(dataset),(i+1)*batch_size)]

        words_seqs = [itm['words'] for itm in batch_data]
        caps_seqs = [itm['caps'] for itm in batch_data]
        verbs_seqs = [itm['verbs'] for itm in batch_data]
        target_seqs = [itm['tags'] for itm in batch_data]
        str_words_seqs = [itm['str_words'] for itm in batch_data]

        seq_pairs = sorted(zip(words_seqs, caps_seqs, target_seqs, verbs_seqs, str_words_seqs), 
                           key=lambda p: len(p[0]), reverse=True)

        words_seqs, caps_seqs, target_seqs, verbs_seqs, str_words_seqs = zip(*seq_pairs)
        words_lengths = np.array([len(s) for s in words_seqs])

        words_padded = np.array([pad_seq(s, np.max(words_lengths)) for s in words_seqs])
        caps_padded = np.array([pad_seq(s, np.max(words_lengths)) for s in caps_seqs])
        verbs_padded = np.array([pad_seq(s, np.max(words_lengths)) for s in verbs_seqs])

        if tag_padded:
            target_padded = np.array([pad_seq(s, np.max(words_lengths)) for s in target_seqs])
        else:
            target_padded = target_seqs

        words_mask = (words_padded!=0).astype('int')

        if str_words:
            outputdict = {'words':words_padded, 'caps':caps_padded, 'tags': target_padded, 
                          'verbs': verbs_padded, 'wordslen': words_lengths, 'tagsmask':words_mask, 
                          'str_words': str_words_seqs}
        else:
            outputdict = {'words':words_padded, 'caps':caps_padded, 'tags': target_padded, 
                          'verbs': verbs_padded, 'wordslen': words_lengths, 'tagsmask':words_mask}

        batches.append(outputdict)

    return batches

In [6]:
class Initializer(object):
    
    def __init__(self):
        pass
    
    def init_embedding(self, input_embedding):
        bias = np.sqrt(3.0 / input_embedding.size(1))
        nn.init.uniform(input_embedding, -bias, bias)
    
    def init_linear(self, input_linear):
        bias = np.sqrt(6.0 / (input_linear.weight.size(0) + input_linear.weight.size(1)))
        nn.init.uniform(input_linear.weight, -bias, bias)
        if input_linear.bias is not None:
            input_linear.bias.data.zero_()
    
    def init_lstm(self, input_lstm):
        for ind in range(0, input_lstm.num_layers):
            weight = eval('input_lstm.weight_ih_l' + str(ind))
            bias = np.sqrt(6.0 / (weight.size(0) / 4 + weight.size(1)))
            nn.init.uniform(weight, -bias, bias)
            weight = eval('input_lstm.weight_hh_l' + str(ind))
            bias = np.sqrt(6.0 / (weight.size(0) / 4 + weight.size(1)))
            nn.init.uniform(weight, -bias, bias)
        
        if input_lstm.bidirectional:
            for ind in range(0, input_lstm.num_layers):
                weight = eval('input_lstm.weight_ih_l' + str(ind) + '_reverse')
                bias = np.sqrt(6.0 / (weight.size(0) / 4 + weight.size(1)))
                nn.init.uniform(weight, -bias, bias)
                weight = eval('input_lstm.weight_hh_l' + str(ind) + '_reverse')
                bias = np.sqrt(6.0 / (weight.size(0) / 4 + weight.size(1)))
                nn.init.uniform(weight, -bias, bias)
        
        if input_lstm.bias:
            
            for ind in range(0, input_lstm.num_layers):
                weight = eval('input_lstm.bias_ih_l' + str(ind))
                weight.data.zero_()
                weight.data[input_lstm.hidden_size: 2 * input_lstm.hidden_size] = 1
                weight = eval('input_lstm.bias_hh_l' + str(ind))
                weight.data.zero_()
                weight.data[input_lstm.hidden_size: 2 * input_lstm.hidden_size] = 1
            
            if input_lstm.bidirectional:
                for ind in range(0, input_lstm.num_layers):
                    weight = eval('input_lstm.bias_ih_l' + str(ind) + '_reverse')
                    weight.data.zero_()
                    weight.data[input_lstm.hidden_size: 2 * input_lstm.hidden_size] = 1
                    weight = eval('input_lstm.bias_hh_l' + str(ind) + '_reverse')
                    weight.data.zero_()
                    weight.data[input_lstm.hidden_size: 2 * input_lstm.hidden_size] = 1

In [7]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class baseRNN(nn.Module):

    def __init__(self, vocab_size, hidden_size, input_dropout_p, output_dropout_p, n_layers, rnn_cell, max_len=25):
        super(baseRNN, self).__init__()
        
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.max_len = max_len
        
        self.input_dropout_p = input_dropout_p
        self.output_dropout_p = output_dropout_p
        
        if rnn_cell.lower() == 'lstm':
            self.rnn_cell = nn.LSTM
        elif rnn_cell.lower() == 'gru':
            self.rnn_cell = nn.GRU
        else:
            raise ValueError("Unsupported RNN Cell: {0}".format(rnn_cell))

        self.input_dropout = nn.Dropout(p=input_dropout_p)

    def forward(self, *args, **kwargs):
        raise NotImplementedError()

class WordEncoderRNN(baseRNN):

    def __init__(self, vocab_size, embedding_size ,hidden_size, verb_size, cap_size, input_dropout_p=0.5, 
                 output_dropout_p=0, n_layers=1, bidirectional=True, rnn_cell='lstm'):
        
        super(WordEncoderRNN, self).__init__(vocab_size, hidden_size, input_dropout_p, 
                                             output_dropout_p, n_layers, rnn_cell)

        self.embedding = nn.Embedding(vocab_size, embedding_size)
        
        augmented_embedding_size = embedding_size + verb_size + cap_size
        self.rnn = self.rnn_cell(augmented_embedding_size, hidden_size, n_layers,
                                 bidirectional=bidirectional, dropout=output_dropout_p,
                                 batch_first=True)

    def forward(self, words, verb_embedding, cap_embedding, input_lengths):
        
        embedded = self.embedding(words)
        if cap_embedding is not None:
            embedded = torch.cat((embedded,verb_embedding,cap_embedding),2)  
        else:
            embedded = torch.cat((embedded,verb_embedding),2)
    
        embedded = self.input_dropout(embedded)
        embedded = nn.utils.rnn.pack_padded_sequence(embedded, input_lengths, batch_first= True)
        output, _ = self.rnn(embedded)
        output, _ = nn.utils.rnn.pad_packed_sequence(output, batch_first= True)
        
        return output
    
class DecoderCRF(nn.Module):

    def __init__(self, input_dimension, tag_to_ix, input_dropout_p=0.5):
        
        super(DecoderCRF, self).__init__()
        
        self.tag_to_ix = tag_to_ix
        self.tagset_size = len(tag_to_ix)
        
        self.dropout = nn.Dropout(input_dropout_p)
        self.hidden2tag = nn.Linear(input_dimension, self.tagset_size)
        
        self.transitions = nn.Parameter(torch.zeros(self.tagset_size, self.tagset_size))
        self.transitions.data[tag_to_ix[START_TAG], :] = -10000
        self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000
    
    def viterbi_decode(self, feats, mask ,usecuda = True, score_only= False):
    
        batch_size, sequence_len, num_tags = feats.size()
        
        assert num_tags == self.tagset_size
        
        mask = mask.transpose(0, 1).contiguous()
        feats = feats.transpose(0, 1).contiguous()
        
        backpointers = []
        
        all_forward_vars = Variable(torch.Tensor(sequence_len, 
                                    batch_size, num_tags).fill_(0.)).cuda()
        
        init_vars = torch.Tensor(batch_size, num_tags).fill_(-10000.)
        init_vars[:,self.tag_to_ix[START_TAG]] = 0.
        if usecuda:
            forward_var = Variable(init_vars).cuda()
        else:
            forward_var = Variable(init_vars)
        
        for i in range(sequence_len):
            broadcast_forward = forward_var.view(batch_size, 1, num_tags)
            transition_scores = self.transitions.view(1, num_tags, num_tags)
            
            next_tag_var = broadcast_forward + transition_scores
            
            viterbivars_t, bptrs_t = torch.max(next_tag_var, dim=2)
            
            forward_var = viterbivars_t + feats[i]
            all_forward_vars[i,:,:] = forward_var

            bptrs_t = bptrs_t.squeeze().data.cpu().numpy()
            backpointers.append(bptrs_t)
        
        mask_sum = torch.sum(mask, dim = 0, keepdim =True) - 1
        mask_sum_ex = mask_sum.view(1, batch_size, 1).expand(1, batch_size, num_tags)
        final_forward_var = all_forward_vars.gather(0, mask_sum_ex).squeeze(0)
        
        terminal_var = final_forward_var + self.transitions[self.tag_to_ix[STOP_TAG]].view(1, num_tags)
        terminal_var.data[:,self.tag_to_ix[STOP_TAG]] = -10000.
        terminal_var.data[:,self.tag_to_ix[START_TAG]] = -10000.
        
        path_score, best_tag_id = torch.max(terminal_var, dim = 1)
                
        if score_only:
            return path_score
        
        n_mask_sum = mask_sum.squeeze().data.cpu().numpy() + 1
        best_tag_id = best_tag_id.data.cpu().numpy()
        decoded_tags = []
        for i in range(batch_size):
            best_path = [best_tag_id[i]]
            bp_list = reversed([itm[i] for itm in backpointers[:n_mask_sum[i]]])
            for bptrs_t in bp_list:
                best_tag_id[i] = bptrs_t[best_tag_id[i]]
                best_path.append(best_tag_id[i])
            start = best_path.pop()
            assert start == self.tag_to_ix[START_TAG]
            best_path.reverse()
            decoded_tags.append(best_path)
        
        return path_score, decoded_tags
    
    def crf_forward(self, feats, mask, usecuda=True):
        
        batch_size, sequence_length, num_tags = feats.size()
        
        mask = mask.float().transpose(0, 1).contiguous()
        feats = feats.transpose(0, 1).contiguous()
        
        init_alphas = torch.Tensor(batch_size, num_tags).fill_(-10000.)
        init_alphas[:,self.tag_to_ix[START_TAG]] = 0.
        if usecuda:
            forward_var = Variable(init_alphas).cuda()
        else:
            forward_var = Variable(init_alphas)
        
        for i in range(sequence_length):
            emit_score = feats[i].view(batch_size, num_tags, 1)
            transition_scores = self.transitions.view(1, num_tags, num_tags)
            broadcast_forward = forward_var.view(batch_size, 1, num_tags)
            tag_var = broadcast_forward + transition_scores + emit_score 
            
            forward_var = (log_sum_exp(tag_var, dim = 2) * mask[i].view(batch_size, 1) +
                            forward_var * (1 - mask[i]).view(batch_size, 1))
            
        terminal_var = (forward_var + (self.transitions[self.tag_to_ix[STOP_TAG]]).view(1, -1))
        alpha = log_sum_exp(terminal_var, dim = 1)
        
        return alpha
        
    
    def score_sentence(self, feats, tags, mask, usecuda=True):
                
        batch_size, sequence_length, num_tags = feats.size()
        
        feats = feats.transpose(0, 1).contiguous()
        tags = tags.transpose(0, 1).contiguous()
        mask = mask.float().transpose(0, 1).contiguous()
                
        broadcast_transitions = self.transitions.view(1, num_tags, num_tags).expand(batch_size, num_tags, num_tags)
        
        score = self.transitions[:,self.tag_to_ix[START_TAG]].index_select(0, tags[0])
        
        for i in range(sequence_length - 1):
            current_tag, next_tag = tags[i], tags[i+1]
            
            transition_score = (
                     broadcast_transitions
                    .gather(1, next_tag.view(batch_size, 1, 1).expand(batch_size, 1, num_tags))
                    .squeeze(1)
                    .gather(1, current_tag.view(batch_size, 1))
                    .squeeze(1)
                    )

            emit_score = feats[i].gather(1, current_tag.view(batch_size, 1)).squeeze(1)

            score = score + transition_score* mask[i + 1] + emit_score * mask[i]  
        last_tag_index = mask.sum(0).long() - 1

        last_tags = tags.gather(0, last_tag_index.view(1, batch_size).expand(sequence_length, batch_size))
        last_tags = last_tags[0]

        last_transition_score = self.transitions[self.tag_to_ix[STOP_TAG]].index_select(0, last_tags)
        
        last_inputs = feats[-1]                                     
        last_input_score = last_inputs.gather(1, last_tags.view(batch_size, 1))
        last_input_score = last_input_score.squeeze(1)
        
        score = score + last_transition_score + last_input_score * mask[-1]
        
        return score
    
    def decode(self, input_var, mask, usecuda=True, score_only= False):
        
        input_var = self.dropout(input_var)
        features = self.hidden2tag(input_var)
        if score_only:
            score = self.viterbi_decode(features, mask, usecuda=usecuda, score_only=True)
            return score
        score, tag_seq = self.viterbi_decode(features, mask, usecuda=usecuda)
        return score, tag_seq
    
    def forward(self, input_var, tags, mask=None, usecuda=True):
        
        if mask is None:
            mask = Variable(torch.ones(*tags.size()).long())
        
        input_var = self.dropout(input_var)
        features = self.hidden2tag(input_var)
        forward_score = self.crf_forward(features, mask, usecuda=usecuda)
        ground_score = self.score_sentence(features, tags, mask, usecuda=usecuda)
        
        return torch.sum(forward_score-ground_score)

In [8]:
class BiLSTM_CRF(nn.Module):
    
    def __init__(self, word_vocab_size, word_embedding_dim, word_hidden_dim, tag_to_id, 
                 verb_embedding_dim, cap_embedding_dim, verb_input_dim = 2, cap_input_dim=4, 
                 pretrained=None):
        
        super(BiLSTM_CRF, self).__init__()
        
        self.word_vocab_size = word_vocab_size
        self.word_embedding_dim = word_embedding_dim
        self.word_hidden_dim = word_hidden_dim
        
        self.verb_input_dim = verb_input_dim 
        self.verb_embedding_dim = verb_embedding_dim
        
        self.cap_input_dim = cap_input_dim
        self.cap_embedding_dim = cap_embedding_dim
        
        self.tag_to_ix = tag_to_id
        self.tagset_size = len(tag_to_id)
        
        self.initializer = Initializer()
        
        if self.cap_embedding_dim:
            self.cap_embedder = nn.Embedding(self.cap_input_dim, self.cap_embedding_dim)
            self.initializer.init_embedding(self.cap_embedder.weight)
            
        self.verb_embedder = nn.Embedding(self.verb_input_dim, self.verb_embedding_dim)
        self.initializer.init_embedding(self.verb_embedder.weight)
        
        self.word_encoder = WordEncoderRNN(word_vocab_size, word_embedding_dim ,word_hidden_dim, 
                                     verb_embedding_dim, cap_embedding_dim, input_dropout_p=0.5)
        
        if pretrained is not None:
            self.word_encoder.embedding.weight = nn.Parameter(torch.FloatTensor(pretrained))
            
        self.initializer.init_lstm(self.word_encoder.rnn)
        
        self.decoder = DecoderCRF(word_hidden_dim*2, self.tag_to_ix, input_dropout_p=0.5)
        self.initializer.init_linear(self.decoder.hidden2tag)
        
    def forward(self, words, tags, verbs, caps, wordslen, tagsmask, usecuda=True):
        
        batch_size, max_len = words.size()
        
        cap_features = self.cap_embedder(caps) if self.cap_embedding_dim else None
        verb_features = self.verb_embedder(verbs)
        word_features = self.word_encoder(words, verb_features ,cap_features, wordslen)
        score = self.decoder(word_features, tags, tagsmask, usecuda=usecuda)
        
        return score
    
    def decode(self, words, verbs, caps, wordslen, tagsmask, usecuda=True, score_only = False):
        
        batch_size, max_len = words.size()
        
        cap_features = self.cap_embedder(caps) if self.cap_embedding_dim else None
        verb_features = self.verb_embedder(verbs)
        word_features = self.word_encoder(words, verb_features ,cap_features, wordslen)
        if score_only:
            score = self.decoder.decode(word_features, tagsmask, usecuda=usecuda, 
                                        score_only = True)
            return score
        score, tag_seq = self.decoder.decode(word_features, tagsmask, usecuda=usecuda)
        
        return score, tag_seq

In [9]:
srl_data = pkl.load(open('datasets/consrl/srl_data.p','rb'))
srl_train_data = srl_data[:20002]
srl_val_data = srl_data[20002:25004]

train_data, dev_data, mappings = load_consrl(srl_train_data, srl_val_data)

word_to_id = mappings['word_to_id']
tag_to_id = mappings['tag_to_id']
word_embeds = mappings['word_embeds']

Found 8934 unique words (480026 in total)
Found 94 unique named entity tags
Loading pretrained embeddings from wordvectors/glove.6B.100d.txt...
20002 / 5002 sentences in train / dev.
Loaded 400000 pretrained embeddings.


In [10]:
class Evaluator(object):
    def __init__(self, result_path, model_name, mappings, usecuda=True):
        self.result_path = result_path
        self.model_name = model_name
        self.tag_to_id = mappings['tag_to_id']
        self.id_to_tag = mappings['id_to_tag']
        self.usecuda = usecuda

    def evaluate_conll(self, model, dataset, best_F, eval_script='./datasets/conll/conlleval',
                          checkpoint_folder='.', record_confmat = False, batch_size = 40):
        
        prediction = []
        save = False
        new_F = 0.0
        confusion_matrix = torch.zeros((len(self.tag_to_id) - 2, len(self.tag_to_id) - 2))
    
        data_batches = create_batches(dataset, batch_size = batch_size, str_words = True,
                                      tag_padded = False)

        for data in data_batches:

            words = data['words']
            verbs = data['verbs']
            caps = data['caps']
            mask = data['tagsmask']

            if self.usecuda:
                words = Variable(torch.LongTensor(words)).cuda()
                verbs = Variable(torch.LongTensor(verbs)).cuda()
                caps = Variable(torch.LongTensor(caps)).cuda()
                mask = Variable(torch.LongTensor(mask)).cuda()
            else:
                words = Variable(torch.LongTensor(words))
                verbs = Variable(torch.LongTensor(verbs))
                caps = Variable(torch.LongTensor(caps))
                mask = Variable(torch.LongTensor(mask))

            wordslen = data['wordslen']
            str_words = data['str_words']
            
            _, out = model.decode(words, verbs, caps, wordslen, mask, usecuda = self.usecuda)
                                
            ground_truth_id = data['tags']
            predicted_id = out            
            
            for (swords, sground_truth_id, spredicted_id) in zip(str_words, ground_truth_id, predicted_id):
                for (word, true_id, pred_id) in zip(swords, sground_truth_id, spredicted_id):
                    line = ' '.join([word, self.id_to_tag[true_id], self.id_to_tag[pred_id]])
                    prediction.append(line)
                    confusion_matrix[true_id, pred_id] += 1
                prediction.append('')
        
        predf = os.path.join(self.result_path, self.model_name, checkpoint_folder ,'pred.txt')
        scoref = os.path.join(self.result_path, self.model_name, checkpoint_folder ,'score.txt')

        with open(predf, 'w') as f:
            f.write('\n'.join(prediction))

        os.system('%s < %s > %s' % (eval_script, predf, scoref))

        eval_lines = [l.rstrip() for l in codecs.open(scoref, 'r', 'utf8')]

        for i, line in enumerate(eval_lines):
            print(line)
            if i == 1:
                new_F = float(line.strip().split()[-1])
                if new_F > best_F:
                    best_F = new_F
                    save = True
                    print('the best F is ', new_F)
        
        return best_F, new_F, save


In [11]:
class Trainer(object):
    
    def __init__(self, model, optimizer, result_path, model_name, usedataset, mappings, 
                 eval_every=1, usecuda = True):
        self.model = model
        self.optimizer = optimizer
        self.eval_every = eval_every
        self.model_name = os.path.join(result_path, model_name)
        self.usecuda = usecuda
        
        if usedataset=='consrl':
            self.evaluator = Evaluator(result_path, model_name, mappings).evaluate_conll
    
    def adjust_learning_rate(self, optimizer, lr):
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
            
    def train_model(self, num_epochs, train_data, dev_data, learning_rate, checkpoint_folder='.',
                    eval_test_train=True, plot_every=20, adjust_lr=True, batch_size = 40):

        losses = []
        loss = 0.0
        best_dev_F = -1.0
        best_train_F = -1.0
        all_F=[[0,0]]
        count = 0
        word_count = 0
        
        self.model.train(True)
        for epoch in range(1, num_epochs+1):
            t=time.time()
            
            train_batches = create_batches(train_data, batch_size= batch_size, order='random')
            
            for i, index in enumerate(np.random.permutation(len(train_batches))): 
                
                data = train_batches[index]
                self.model.zero_grad()

                words = data['words']
                tags = data['tags']
                verbs = data['verbs']
                caps = data['caps']
                mask = data['tagsmask']
                
                if self.usecuda:
                    words = Variable(torch.LongTensor(words)).cuda()
                    verbs = Variable(torch.LongTensor(verbs)).cuda()
                    caps = Variable(torch.LongTensor(caps)).cuda()
                    mask = Variable(torch.LongTensor(mask)).cuda()
                    tags = Variable(torch.LongTensor(tags)).cuda()
                else:
                    words = Variable(torch.LongTensor(words))
                    verbs = Variable(torch.LongTensor(verbs))
                    caps = Variable(torch.LongTensor(caps))
                    mask = Variable(torch.LongTensor(mask))
                    tags = Variable(torch.LongTensor(tags))
                
                wordslen = data['wordslen']
                
                score = self.model(words, tags, verbs, caps, wordslen, mask, usecuda=self.usecuda)
                
                loss += score.data[0]/np.sum(data['wordslen'])
                score.backward()
                
                nn.utils.clip_grad_norm(self.model.parameters(), 5.0)
                self.optimizer.step()
                
                count += 1
                word_count += batch_size
                
                if count % plot_every == 0:
                    loss /= plot_every
                    print(word_count, ': ', loss)
                    if losses == []:
                        losses.append(loss)
                    losses.append(loss)
                    loss = 0.0
                                        
            if adjust_lr:
                self.adjust_learning_rate(self.optimizer, 
                                          lr=learning_rate/(1+0.05*word_count/len(train_data)))
            
            if epoch%self.eval_every==0:
                
                self.model.train(False)
                
                if eval_test_train:
                    best_train_F, new_train_F, _ = self.evaluator(self.model, train_data, best_train_F, 
                                                                  checkpoint_folder=checkpoint_folder)
                else:
                    best_train_F, new_train_F, _ = 0, 0, 0
                best_dev_F, new_dev_F, save = self.evaluator(self.model, dev_data, best_dev_F,
                                                             checkpoint_folder=checkpoint_folder)
                if save:
                    torch.save(self.model, os.path.join(self.model_name, checkpoint_folder, 'modelweights'))
                    
                sys.stdout.flush()
                all_F.append([new_train_F, new_dev_F])
                self.model.train(True)

            print('*'*80)
            print('Epoch %d Complete: Time Taken %d' %(epoch ,time.time() - t))

        return losses, all_F

In [12]:
word_vocab_size = len(word_to_id)
word_embedding_dim = 100
word_hidden_dim = 200
verb_embedding_dim = 10
cap_embedding_dim = 0

model = BiLSTM_CRF(word_vocab_size, word_embedding_dim, word_hidden_dim, tag_to_id, 
                   verb_embedding_dim, cap_embedding_dim, pretrained = word_embeds)
    
    
model.cuda()
learning_rate = 0.015
print('Initial learning rate is: %s' %(learning_rate))
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

result_path = '.'
model_name = '.'
trainer = Trainer(model, optimizer, result_path, model_name, usedataset='consrl', mappings= mappings) 
losses, all_F = trainer.train_model(10, train_data, dev_data, learning_rate = learning_rate)
    
plt.plot(losses)
plt.savefig(os.path.join(result_path, model_name, 'lossplot.png'))


Initial learning rate is: 0.015
800 :  2.595070683346564
1600 :  1.63570770355328
2400 :  1.3989303576428846
3200 :  1.3875672810613253
4000 :  1.251654905696571
4800 :  1.1671755277483062
5600 :  1.2219252985343931
6400 :  1.1341341147716555
7200 :  1.1073128052637227
8000 :  1.1006431717702703
8800 :  0.9382336422992033
9600 :  0.9032225882171238
10400 :  0.834429728740749
11200 :  0.8264320164852046
12000 :  0.8433165445377331
12800 :  0.902060032373508
13600 :  0.7889980508425362
14400 :  0.770750515068126
15200 :  0.7176659991160028
16000 :  0.7347934497757698
16800 :  0.7159937699445951
17600 :  0.7549342765925864
18400 :  0.6711838412314545
19200 :  0.6797682728412835
20000 :  0.670285336786841
processed 480026 tokens with 61118 phrases; found: 20013 phrases; correct: 19061.
accuracy:  63.21%; precision:  95.24%; recall:  31.19%; FB1:  46.99
the best F is  46.99
             ARG0: precision:  53.69%; recall:   8.50%; FB1:  14.68  1451
             ARG1: precision:  44.78%; recal

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


20800 :  0.6882183358579229
21600 :  0.6158887418151069
22400 :  0.6379858382695806
23200 :  0.6338881753628154
24000 :  0.7221007288618645
24800 :  0.6692334994880385
25600 :  0.6584870191859743
26400 :  0.6149539914450052
27200 :  0.5753546886244243
28000 :  0.6249658306036144
28800 :  0.6202996292924768
29600 :  0.5303097140216994
30400 :  0.5529400836548235
31200 :  0.5670427814188075
32000 :  0.5517603796957309
32800 :  0.5482340737518273
33600 :  0.5484248631469815
34400 :  0.5226972831029485
35200 :  0.5582539177908312
36000 :  0.48694343479980534
36800 :  0.5336687457863802
37600 :  0.5228763094052656
38400 :  0.48615921182083605
39200 :  0.4934374300434259
40000 :  0.521024629667814
processed 480026 tokens with 61118 phrases; found: 36482 phrases; correct: 25254.
accuracy:  63.76%; precision:  69.22%; recall:  41.32%; FB1:  51.75
the best F is  51.75
             ARG0: precision:  60.48%; recall:  32.45%; FB1:  42.23  4916
             ARG1: precision:  47.20%; recall:  13.57%

processed 148220 tokens with 13517 phrases; found: 7486 phrases; correct: 5971.
accuracy:  76.01%; precision:  79.76%; recall:  44.17%; FB1:  56.86
the best F is  56.86
             ARG0: precision:  62.16%; recall:  32.24%; FB1:  42.45  946
             ARG1: precision:  50.69%; recall:  22.67%; FB1:  31.33  1440
             ARG2: precision:  46.21%; recall:  21.44%; FB1:  29.29  554
             ARG3: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
             ARG4: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
         ARGM-ADJ: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
         ARGM-ADV: precision:   0.00%; recall:   0.00%; FB1:   0.00  1
         ARGM-CAU: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
         ARGM-COM: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
         ARGM-DIR: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
         ARGM-DIS: precision:  53.19%; recall:  10.25%; FB1:  17.18  47
         ARGM-EXT: precision:   0.00%; rec

********************************************************************************
Epoch 4 Complete: Time Taken 85
80800 :  0.3528975792966639
81600 :  0.3856716068967173
82400 :  0.3933853882412018
83200 :  0.39835582128297925
84000 :  0.355513030741163
84800 :  0.3599182099110152
85600 :  0.3777699519713442
86400 :  0.3684710382394309
87200 :  0.3471861247732421
88000 :  0.3637399840776773
88800 :  0.38217739926071015
89600 :  0.3509991239391935
90400 :  0.35326478407091166
91200 :  0.3747901553626892
92000 :  0.354811235187548
92800 :  0.3481711126779035
93600 :  0.33357686912956364
94400 :  0.3803305651854777
95200 :  0.3333186480061536
96000 :  0.36435366886194603
96800 :  0.3310993157404927
97600 :  0.3462115686351135
98400 :  0.36192322351908957
99200 :  0.32790351002173757
100000 :  0.3221779656257461
processed 480026 tokens with 61118 phrases; found: 43435 phrases; correct: 32930.
accuracy:  73.69%; precision:  75.81%; recall:  53.88%; FB1:  62.99
the best F is  62.99
          

processed 148220 tokens with 13517 phrases; found: 10204 phrases; correct: 7346.
accuracy:  77.17%; precision:  71.99%; recall:  54.35%; FB1:  61.94
the best F is  61.94
             ARG0: precision:  72.10%; recall:  50.71%; FB1:  59.54  1283
             ARG1: precision:  55.39%; recall:  35.40%; FB1:  43.20  2058
             ARG2: precision:  28.73%; recall:  36.43%; FB1:  32.13  1514
             ARG3: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
             ARG4: precision:  66.67%; recall:   8.89%; FB1:  15.69  6
         ARGM-ADJ: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
         ARGM-ADV: precision:  26.14%; recall:   8.79%; FB1:  13.16  153
         ARGM-CAU: precision:   0.00%; recall:   0.00%; FB1:   0.00  1
         ARGM-COM: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
         ARGM-DIR: precision:  20.00%; recall:   2.90%; FB1:   5.06  10
         ARGM-DIS: precision:  54.81%; recall:  23.36%; FB1:  32.76  104
         ARGM-EXT: precision:   0.0

KeyboardInterrupt: 

In [1]:
import pickle as pkl
import os

import torch
torch.manual_seed(0)
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable
from neural_srl.util.utils import *
import codecs
import pickle as pkl
import itertools

In [2]:
word_dim = 100
pretrained = 'wordvectors/glove.6B.100d.txt'
dataset = 'datasets/conll05srl/'

srl_train_data = pkl.load(open(os.path.join(dataset,'train_data.pkl'),'rb'),errors='ignore')
srl_val_data = pkl.load(open(os.path.join(dataset,'dev_data.pkl'),'rb'),errors='ignore')
srl_test_data = pkl.load(open(os.path.join(dataset,'test_data.pkl'),'rb'),errors='ignore')

for i in range(len(srl_train_data)):
    srl_train_data[i][2][-1] = srl_train_data[i][2][-1].strip()
for i in range(len(srl_val_data)):
    srl_val_data[i][2][-1] = srl_val_data[i][2][-1].strip()
for i in range(len(srl_test_data)):
    srl_test_data[i][2][-1] = srl_test_data[i][2][-1].strip()

dico_words_train, _, _ = word_mapping(srl_train_data)
dico_tags, tag_to_id, id_to_tag = tag_mapping(srl_train_data+srl_val_data+srl_test_data)

dico_words, word_to_id, id_to_word = augment_with_pretrained(
                                     dico_words_train.copy(), pretrained,
                                     list(itertools.chain.from_iterable(
                                     [[str(x).lower() for x in s[0]] for s in srl_val_data+srl_test_data])))

train_data = prepare_dataset(srl_train_data, word_to_id, tag_to_id)
dev_data = prepare_dataset(srl_val_data, word_to_id, tag_to_id)
test_data = prepare_dataset(srl_test_data, word_to_id, tag_to_id)

print("%i / %i / %i sentences in train / dev / test." % (len(train_data), len(dev_data), len(test_data)))

mapping_file = os.path.join(dataset,'mapping.pkl')

if not os.path.isfile(mapping_file):
    all_word_embeds = {}
    for i, line in enumerate(codecs.open(pretrained, 'r', 'utf-8')):
        s = line.strip().split()
        if len(s) == word_dim + 1:
            all_word_embeds[s[0]] = np.array([float(i) for i in s[1:]])

    word_embeds = np.random.uniform(-np.sqrt(0.06), np.sqrt(0.06), (len(word_to_id), word_dim))

    for w in word_to_id:
        if w in all_word_embeds:
            word_embeds[word_to_id[w]] = all_word_embeds[w]
        elif w.lower() in all_word_embeds:
            word_embeds[word_to_id[w]] = all_word_embeds[w.lower()]

    print('Loaded %i pretrained embeddings.' % len(all_word_embeds))

Found 36278 unique words (5905439 in total)
Found 129 unique named entity tags
Loading pretrained embeddings from wordvectors/glove.6B.100d.txt...
225114 / 31577 / 23045 sentences in train / dev / test.


In [3]:
train_data[0]

{'caps': [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0],
 'str_words': ['We',
  'respectfully',
  'invite',
  'you',
  'to',
  'watch',
  'a',
  'special',
  'edition',
  'of',
  'Across',
  'China',
  '.'],
 'tags': [6, 22, 3, 5, 9, 2, 2, 2, 2, 2, 2, 2, 0],
 'verbs': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'words': [36, 12490, 4367, 16, 5, 1401, 8, 467, 3791, 7, 998, 132, 4]}

In [5]:
id_to_tag[6]

'B-ARG0'

In [6]:
srl_train_data[0]

(['We',
  'respectfully',
  'invite',
  'you',
  'to',
  'watch',
  'a',
  'special',
  'edition',
  'of',
  'Across',
  'China',
  '.'],
 ['0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'],
 ['B-ARG0',
  'B-ARGM-MNR',
  'B-V',
  'B-ARG1',
  'B-ARG2',
  'I-ARG2',
  'I-ARG2',
  'I-ARG2',
  'I-ARG2',
  'I-ARG2',
  'I-ARG2',
  'I-ARG2',
  'O'])

In [12]:
from __future__ import print_function
from collections import OrderedDict
import os
import neural_srl
from neural_srl.util import Loader
from neural_srl.models import BiLSTM_CRF
from neural_srl.util.utils import *
import matplotlib.pyplot as plt
from torch.autograd import Variable
import torch
import random
random.seed(0)

In [61]:
import torch
import codecs

In [3]:
model = torch.load('neural_srl/results/conll05srl/BiLSTM_CRF/modelweights')

In [62]:
def convert_bio_tags_to_conll_format(labels):
    """
    Converts BIO formatted SRL tags to the format required for evaluation with the
    official CONLL 2005 perl script. Spans are represented by bracketed labels,
    with the labels of words inside spans being the same as those outside spans.
    Beginning spans always have a opening bracket and a closing asterisk (e.g. "(ARG-1*" )
    and closing spans always have a closing bracket (e.g. "*)" ). This applies even for
    length 1 spans, (e.g "(ARG-0*)").
    A full example of the conversion performed:
    [B-ARG-1, I-ARG-1, I-ARG-1, I-ARG-1, I-ARG-1, O]
    [ "(ARG-1*", "*", "*", "*", "*)", "*"]
    Parameters
    ----------
    labels : List[str], required.
        A list of BIO tags to convert to the CONLL span based format.
    Returns
    -------
    A list of labels in the CONLL span based format.
    """
    sentence_length = len(labels)
    conll_labels = []
    for i, label in enumerate(labels):
        if label == "O":
            conll_labels.append("*")
            continue
        new_label = "*"
        # Are we at the beginning of a new span, at the first word in the sentence,
        # or is the label different from the previous one? If so, we are seeing a new label.
        if label[0] == "B" or i == 0 or label[1:] != labels[i - 1][1:]:
            new_label = "(" + label[2:] + new_label
        # Are we at the end of the sentence, is the next word a new span, or is the next
        # word not in a span? If so, we need to close the label span.
        if i == sentence_length - 1 or labels[i + 1][0] == "B" or label[1:] != labels[i + 1][1:]:
            new_label = new_label + ")"
        conll_labels.append(new_label)
    return conll_labels

In [63]:
def write_to_conll_eval_file(prediction_file,
                             gold_file,
                             verb_index,
                             sentence,
                             prediction,
                             gold_labels):
    """
    Prints predicate argument predictions and gold labels for a single verbal
    predicate in a sentence to two provided file references.
    Parameters
    ----------
    prediction_file : TextIO, required.
        A file reference to print predictions to.
    gold_file : TextIO, required.
        A file reference to print gold labels to.
    verb_index : Optional[int], required.
        The index of the verbal predicate in the sentence which
        the gold labels are the arguments for, or None if the sentence
        contains no verbal predicate.
    sentence : List[str], required.
        The word tokens.
    prediction : List[str], required.
        The predicted BIO labels.
    gold_labels : List[str], required.
        The gold BIO labels.
    """
    verb_only_sentence = ["-"] * len(sentence)
    if verb_index is not None:
        verb_only_sentence[verb_index] = sentence[verb_index]

    conll_format_predictions = convert_bio_tags_to_conll_format(prediction)
    conll_format_gold_labels = convert_bio_tags_to_conll_format(gold_labels)

    for word, predicted, gold in zip(verb_only_sentence,
                                     conll_format_predictions,
                                     conll_format_gold_labels):
        prediction_file.write(word.ljust(15))
        prediction_file.write(predicted.rjust(15) + "\n")
        gold_file.write(word.ljust(15))
        gold_file.write(gold.rjust(15) + "\n")
    prediction_file.write("\n")
    gold_file.write("\n")

In [128]:
class Evaluator(object):
    def __init__(self, result_path, model_name, mappings, usecuda=True):
        self.result_path = result_path
        self.model_name = model_name
        self.tag_to_id = mappings['tag_to_id']
        self.id_to_tag = mappings['id_to_tag']
        self.usecuda = usecuda

    def evaluate_ner(self, model, dataset, best_F, eval_script='./datasets/conll/conlleval',
                          checkpoint_folder='.', record_confmat = False, batch_size = 80):
        
        model.eval()
        
        prediction = []
        save = False
        new_F = 0.0
        confusion_matrix = torch.zeros((len(self.tag_to_id) - 2, len(self.tag_to_id) - 2))
    
        data_batches = create_batches(dataset, batch_size = batch_size, str_words = True,
                                      tag_padded = False)

        for data in data_batches:

            words = data['words']
            verbs = data['verbs']
            caps = data['caps']
            mask = data['tagsmask']

            if self.usecuda:
                words = Variable(torch.LongTensor(words)).cuda()
                verbs = Variable(torch.LongTensor(verbs)).cuda()
                caps = Variable(torch.LongTensor(caps)).cuda()
                mask = Variable(torch.LongTensor(mask)).cuda()
            else:
                words = Variable(torch.LongTensor(words))
                verbs = Variable(torch.LongTensor(verbs))
                caps = Variable(torch.LongTensor(caps))
                mask = Variable(torch.LongTensor(mask))

            wordslen = data['wordslen']
            str_words = data['str_words']
            
            _, out = model.decode(words, verbs, caps, wordslen, mask, usecuda = self.usecuda)
                                
            ground_truth_id = data['tags']
            predicted_id = out            
            
            for (swords, sground_truth_id, spredicted_id) in zip(str_words, ground_truth_id, predicted_id):
                for (word, true_id, pred_id) in zip(swords, sground_truth_id, spredicted_id):
                    if self.id_to_tag[true_id]!='B-V':
                        line = ' '.join([word, self.id_to_tag[true_id], self.id_to_tag[pred_id]])
                        prediction.append(line)
                        confusion_matrix[true_id, pred_id] += 1
                prediction.append('')
        
        predf = os.path.join(self.result_path, self.model_name, checkpoint_folder ,'pred.txt')
        scoref = os.path.join(self.result_path, self.model_name, checkpoint_folder ,'score.txt')

        with open(predf, 'w+') as f:
            f.write('\n'.join(prediction))

        os.system('%s < %s > %s' % (eval_script, predf, scoref))

        eval_lines = [l.rstrip() for l in codecs.open(scoref, 'r', 'utf8')]

        for i, line in enumerate(eval_lines):
            print(line)
            if i == 1:
                new_F = float(line.strip().split()[-1])
                if new_F > best_F:
                    best_F = new_F
                    save = True
                    print('the best F is ', new_F)
        
        return best_F, new_F, save
    
    def evaluate_srl(self, model, dataset, best_F, 
                        eval_script='/home/ubuntu/PGMProject/datasets/srlconll-1.1/bin/srl-eval.pl',
                        checkpoint_folder='.', record_confmat = False, batch_size = 80):
        
        v1= 'PERL5LIB="$HOME/PGMProject/datasets/srlconll-1.1/lib:$PERL5LIB"'
        v2= 'PATH="$HOME/PGMProject/datasets/srlconll-1.1/bin:$PATH"'

        model.eval()
        
        prediction = []
        save = False
        new_F = 0.0
        confusion_matrix = torch.zeros((len(self.tag_to_id) - 2, len(self.tag_to_id) - 2))
    
        data_batches = create_batches(dataset, batch_size = batch_size, str_words = True,
                                      tag_padded = False)
        
        predf = os.path.join(self.result_path, self.model_name, 
                             checkpoint_folder ,'pred1.txt')
        goldf = os.path.join(self.result_path, self.model_name, 
                             checkpoint_folder ,'gold1.txt')
        scoref = os.path.join(self.result_path, self.model_name, 
                             checkpoint_folder ,'score1.txt')
        
        predfile = open(predf,'w+')
        goldfile = open(goldf,'w+')

        for data in data_batches:

            words = data['words']
            verbs = data['verbs']
            caps = data['caps']
            mask = data['tagsmask']

            if self.usecuda:
                words = Variable(torch.LongTensor(words)).cuda()
                verbs = Variable(torch.LongTensor(verbs)).cuda()
                caps = Variable(torch.LongTensor(caps)).cuda()
                mask = Variable(torch.LongTensor(mask)).cuda()
            else:
                words = Variable(torch.LongTensor(words))
                verbs = Variable(torch.LongTensor(verbs))
                caps = Variable(torch.LongTensor(caps))
                mask = Variable(torch.LongTensor(mask))

            wordslen = data['wordslen']
            str_words = data['str_words']
            
            _, out = model.decode(words, verbs, caps, wordslen, mask, usecuda = self.usecuda)
                                
            ground_truth_id = data['tags']
            predicted_id = out            
            
            for (swords, sground_truth_id, spredicted_id, sverb) in \
                zip(str_words, ground_truth_id, predicted_id, data['verbs']):
                tspredicted_id = [self.id_to_tag[idxt] for idxt in spredicted_id]
                tsground_truth_id = [self.id_to_tag[idxt] for idxt in sground_truth_id]
                try:
                    verb_index = list(sverb).index(1)
                except (KeyboardInterrupt, SystemExit):
                    raise
                except:
                    verb_index = None
                write_to_conll_eval_file(predfile,
                                         goldfile,
                                         verb_index,
                                         swords,
                                         tspredicted_id,
                                         tsground_truth_id)

        predfile.close()
        goldfile.close()

        out = os.system('%s %s %s %s %s > %s' % (v1, v2, eval_script, goldf, predf, scoref))
        
        eval_lines = [l.rstrip() for l in codecs.open(scoref, 'r', 'utf8')]

        for i, line in enumerate(eval_lines):
            print(line)
            if 'Overall' in line:
                new_F = float(line.strip().split()[-1])
                if new_F > best_F:
                    best_F = new_F
                    save = True
                    print('the best F is ', new_F)
        
        return best_F, new_F, save

In [5]:
parameters = OrderedDict()
parameters['wrdim'] = 100
parameters['ptrnd'] = 'wordvectors/glove.6B.100d.txt'

In [6]:
loader = Loader()
train_data, dev_data, test_data, mappings = loader.load_conll05srl('datasets/conll05srl/', parameters)

Found 36278 unique words (5905439 in total)
Found 129 unique named entity tags
Loading pretrained embeddings from wordvectors/glove.6B.100d.txt...
225114 / 31577 / 23045 sentences in train / dev / test.


In [7]:
result_path = 'neural_srl/results/conll05srl'
model_name = 'BiLSTM_CRF'

In [129]:
evaluator = Evaluator(result_path, model_name, mappings)

In [35]:
evaluator.evaluate_ner(model,test_data,0.0)

  output, _ = self.rnn(embedded)


processed 590872 tokens with 60728 phrases; found: 60245 phrases; correct: 48039.
accuracy:  88.09%; precision:  79.74%; recall:  79.11%; FB1:  79.42
the best F is  79.42
             ARG0: precision:  87.55%; recall:  88.37%; FB1:  87.95  13153
             ARG1: precision:  81.15%; recall:  82.54%; FB1:  81.84  20943
             ARG2: precision:  75.49%; recall:  73.72%; FB1:  74.59  6768
             ARG3: precision:  66.33%; recall:  36.26%; FB1:  46.89  199
             ARG4: precision:  70.94%; recall:  67.30%; FB1:  69.07  351
             ARG5: precision:  66.67%; recall:  22.22%; FB1:  33.33  3
             ARGA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
         ARGM-ADJ: precision:  34.31%; recall:  34.81%; FB1:  34.56  137
         ARGM-ADV: precision:  69.29%; recall:  55.48%; FB1:  61.62  1957
         ARGM-CAU: precision:  70.77%; recall:  64.32%; FB1:  67.39  349
         ARGM-COM: precision:  35.56%; recall:  55.17%; FB1:  43.24  45
         ARGM-DIR: precis

(79.42, 79.42, True)

In [130]:
evaluator.evaluate_srl(model,test_data,0.0)

  output, _ = self.rnn(embedded)


Number of Sentences    :       23045
Number of Propositions :       23045
Percentage of perfect props :  57.56

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall    47828   12263   12621    79.59   79.12   79.36
the best F is  79.36
----------
      ARG0    11493    1661    1539    87.37   88.19   87.78
      ARG1    16894    4082    3697    80.54   82.05   81.29
      ARG2     5091    1679    1839    75.20   73.46   74.32
      ARG3      132      67     232    66.33   36.26   46.89
      ARG4      249     102     121    70.94   67.30   69.07
      ARG5        2       1       7    66.67   22.22   33.33
      ARGA        0       0       2     0.00    0.00    0.00
  ARGM-ADJ       47      90      88    34.31   34.81   34.56
  ARGM-ADV     1355     602    1089    69.24   55.44   61.58
  ARGM-CAU      247     102     137    70.77   64.32   67.39
  ARGM-COM       16      29      13    35.56   55.17   43.24
  

(79.36, 79.36, True)