In [21]:
from typing import Dict, Tuple, List
import logging

from overrides import overrides
from conllu import parse_incr

from allennlp.common.file_utils import cached_path
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.data.fields import Field, TextField, SequenceLabelField, MetadataField
from allennlp.data.instance import Instance
from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer
from allennlp.data.tokenizers import Token, Tokenizer

In [13]:
# Hard-coded example: one tokenised sentence and its dependency annotations,
# stored as four parallel lists with one entry per token.
# NOTE(review): these look like the FORM / DEPREL / HEAD / DEPS columns of a CoNLL-U
# sentence (the notebook imports `conllu`) -- confirm where the values came from.

# Surface tokens. Misspellings such as 'helpfull' and 'thier' are part of the data.
words = ['The', 'team', 'who', 'work', 'there', 'are', 'helpfull', ',', 'friendly', 'and', 'extremely', 'knowledgeable', 'and', 'will', 'help', 'you', 'as', 'much', 'as', 'they', 'can', 'with', 'thier', 'years', 'of', 'hands', 'on', 'practice', '.']
# One relation label per token.
tags = ['det', 'nsubj', 'nsubj', 'acl:relcl', 'advmod', 'cop', 'root', 'punct', 'conj', 'cc', 'advmod', 'conj', 'cc', 'aux', 'conj', 'obj', 'advmod', 'advmod', 'mark', 'nsubj', 'advcl', 'case', 'nmod:poss', 'obl', 'case', 'compound', 'compound', 'nmod', 'punct']
# One integer per token; presumably the 1-based position of the token's head,
# with 0 marking the root -- TODO confirm.
heads = [2, 7, 4, 2, 4, 7, 0, 9, 7, 12, 12, 7, 15, 15, 7, 15, 18, 15, 21, 21, 17, 24, 24, 15, 28, 28, 26, 24, 7]
# One list per token of (relation label, head index) tuples. Unlike `tags`/`heads`,
# a token may carry several tuples here (see the second entry).
deps = [[('det', 2)], [('nsubj', 4), ('nsubj', 7), ('nsubj', 9), ('nsubj', 12), ('nsubj', 15)], [('ref', 2)], [('acl:relcl', 2)], [('advmod', 4)], [('cop', 7)], [('root', 0)], [('punct', 9)], [('conj:and', 7)], [('cc', 12)], [('advmod', 12)], [('conj:and', 7)], [('cc', 15)], [('aux', 15)], [('conj:and', 7)], [('obj', 15)], [('advmod', 18)], [('advmod', 15)], [('mark', 21)], [('nsubj', 21)], [('advcl:as', 17)], [('case', 24)], [('nmod:poss', 24)], [('obl:with', 15)], [('case', 28)], [('compound', 28)], [('compound', 26)], [('nmod:of', 24)], [('punct', 7)]]

In [14]:
# Basic annotation: each token's relation label paired with its head index.
orig = [*zip(tags, heads)]
#[('det', 2), ('nsubj', 7), ('nsubj', 4), ('acl:relcl', 2), ('advmod', 4), ('cop', 7), ('root', 0), ('punct', 9), ('conj', 7), ('cc', 12), ('advmod', 12), ('conj', 7), ('cc', 15), ('aux', 15), ('conj', 7), ('obj', 15), ('advmod', 18), ('advmod', 15), ('mark', 21), ('nsubj', 21), ('advcl', 17), ('case', 24), ('nmod:poss', 24), ('obl', 15), ('case', 28), ('compound', 28), ('compound', 26), ('nmod', 24), ('punct', 7)]

# Enhanced annotation: a second name for the very same `deps` list (no copy is made).
enhanced = deps
#[[('det', 2)], [('nsubj', 4), ('nsubj', 7), ('nsubj', 9), ('nsubj', 12), ('nsubj', 15)], [('ref', 2)], [('acl:relcl', 2)], [('advmod', 4)], [('cop', 7)], [('root', 0)], [('punct', 9)], [('conj:and', 7)], [('cc', 12)], [('advmod', 12)], [('conj:and', 7)], [('cc', 15)], [('aux', 15)], [('conj:and', 7)], [('obj', 15)], [('advmod', 18)], [('advmod', 15)], [('mark', 21)], [('nsubj', 21)], [('advcl:as', 17)], [('case', 24)], [('nmod:poss', 24)], [('obl:with', 15)], [('case', 28)], [('compound', 28)], [('compound', 26)], [('nmod:of', 24)], [('punct', 7)]]

In [15]:
# Display the enhanced annotation: one list of (relation label, head index) tuples per token.
enhanced

[[('det', 2)],
 [('nsubj', 4), ('nsubj', 7), ('nsubj', 9), ('nsubj', 12), ('nsubj', 15)],
 [('ref', 2)],
 [('acl:relcl', 2)],
 [('advmod', 4)],
 [('cop', 7)],
 [('root', 0)],
 [('punct', 9)],
 [('conj:and', 7)],
 [('cc', 12)],
 [('advmod', 12)],
 [('conj:and', 7)],
 [('cc', 15)],
 [('aux', 15)],
 [('conj:and', 7)],
 [('obj', 15)],
 [('advmod', 18)],
 [('advmod', 15)],
 [('mark', 21)],
 [('nsubj', 21)],
 [('advcl:as', 17)],
 [('case', 24)],
 [('nmod:poss', 24)],
 [('obl:with', 15)],
 [('case', 28)],
 [('compound', 28)],
 [('compound', 26)],
 [('nmod:of', 24)],
 [('punct', 7)]]

In [75]:
# Split every token's list of (relation label, head index) tuples from `deps`
# into two parallel per-token lists:
#   heads[i] -> list of head indices of token i
#   rels[i]  -> list of relation labels of token i, in the same order
# A token with a single head needs no special case: it simply yields
# one-element lists, exactly like the general multi-head path.
#
# NOTE: this rebinds `heads`. It was previously a flat list of ints (one head
# per token); from here on it is a list of lists. Re-run the cell that defines
# the flat `heads` before re-running any cell that expects that form.
heads = [[edge[1] for edge in token_edges] for token_edges in deps]
rels = [[edge[0] for edge in token_edges] for token_edges in deps]

In [76]:
# Show each token's head indices next to the matching relation labels.
for head_ids, rel_labels in zip(heads, rels):
    print(head_ids, "==>", rel_labels)

[2] ==> ['det']
[4, 7, 9, 12, 15] ==> ['nsubj', 'nsubj', 'nsubj', 'nsubj', 'nsubj']
[2] ==> ['ref']
[2] ==> ['acl:relcl']
[4] ==> ['advmod']
[7] ==> ['cop']
[0] ==> ['root']
[9] ==> ['punct']
[7] ==> ['conj:and']
[12] ==> ['cc']
[12] ==> ['advmod']
[7] ==> ['conj:and']
[15] ==> ['cc']
[15] ==> ['aux']
[7] ==> ['conj:and']
[15] ==> ['obj']
[18] ==> ['advmod']
[15] ==> ['advmod']
[21] ==> ['mark']
[21] ==> ['nsubj']
[17] ==> ['advcl:as']
[24] ==> ['case']
[24] ==> ['nmod:poss']
[15] ==> ['obl:with']
[28] ==> ['case']
[28] ==> ['compound']
[26] ==> ['compound']
[24] ==> ['nmod:of']
[7] ==> ['punct']


In [81]:
# Bundle the two parallel per-token lists into one
# (head indices, relation labels) pair per token, then display the result.
targs = [(head_ids, rel_labels) for head_ids, rel_labels in zip(heads, rels)]
targs

[([2], ['det']),
 ([4, 7, 9, 12, 15], ['nsubj', 'nsubj', 'nsubj', 'nsubj', 'nsubj']),
 ([2], ['ref']),
 ([2], ['acl:relcl']),
 ([4], ['advmod']),
 ([7], ['cop']),
 ([0], ['root']),
 ([9], ['punct']),
 ([7], ['conj:and']),
 ([12], ['cc']),
 ([12], ['advmod']),
 ([7], ['conj:and']),
 ([15], ['cc']),
 ([15], ['aux']),
 ([7], ['conj:and']),
 ([15], ['obj']),
 ([18], ['advmod']),
 ([15], ['advmod']),
 ([21], ['mark']),
 ([21], ['nsubj']),
 ([17], ['advcl:as']),
 ([24], ['case']),
 ([24], ['nmod:poss']),
 ([15], ['obl:with']),
 ([28], ['case']),
 ([28], ['compound']),
 ([26], ['compound']),
 ([24], ['nmod:of']),
 ([7], ['punct'])]

In [85]:
# Print every word beside its (head indices, relation labels) target,
# matching the two sequences by position.
for position, word in enumerate(words):
    target = targs[position]
    print(word, target)

The ([2], ['det'])
team ([4, 7, 9, 12, 15], ['nsubj', 'nsubj', 'nsubj', 'nsubj', 'nsubj'])
who ([2], ['ref'])
work ([2], ['acl:relcl'])
there ([4], ['advmod'])
are ([7], ['cop'])
helpfull ([0], ['root'])
, ([9], ['punct'])
friendly ([7], ['conj:and'])
and ([12], ['cc'])
extremely ([12], ['advmod'])
knowledgeable ([7], ['conj:and'])
and ([15], ['cc'])
will ([15], ['aux'])
help ([7], ['conj:and'])
you ([15], ['obj'])
as ([18], ['advmod'])
much ([15], ['advmod'])
as ([21], ['mark'])
they ([21], ['nsubj'])
can ([17], ['advcl:as'])
with ([24], ['case'])
thier ([24], ['nmod:poss'])
years ([15], ['obl:with'])
of ([28], ['case'])
hands ([28], ['compound'])
on ([26], ['compound'])
practice ([24], ['nmod:of'])
. ([7], ['punct'])


In [69]:
#rels

In [18]:
# Inspect how `typing` renders a list-of-lists annotation.
List[List]

typing.List[typing.List]

In [22]:
# Start from an empty mapping of field name -> Field and display it.
fields: Dict[str, Field] = dict()
fields

{}