In [1]:
from dfiner.utils import get_default_config
from dfiner.ontonote.ontonotes_data import read_figer
from dfiner.ontonote.mention_classifier import MentionClassifier
from dfiner.ontonote.ontonote_annotator import OntonoteTypeAnnotator
from dfiner.annotators.mention_lex_entail import MentionEntailmentAnnotator
from dfiner.annotators.nsd_annotator import NounSenseAnnotator, AverageEmbeddingNSD

from dfiner.kb_bias.kb_bias_annotator import KBBiasTypeAnnotator
from dfiner.eval.utils import to_column_format
import spacy
import numpy as np
import codecs
import os

# Load the spaCy English pipeline and the project's default configuration.
nlp = spacy.load('en')
config = get_default_config()
# KB-bias annotator. The second argument is presumably the name of the mention
# view it works on ("OntonoteType") -- TODO confirm against KBBiasTypeAnnotator.
kbann = KBBiasTypeAnnotator(config, "OntonoteType")
# The annotator is stored in the config under "kba" *before* the
# OntonoteTypeAnnotator is built from that same config, so keep this order.
config["kba"] = kbann
mc = OntonoteTypeAnnotator(config)
# NOTE(review): `noun_sense` is not referenced by any later cell visible in this
# notebook (the annotation loop calls `nsd` instead) -- confirm whether this
# instance is still needed.
noun_sense = NounSenseAnnotator(config)

# Build the noun-sense disambiguation (NSD) model and wrap it in the annotator
# that the annotation loop later calls as `nsd(doc)`.
#
# The model is taken from a cached pickle when the cache file exists and can be
# loaded; otherwise it is rebuilt from the embedding files named in the config.
nsd_cache_path = config["nsd_cache_path"]
embeddings_path = config["embeddings_path"]
synset_offset_pos_embeddings_path = config["synset_offset_pos_embeddings_path"]

nsd = None
if os.path.isfile(nsd_cache_path):
    # SECURITY NOTE(review): judging by its name, load_instance_from_pickle
    # unpickles the cache file. Unpickling can execute arbitrary code, so only
    # point nsd_cache_path at files you produced yourself.
    try:
        nsd = AverageEmbeddingNSD.load_instance_from_pickle(nsd_cache_path)
    except Exception as err:
        # Best effort: a broken cache must not stop the notebook, but report
        # *what* went wrong. Catching Exception (instead of a bare `except:`)
        # lets KeyboardInterrupt / SystemExit propagate.
        print("Encountered error while loading pickle from " + nsd_cache_path
              + ": " + repr(err))
        nsd = None

if not nsd:
    # No usable cache: build the model from the embedding files.
    nsd = AverageEmbeddingNSD(embeddings_path, synset_offset_pos_embeddings_path)

# NOTE(review): here NounSenseAnnotator receives the NSD model, whereas the
# setup code above constructs NounSenseAnnotator(config) -- confirm which
# constructor argument is the intended one.
nsd = NounSenseAnnotator(nsd)

from dfiner.annotators import get_non_default_annotator
# Build the non-default annotators with ngram_length=3.
# NOTE(review): the only visible use, `non_default_annotators(doc)` in the
# annotation loop, is commented out -- confirm whether this is still needed.
non_default_annotators = get_non_default_annotator(nlp, config, ngram_length=3)

# Mention-entailment annotator; its `mention_view` attribute is set to
# "OntonoteType" (presumably the view it takes mentions from -- TODO confirm).
mention_lex = MentionEntailmentAnnotator(config)
mention_lex.mention_view ="OntonoteType"

In [170]:
# Read the FIGER documents from the path configured under "figer_path".
# Previously tried hard-coded alternative, kept for reference:
# figer_docs = read_figer(nlp, "/home/haowu4/codes/dataless_finer/eval_figer/data/figer_original_gold_test.label")
figer_docs = read_figer(nlp, config["figer_path"])

# Annotation steps, applied to every document in exactly this order.
# `non_default_annotators` is not applied (its call was commented out).
annotation_steps = (nlp.tagger, nlp.parser, mc, kbann, nsd, mention_lex)

for doc in figer_docs:
    for annotate in annotation_steps:
        annotate(doc)
    

In [171]:
# %debug

In [172]:
# Write every annotated document to /tmp/figer_output.label (UTF-8): the
# column-format rendering of the document, followed by a newline.
# Views passed to to_column_format; an earlier variant used only
# ("OntonoteType", False).
OUTPUT_VIEWS = (("OntonoteType", False), ("KBBiasType", False))

with codecs.open("/tmp/figer_output.label", "w", "utf-8") as out:
    for doc in figer_docs:
        # A fresh list is handed over for each document, as before.
        out.write(to_column_format(doc, list(OUTPUT_VIEWS)))
        out.write("\n")

In [16]:
from dfiner.utils.utils import best_k_label

def print_constituent(doc, vn):
    for c in doc.user_data[vn].constituents:
        print doc[c.start: c.end].text,
        print "  --  ["
        
        if c.label2score:
            b = c.best_label_name
            for l in c.label2score:
                if b == l:
                    print "BEST\t", l, ", ", "%.2f" % c.label2score[l]
                else:
                    print "    \t", l, ", ", "%.2f" % c.label2score[l]
        print "]"
        
#         if c.outgoing_relations:
#             for rel in c.outgoing_relations:
#                 print rel.relation_name + ": ",
#                 t = rel.target
#                 print doc[t.start: t.end].text,
#                 print ", ",
#         print "]"
        
    

In [17]:
# NOTE: this slice is not the last expression of the cell, so the notebook does
# not display it; it has no visible effect here.
figer_docs[:10]
doc = figer_docs[1]
# Index of the document that the stepping cell (the one that prints
# figer_docs[i] and then does `i+=1`) will show next.
i=0

In [169]:
# 29
# NOTE(review): the number above is an unexplained note left in the cell --
# possibly a document index of interest; confirm or remove.
#
# Stepping cell: each execution prints the current index `i`, the document at
# that index and its "MentionEntail" constituents, then increments `i`, so
# running the cell again moves on to the next document. It relies on `i`,
# `figer_docs` and `print_constituent` having been defined by earlier cells.
print i
doc = figer_docs[i]
print doc
print ""
print_constituent(doc, "MentionEntail")
print "\n" + "="*60
i+=1

150
“ Everybody knows me in this area , ” he said . 




In [8]:
from dfiner.annotators.fine_type_annotator import SynsetFineTyper
# Fine typer built from the shared `config`; later cells call
# `typer.get_fine_types(...)` on it.
typer=SynsetFineTyper(config)


In [9]:
# NOTE(review): as the recorded output shows, this call raises
# "TypeError: get_fine_types() takes exactly 2 arguments (1 given)" -- the
# method needs one argument besides self. A later cell calls it as
# typer.get_fine_types(ss[1]); this failing cell can be removed.
typer.get_fine_types()

TypeError: get_fine_types() takes exactly 2 arguments (1 given)

In [174]:
from nltk.corpus import wordnet as wn


In [None]:
# Collect an "<offset>_n" identifier for every WordNet synset of "dam" whose
# part of speech is "n".
ss = []
for synset in wn.synsets("dam"):
    if synset.pos() == "n":
        ss.append("%d_n" % synset.offset())

In [None]:
# Ask the fine typer about the second "<offset>_n" identifier collected in `ss`.
typer.get_fine_types(ss[1])

In [145]:
# Inspect what the KB-bias annotator stores for the surface form
# "Washington State"; the recorded output is a dict mapping type names to floats.
kbann.surface_totype_dist["Washington State"]

{u'government.government': 0.0016155088852988692,
 u'government_agency': 0.0032310177705977385,
 u'location.province': 0.4862681744749596,
 u'organization.company': 0.17609046849757673,
 u'organization.educational_institution': 0.1777059773828756,
 u'organization.sports_team': 0.15508885298869143}

In [None]:
# %autoreload 2
# from dfiner.annotators.mention_lex_entail import MentionEntailmentAnnotator

# mention_lex = MentionEntailmentAnnotator(config)


In [176]:
# List the WordNet synsets for "shooter" (all parts of speech; no filter applied).
wn.synsets("shooter")

[Synset('taw.n.02'),
 Synset('shot.n.05'),
 Synset('shooter.n.03'),
 Synset('shooter.n.04'),
 Synset('gunman.n.01')]