# Testing e2e-coref, current SOTA for Coreference Resolution

In [1]:
# Necessary imports
# Used Python 3.6
# Can work on mac. If using mojave, please refer to https://stackoverflow.com/questions/52509602/cant-compile-c-program-on-a-mac-after-upgrade-to-mojave.
# Also in the requirements.txt change the first line to tensorflow==1.13.1
# assumes that the e2e-coref folder is in the project directory
import sys
sys.path.append("..") #to add the root project directory to the python modules path, so that subdirectories of it can be imported

from src.preparation.data_loading import read_dossier, read_news_article



In [2]:
%%javascript
document.title='sota_e2e_coref_testing_guru - Jupyter Lab'

<IPython.core.display.Javascript object>

In [3]:
#this library's stuff

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from six.moves import input
import tensorflow as tf
import coref_model as cm
import util

import nltk
nltk.download("punkt")
from nltk.tokenize import sent_tokenize, word_tokenize

def create_example(text):
  raw_sentences = sent_tokenize(text)
  sentences = [word_tokenize(s) for s in raw_sentences]
  speakers = [["" for _ in sentence] for sentence in sentences]
  return {
    "doc_key": "nw",
    "clusters": [],
    "sentences": sentences,
    "speakers": speakers,
  }

def print_predictions(example):
  words = util.flatten(example["sentences"])
  for cluster in example["predicted_clusters"]:
    print(u"Predicted cluster: {}".format([" ".join(words[m[0]:m[1]+1]) for m in cluster]))

def make_predictions(text, model):
  example = create_example(text)
  tensorized_example = model.tensorize_example(example, is_training=False)
  feed_dict = {i:t for i,t in zip(model.input_tensors, tensorized_example)}
  _, _, _, mention_starts, mention_ends, antecedents, antecedent_scores, head_scores = session.run(model.predictions + [model.head_scores], feed_dict=feed_dict)

  predicted_antecedents = model.get_predicted_antecedents(antecedents, antecedent_scores)

  example["predicted_clusters"], _ = model.get_predicted_clusters(mention_starts, mention_ends, predicted_antecedents)
  example["top_spans"] = zip((int(i) for i in mention_starts), (int(i) for i in mention_ends))
  example["head_scores"] = head_scores.tolist()
  return example


config = util.initialize_from_env()
model = cm.CorefModel(config)
with tf.Session() as session:
    model.restore(session)
#     while True:
#       text = input("Document text: ")
#       if len(text) > 0:
#         print_predictions(make_predictions(text, model))


W0817 21:18:17.308133 140734788502976 __init__.py:56] Some hub symbols are not available because TensorFlow version is less than 1.14



For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



[nltk_data] Downloading package punkt to
[nltk_data]     /Users/GuruSenthil/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Setting CUDA_VISIBLE_DEVICES to: 
Running experiment: -f


ConfigMissingException: 'No configuration setting found for key -f'

In [None]:
text = 'This is Janice. She is a cat.'

print_predictions(make_predictions(text, model))

## Evaluation

* Started to test the python code that they used for the paper in Ubuntu - it is very memory intensive - at least 16 GB RAM is recommended
* Can compile on mac, but you have to install tensorflow 1.13.1. Might have to set some environment variables to get it to compile also due to Mojave.
* You also have to download 1 extra file at ‘https://lil.cs.washington.edu/coref/char_vocab.english.txt’
* Takes a couple of minutes to load the pre-trained model, after that it’s not too slow to run.
* Seems to predict too-large entities for each cluster, ex. Predicts 'the unions , which it has accused of an “ illegal work slowdown ” to win leverage in contract talks' as an entity. Technically correct, but it only needs to include ‘the unions’
* Not too slow when running on longer pieces of text, the accuracy also seems to be pretty good.
* The largest memory-hogging part seems to be the glove word embeddings, we could probably train our own smaller version of them
* Currently only works when used as a command line tool, will need to modify the source code a little bit to make it work as an API
