In [None]:
# we will definitely need pyConText
import pyConTextNLP
from pyConTextNLP import pyConTextGraph
from pyConTextNLP.itemData import itemData
from pyConTextNLP.display._bokeh import graphDocumentBokeh
print(pyConTextNLP.__version__)

In [None]:
# we will need a few other packages
import nltk

In [None]:
from nlp_pneumonia_utils import Annotation
from nlp_pneumonia_utils import AnnotatedDocument
from nlp_pneumonia_utils import read_brat_annotations
from nlp_pneumonia_utils import read_annotations
from nlp_pneumonia_utils import calculate_prediction_metrics
from nlp_pneumonia_utils import mark_text
from nlp_pneumonia_utils import pneumonia_html_markup

print('Imported pneumonia nlp utilities...')

In [None]:
from bokeh.io import output_notebook
# we only need to call this once and then we can show graphs inline within this notebook
output_notebook()

In [None]:
%matplotlib inline

In [None]:
# Start by reading the annotated documents out of the bundled archive,
# then report how many were loaded as a quick sanity check.
annotated_docs = read_annotations('pneumonia_brat_full_set1.zip')
print(
    'Total Annotated Documents : {0}'.format(len(annotated_docs))
)

In [None]:
def markup_sentence(s, modifiers, targets, prune_inactive=True):
    """Build a pyConText markup for a single sentence.

    A fresh ``ConTextMarkup`` is created, the sentence text is loaded and
    cleaned, targets and modifiers are marked on it, and the modifiers are
    then applied to the targets.

    Args:
        s: Text of the sentence to mark up.
        modifiers: Items handed to ``markItems`` with ``mode="modifier"``.
        targets: Items handed to ``markItems`` with ``mode="target"``.
        prune_inactive: When true (the default), ``dropInactiveModifiers()``
            is called on the markup before it is returned.

    Returns:
        The populated ``pyConTextGraph.ConTextMarkup`` object.
    """
    sentence_markup = pyConTextGraph.ConTextMarkup()

    # Load and clean the sentence text before marking anything on it.
    sentence_markup.setRawText(s)
    sentence_markup.cleanText()

    # Targets are marked first, then modifiers (same order as always).
    for items, item_mode in ((targets, "target"), (modifiers, "modifier")):
        sentence_markup.markItems(items, mode=item_mode)

    # Tidy up the marks before relating modifiers to targets.
    sentence_markup.pruneMarks()
    sentence_markup.dropMarks('Exclusion')

    # Apply modifiers to any targets within the modifier's scope.
    sentence_markup.applyModifiers()
    sentence_markup.pruneSelfModifyingRelationships()

    if not prune_inactive:
        return sentence_markup

    sentence_markup.dropInactiveModifiers()
    return sentence_markup

In [None]:
# Let us set up an example document to work with: a short chest x-ray report.
# Note that the string begins with a newline, because the report text starts
# on the line after the opening triple quotes.
example_document = """
INDICATION:  41-year-old male with supraventricular tachycardia.
COMPARISON:  AP upright and lateral chest x-ray dated Jan 1, 1900.

AP SUPINE PORTABLE CHEST X-RAY:  Cardiac silhouette is upper limits of
normal, possibly related to AP technique.  The mediastinal and hilar contours
appear as normal.  The pulmonary vasculature is normal and there is no pneumothorax
on this supine radiograph.  A consolidation with air bronchograms 
is new.  No right-sided consolidations or effusions.

IMPRESSION:  Left lower lobe process suggesting pneumonia."""

# A single sentence for the first experiments; it repeats the IMPRESSION line
# that closes example_document.
example_sentence = """IMPRESSION:  Left lower lobe process suggesting pneumonia."""

# Before we continue, note that any itemData in pyConText has 4 parts:
1. The literal (e.g. "pneumonia", "pneumothorax", "can rule out", "cannot be excluded", etc.)
2. The category (e.g. "EVIDENCE_OF_PNEUMONIA")
3. The regular expression (optional) used to capture the literal in the text. If no regular expression is provided, one is generated directly from the literal.
4. The rule (optional). If the itemData is being used as a modifier, the rule states which direction the modifier operates in within the sentence. The currently valid values are "forward" (the item can modify objects following it in the sentence), "backward" (the item can modify objects preceding it in the sentence), and "bidirectional" (the item can modify objects both preceding and following it in the sentence).

In [None]:
# Now let's set up some rules for pyConText for EVIDENCE_OF_PNEUMONIA.
# At this moment we will just set up these "concepts"; we'll handle modifiers
# for them after that.

# No modifiers yet: the modifier list is intentionally left empty in this cell.
modifiers1 = []

# Before we add targets, remember from above that they will look like this:
# targets = itemData(["literal", "CATEGORY", "regular expression(s)", "empty or forward or backward or bidirectional"])

# So now let's set this up for "pneumonia" with the category "EVIDENCE_OF_PNEUMONIA".
# The regular expression and the rule are both left empty here.
# (targets1 is built directly; a placeholder `targets1 = []` beforehand would
# just be overwritten.)
targets1 = itemData(["pneumonia", "EVIDENCE_OF_PNEUMONIA", "", ""])

# Let's go ahead and use this now on one single example sentence:
markup = markup_sentence(example_sentence, modifiers1, targets1)
print(markup.nodes(data=True))

In [None]:
# Since pyConText uses a graph behind the scenes, we can inspect this graph
# with a visualization:
graphDocumentBokeh(
    markup,
    title='My first pneumonia concept',
)

In [None]:
# This works on entire documents, combining all sentence-level markups into
# one object that we can then graph.
def markup_context_document(report_text, modifiers, targets):
    """Mark up a whole report, one sentence at a time.

    The report is split into sentences with ``nltk.sent_tokenize``; each
    sentence is passed through ``markup_sentence`` and the resulting markup is
    added to a single ``ConTextDocument``.

    Args:
        report_text: Full text of the report.
        modifiers: Modifier items forwarded to ``markup_sentence``.
        targets: Target items forwarded to ``markup_sentence``.

    Returns:
        The ``pyConTextGraph.ConTextDocument`` holding one markup per sentence.
    """
    document = pyConTextGraph.ConTextDocument()

    # nltk does the sentence splitting for us
    for sentence_text in nltk.sent_tokenize(report_text):
        document.addMarkup(
            markup_sentence(sentence_text, modifiers=modifiers, targets=targets)
        )

    return document

In [None]:
# Very simple, so now let's add some additional concepts.

# Still no modifiers: the modifier list is intentionally left empty in this cell.
modifiers2 = []

# Before we add targets, remember from above that they will look like this:
# targets = itemData(["literal", "CATEGORY", "regular expression(s)", "empty or forward or backward or bidirectional"])

# This time we register several literals, all under the category
# "EVIDENCE_OF_PNEUMONIA"; regular expressions and rules are again left empty.
# (targets2 is built directly; a placeholder `targets2 = []` beforehand would
# just be overwritten.)
targets2 = itemData(
    ["pneumonia", "EVIDENCE_OF_PNEUMONIA", "", ""],
    ["consolidation", "EVIDENCE_OF_PNEUMONIA", "", ""],
    ["infiltrates", "EVIDENCE_OF_PNEUMONIA", "", ""],
    ["opacity", "EVIDENCE_OF_PNEUMONIA", "", ""],
)

# Let's go ahead and use our updated targets on the whole example document:
context = markup_context_document(example_document, modifiers2, targets2)
print(context.getXML())

In [None]:
# Since pyConText uses a graph behind the scenes, we can inspect the
# document-level graph with a visualization:
graphDocumentBokeh(
    context.getDocumentGraph(),
    title='My first pneumonia document',
)