## Please take this quiz on pneumonia:
https://goo.gl/forms/OsySYLhPclplK4hw1

In [None]:
import urllib.request
import os
import codecs
import zipfile
import pandas as pd
from IPython.display import display, HTML
import sklearn.metrics

# packages for interaction
# NOTE: interact/interactive/fixed are taken from ipywidgets directly. The old
# IPython.html.widgets path is a shim deprecated since IPython 4.0 and may be
# missing on current installs; ipywidgets is already required just below.
from ipywidgets import interact, interactive, fixed
from IPython.display import display, HTML, Image
import ipywidgets

# and also our utilities for this class
from nlp_pneumonia_utils import Annotation
from nlp_pneumonia_utils import AnnotatedDocument
from nlp_pneumonia_utils import read_brat_annotations
from nlp_pneumonia_utils import read_doc_annotations
from nlp_pneumonia_utils import read_annotations
from nlp_pneumonia_utils import calculate_prediction_metrics
from nlp_pneumonia_utils import mark_text_custom
from nlp_pneumonia_utils import clearPyConTextRegularExpressions
from nlp_pneumonia_utils import pneumonia_annotation_html_markup
# Reached only when every import above succeeded
print('Loaded utilities...')

## First, we'll load our dataset. Throughout these notebooks a lot of utility functions are used; feel free to look at them in `nlp_pneumonia_utils.py`.

In [None]:
# Read the training archive; the helper hands back a mapping whose values
# are the annotated documents we work with from here on.
annotated_doc_map = read_doc_annotations('data/training_v2.zip')
annotated_docs = [*annotated_doc_map.values()]
print('Total Annotated Documents : {0}'.format(len(annotated_docs)))

# Tally the documents whose positive_label is truthy
total_positives = sum(1 for doc in annotated_docs if doc.positive_label)

print('Total Positive Pneumonia Documents : {0}'.format(total_positives))

## Next, let's look at some of the annotations annotated by our expert.  Note that there are 3 total annotation types in this set : 
1. **PNEUMONIA_DOC_YES** -> Document shows **active** or **possible** case of pneumonia
2. **PNEUMONIA_DOC_NO** -> Document shows **no evidence** of pneumonia
3. **EVIDENCE_OF_PNEUMONIA** -> Spans of phrases/sentence which show positive or possible evidence of pneumonia which led the expert annotator to the final document-level conclusion

## Let's render one of our annotated documents in HTML.  When using the function 'pneumonia_annotation_html_markup' these show up as the colors:
1. **PNEUMONIA_DOC_YES** -> RED
2. **PNEUMONIA_DOC_NO** -> GREEN
3. **EVIDENCE_OF_PNEUMONIA** -> BLUE

## We can also use widgets to "scrub" through the documents and examine any of the expert annotations

In [None]:
# This function lets us iterate through all documents and view the markup
def view_annotation_markup(anno_docs):
    """Show an integer slider that renders one annotated document at a time.

    Args:
        anno_docs: Indexable, sized collection of annotated documents. Each
            element is passed to ``pneumonia_annotation_html_markup``.

    Returns:
        None. The selected document's markup is displayed as HTML; when
        ``anno_docs`` is empty a short notice is printed instead.
    """
    doc_count = len(anno_docs)
    if doc_count == 0:
        # With no documents the slider range would be min=0, max=-1, which is
        # not a valid range, so report it and skip building the widget.
        print('No annotated documents to display.')
        return

    @interact(i=ipywidgets.IntSlider(min=0, max=doc_count - 1))
    def _view_markup(i):
        report_html = pneumonia_annotation_html_markup(anno_docs[i])
        # Turn newlines into <br> so line breaks survive in the HTML output
        report_html = report_html.replace('\n', '<br>')
        display(HTML(report_html))

In [None]:
# Launch the slider-driven viewer over the annotated documents loaded above
view_annotation_markup(annotated_docs)

<br/><br/>This material was presented as part of the DeCART Data Science for the Health Science Summer Program at the University of Utah in 2018.<br/>
Presenters : Dr. Wendy Chapman, Jianlin Shi <br> Acknowledgement: Many thanks to Kelly Peterson, because part of the material is adapted from his previous work.