In [10]:
import medspacy_pna
from medspacy.visualization import visualize_ent
from medspacy_pna.util import add_additional_resources

import pandas as pd

# Load NLP and customize

In [2]:
# Build the NLP pipeline. "radiology" is the argument handed to
# medspacy_pna.build_nlp (presumably selects the radiology-report
# configuration — confirm against the package documentation).
nlp = medspacy_pna.build_nlp("radiology")



In [39]:
# Inspect which components the loaded pipeline contains.
nlp.pipe_names

['tok2vec',
 'tagger',
 'parser',
 'medspacy_concept_tagger',
 'medspacy_target_matcher',
 'medspacy_context',
 'medspacy_sectionizer',
 'medspacy_postprocessor',
 'pneumonia_radiologydocumentclassifier']

You can customize the NLP by adding new rules to the appropriate pipeline components. Here are two rules we added for the University of Utah data:

In [37]:
from medspacy.context import ConTextRule

In [38]:
# Extra ConText rules added for the University of Utah data.
# `direction` is passed by keyword on purpose: in medspaCy's ConTextRule the
# third positional parameter is `pattern`, so a positional "FORWARD" would be
# taken as the match pattern and the direction would stay at its default.
new_context_rules = [
    ConTextRule("does not confirm", "NEGATED_EXISTENCE", direction="FORWARD"),
    ConTextRule("consistent with", "POSSIBLE_EXISTENCE", direction="FORWARD"),
]

In [40]:
# Add to the context component: look up the "medspacy_context" pipe by name
# and pass it the list of new rules.
# NOTE(review): re-running this cell calls add() again with the same rules;
# whether duplicates are ignored is not visible here — confirm.
nlp.get_pipe("medspacy_context").add(new_context_rules)

Example of processing a single doc:

In [41]:
# Run the pipeline on one sentence and render the result with visualize_ent.
doc = nlp("There is no evidence of pneumonia.")
visualize_ent(doc)

  matches = self.matcher(doc)
  from IPython.core.display import display, HTML


In [42]:
# Document-level label stored on the doc's custom `._` attribute namespace.
doc._.document_classification

'NEG'

# Process dataset
## 1. Input

In [20]:
# Load the demo notes. The cells below rely on the "encounter_id" and "text"
# columns of this table.
df = pd.read_excel("./dataset_demo.xlsx")

In [21]:
# Display the loaded input table.
df

Unnamed: 0,encounter_id,text
0,1,There are no opacities.
1,1,There may be some consolidation.
2,2,Impression: pneumonia


## 2. Process notes and extract document classifications

In [43]:
# Run every note in the "text" column through the pipeline and materialize
# the processed docs as a list.
docs = list(nlp.pipe(df["text"]))

  matches = self.matcher(doc)
  matches = self.matcher(doc)
  matches = self.matcher(doc)


In [44]:
# Display the processed docs.
docs

[There are no opacities.,
 There may be some consolidation.,
 Impression: pneumonia]

The document classifier assigns one of three levels to each note: "POS" (positive); "POSSIBLE"; or "NEG" (negative). We can also group positive/possible together to get a binary classification.

In [47]:
# Collect one classification label per processed note, in the order of `docs`.
doc_cls = [note._.document_classification for note in docs]

In [48]:
# Display the extracted labels.
doc_cls

['NEG', 'POSSIBLE', 'POS']

In [49]:
# Attach the labels to the notes table as a new column (modifies `df` in place).
df["document_classification"] = doc_cls

In [50]:
# Binary label: 0 when the note is "NEG", 1 for any other label
# (i.e. "POS" and "POSSIBLE" are grouped together).
df["document_classification_bin"] = [int(label != "NEG") for label in doc_cls]

In [51]:
# Display the table with the two classification columns added.
df

Unnamed: 0,encounter_id,text,document_classification,document_classification_bin
0,1,There are no opacities.,NEG,0
1,1,There may be some consolidation.,POSSIBLE,1
2,2,Impression: pneumonia,POS,1


## 3. Output
We can save a note-level table and an encounter-level table. For the encounter-level table, we call an encounter positive if any of its notes has a "POSSIBLE" or "POS" classification.

In [52]:
# Note-level export table: the encounter identifier plus both classification
# columns (the note text itself is left out).
df_out = df.loc[
    :,
    ["encounter_id", "document_classification", "document_classification_bin"],
]
df_out

Unnamed: 0,encounter_id,document_classification,document_classification_bin
0,1,NEG,0
1,1,POSSIBLE,1
2,2,POS,1


In [53]:
# Save the note-level table to an Excel file in the working directory.
df_out.to_excel("./output.xlsx")

In [54]:
# Encounter-level table: taking the max of the 0/1 note labels per encounter
# yields 1 as soon as any note in that encounter is non-negative.
df_encounter = (
    df_out
    .groupby("encounter_id")["document_classification_bin"]
    .max()
    .to_frame()
)
df_encounter

Unnamed: 0_level_0,document_classification_bin
encounter_id,Unnamed: 1_level_1
1,1
2,1


In [55]:
# Save the encounter-level table (indexed by encounter_id) to an Excel file.
df_encounter.to_excel("./encounter_level.xlsx")