In [None]:
!pip install medspacy

In [None]:
!pip install https://github.com/abchapman93/spacy_models/raw/master/releases/en_info_3700_i2b2_2012-0.1.0/dist/en_info_3700_i2b2_2012-0.1.0.tar.gz

# Overview
In this notebook, we'll put together all of the NLP tools we've learned and apply them to several clinical problems:

1. Detecting evidence of surgical site infections (SSIs) and body location
2. Identify texts with positive COVID-19 cases
3. Write some examples of your own research problems which could use NLP and show some dummy examples of how you could use medSpaCy

In [None]:
import spacy
import medspacy

from medspacy.ner import TargetRule
from medspacy.context import ConTextItem
from medspacy.visualization import visualize_ent, visualize_dep

## Choice of Model
For Tasks #1-2, which are short and include very specific concepts, we'll load a blank model and add target rules to match the concepts. Task #3 is quite long and will include many different concepts. This is exactly the kind of scenario when we would want to use a pre-trained machine learning model.

In [None]:
# Load medspaCy's default pipeline (no pre-trained model name is passed); the
# target concepts for Tasks #1-2 are supplied by hand-written rules below.
nlp = medspacy.load()
# Alternative: load the pre-trained model installed at the top of the notebook.
# nlp = medspacy.load("en_info_3700_i2b2_2012")

In [None]:
# Display the names of the components in the loaded pipeline; the next cell
# looks up "target_matcher" and "context" by these names.
nlp.pipe_names

In [None]:
# Grab handles to the two rule-based components so rules can be added to them.
target_matcher, context = nlp.get_pipe("target_matcher"), nlp.get_pipe("context")

# I. Surgical site infection
Patient safety measures require identifying adverse outcomes of medical treatment, such as surgical site infections (SSIs). In this exercise, we will identify evidence of SSIs from radiology exams. 

- Add target rules to match any mention of evidence of SSI such as **"fluid collection"** or **"abscess"**
- Add target rules to match any body location
- Add context rules to identify any additional modifiers, such as negation or uncertainty

See this manuscript: [Detecting Evidence of Intra-abdominal Surgical Site Infections From Radiology Reports Using Natural Language Processing](https://pubmed.ncbi.nlm.nih.gov/29854116/)

In [None]:
# Example radiology-report sentences for the SSI exercise. They mix affirmed,
# negated ("no evidence of") and uncertain/hypothetical ("rule out", "r/o",
# "possible", "might represent") mentions.
# NOTE(review): the misspellings (e.g. "abcess", "enhacement") look like
# realistic clinical-text noise and are presumably intentional — do not "fix".
texts = [
    "Fluid collection is seen in the abdomen.",
    "There is no evidence of fluid collection.",
    "PURPOSE OF EXAM: Rule out abscess.",
    "Hematomas are seen around in the right lower quadrant.",
    "No drainable collection is noted at this time.",
    "Post-surgical enhancing fluid collections, most notable right lower quadrant and left pelvis.",
    "please assess for possible ir drainage if abscess present.",
    "Intraloop fluid collection with air-fluid level, might represent contained rupture or intraloop abscess.",
    "No evidence of intra-abdominal abscess.",
    "Right paracolic and anterior abdominal fluid collections with rim enhacement likely representing abscesses amenable to drainage.",
    "r/o infiltrate, fluid collection",
    "No fluid collection is identified within the abdomen and pelvis.",
    "no fluid collection is seen around the anastamosis site",
    "36 year old woman with nectrotzing pancreatitis and intraabdominal collections.",
    "REASON FOR THIS EXAMINATION: CT abd/pelvis to access for dihiscence or abcess / fluid collection.",
]

In [None]:
# Rules for the target matcher in the SSI exercise.
#
# Each TargetRule takes a literal and an entity label. When `pattern` is given
# it is a spaCy token pattern: a list of dicts in which EACH dict describes
# exactly ONE token, and LOWER compares against the lower-cased token text.
#
# A previous two-token "abdomen" pattern was dropped: its second token listed
# multi-word, partly capitalised strings under LOWER/IN, which a single
# lower-cased token can never equal, so the rule could not match anything.
# The phrases it was aiming for are covered by the rules below.
target_rules = [
    TargetRule("abdomen", "BODY_LOC"),

    # This will match both "intra-abdominal" and "intraabdominal"
    # NOTE(review): this is a single-token pattern; if the tokenizer splits
    # "intra-abdominal" at the hyphen the hyphenated form will be missed —
    # confirm in the visualization of "No evidence of intra-abdominal abscess."
    TargetRule("intra-abdominal", "BODY_LOC",
               pattern=[{"LOWER": {"REGEX": "intra-?abdominal"}}]),

    TargetRule("abd", "BODY_LOC"),

    # This will match "left upper quadrant", "left lower quadrant",
    # "right upper quadrant", "right lower quadrant"
    TargetRule("<LEFT/RIGHT> <UPPER/LOWER> quadrant", "BODY_LOC",
               pattern=[
                   {"LOWER": {"IN": ["left", "right"]}},
                   {"LOWER": {"IN": ["upper", "lower"]}},
                   {"LOWER": "quadrant"},
               ]),

    # "left paracolic" / "right paracolic"
    TargetRule("<LEFT/RIGHT> paracolic", "BODY_LOC",
               pattern=[
                   {"LOWER": {"IN": ["left", "right"]}},
                   {"LOWER": "paracolic"},
               ]),

    # "posterior/anterior abdomen" and the adjectival form "anterior abdominal"
    # that actually occurs in `texts`.
    TargetRule("<POSTERIOR/ANTERIOR> abdomen", "BODY_LOC",
               pattern=[
                   {"LOWER": {"IN": ["posterior", "anterior"]}},
                   {"LOWER": {"IN": ["abdomen", "abdominal"]}},
               ]),

    # TODO (exercise): add rules for evidence of SSI and further body locations.
#     TargetRule(____, "SSI"),
#     # ...

#     TargetRule(____, "BODY_LOC"),
    # ...

]

In [None]:
# Register the Section I target rules with the pipeline's target matcher.
# NOTE(review): re-running this cell adds the same rules again to the same
# component — reload `nlp` first if you edit and re-add rules.
target_matcher.add(target_rules)

In [None]:
# Extra context (modifier) rules for the SSI exercise. Left empty here, so only
# whatever rules the "context" component already has are used.
context_item_data = [
    
]

In [None]:
# Add the (currently empty) list of extra context rules to the context component.
context.add(context_item_data)

In [None]:
# Run every example sentence through the pipeline and keep the processed docs.
docs = [*nlp.pipe(texts)]

Now scroll through the results and see if you extracted all of the relevant information. If your model misses any, go back and add them to your rules.

In [None]:
# Show the entity visualization for each processed sentence, one after another.
for doc in docs:
    visualize_ent(doc, jupyter=True)

In [None]:
# Show the dependency-style visualization for each processed sentence.
for doc in docs:
    visualize_dep(doc, jupyter=True)
    # Blank line to separate consecutive visualizations in the output.
    print()

# II. COVID-19
In this exercise, we will look for evidence of COVID-19 in clinical text. See this manuscript describing a similar system: [A Natural Language Processing System for National
COVID-19 Surveillance in the US Department of Veterans Affairs](https://openreview.net/pdf?id=ZQ_HvBxcdCv)

- Add target rules to extract any mentions of COVID-19 or synonyms
- Add context rules to identify any modifiers such as **"NEGATED_EXISTENCE"**, **"UNCERTAIN"**, or **"POSITIVE_EXISTENCE"** (i.e., "confirmed" or "diagnosed with")


In [None]:
# Start Section II from a fresh pipeline so the Section I rules do not carry
# over. Both rule components are re-fetched immediately: once `nlp` is rebound,
# any earlier `target_matcher` / `context` handle still points at the old
# pipeline, and adding rules through it would silently have no effect here.
nlp = medspacy.load()
target_matcher = nlp.get_pipe("target_matcher")
context = nlp.get_pipe("context")

In [None]:
# Example sentences for the COVID-19 exercise. Note that this rebinds `texts`,
# replacing the Section I examples. Not every mention is a positive case:
# some are screening requests, negations, or "precautions" boilerplate.
texts = [
    "Patient admitted to hospital for respiratory failure secondary to COVID-19.",
    "The patient reports that they have been diagnosed with COVID-19",
    "Requested that patient be screened for novel coronavirus via telephone",
    "Lab Results: SARS-COV-2 DETECTED",
    "Patient does not have COVID-19",
    "This encounter is done over the telephone secondary to COVID-19 precautions.",
    
]

In [None]:
# Fetch the rule components of the current (Section II) pipeline.
target_matcher, context = nlp.get_pipe("target_matcher"), nlp.get_pipe("context")

In [None]:
# Target rules for COVID-19 mentions; every synonym gets the same "COVID-19"
# label. These are literal-only rules covering the terms that occur in `texts`.
# (The original `___` placeholder is not a blank in IPython — it resolves to the
# output-history variable — so it is replaced with concrete literals.)
target_rules = [
    TargetRule("COVID-19", "COVID-19"),
    TargetRule("novel coronavirus", "COVID-19"),
    TargetRule("SARS-COV-2", "COVID-19"),
    # ... add further synonyms here
]

In [None]:
# Register the COVID-19 target rules with the current pipeline's target matcher.
target_matcher.add(target_rules)

In [None]:
# Handle to the context component of the current pipeline, used below to add
# modifier rules.
context = nlp.get_pipe("context")

In [None]:
# Context (modifier) rules marking a COVID-19 mention as positively asserted.
# `rule` is the direction in which the modifier looks for its target:
#   - "diagnosed with" precedes the mention  -> FORWARD
#   - "detected" follows the mention         -> BACKWARD
# The original cell never closed the list (a SyntaxError) and used an undefined
# `____` placeholder; both are fixed here.
item_data = [
    ConTextItem("diagnosed with", "POSITIVE_EXISTENCE", rule="FORWARD"),
    ConTextItem("detected", "POSITIVE_EXISTENCE", rule="BACKWARD"),
    # ... add "NEGATED_EXISTENCE" / "UNCERTAIN" items here if the rules already
    # present in the context component do not cover your examples
]

In [None]:
# Add the COVID-19 context rules to the current pipeline's context component.
context.add(item_data)

In [None]:
# Process the COVID-19 example sentences with the Section II pipeline.
docs = [*nlp.pipe(texts)]

In [None]:
# Show the entity visualization for each processed COVID-19 sentence.
for doc in docs:
    visualize_ent(doc, jupyter=True)

In [None]:
# Show the dependency-style visualization for each processed COVID-19 sentence.
for doc in docs:
    visualize_dep(doc, jupyter=True)
    # Blank line to separate consecutive visualizations in the output.
    print()

# III. Choose Your Own Adventure
Are there any problems in your research which involve text data? How can medSpaCy be used to extract information from that data? Add some examples below to illustrate how you could use medSpaCy. Obviously, real data will be more informative, but starting with some small, made-up examples can be a useful method for framing your research question.

In [None]:
# Placeholder for your own example sentences (Section III); add strings here.
my_texts = [
    
]