The models used in this notebook come from the scispaCy collection:
https://allenai.github.io/scispacy/

In [None]:
# Install a known compatible NumPy version
!pip install numpy==1.24.4

# Reinstall spaCy and scispaCy
!pip install scispacy



In [None]:
import scispacy
import spacy
from scispacy.linking import EntityLinker
from spacy.tokens import Token, Span, Doc
from spacy import displacy
from collections import defaultdict

In [None]:
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_ner_bc5cdr_md-0.5.4.tar.gz

Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_ner_bc5cdr_md-0.5.4.tar.gz
  Downloading https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_ner_bc5cdr_md-0.5.4.tar.gz (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: en_ner_bc5cdr_md
  Building wheel for en_ner_bc5cdr_md (pyproject.toml) ... [?25l[?25hdone
  Created wheel for en_ner_bc5cdr_md: filename=en_ner_bc5cdr_md-0.5.4-py3-none-any.whl size=119787716 sha256=c20f25d1db887a36fc916174e90107b50fb68d06228a88d9201bbb619a67ce91
  Stored in directory: /root/.cache/pip/wheels/6e/a6/d6/bd15a41e2ff02a62f0a0a48dddbc07d048307db7199a1538f7
Successfully built en_ner_bc5cdr_md
Installing collected 

**Clinical Text Example**

In [None]:
# Sample clinical narrative used as the input document for the rest of the notebook.
# The first paragraph defines two abbreviations inline, "(SBMA)" and "(AR)", and
# then reuses the short forms on their own; the remaining paragraphs are a case
# report that mentions several conditions and medications.
text = """
Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease
caused by the expansion of a polyglutamine tract within the androgen receptor (AR).
SBMA can be caused by this easily. AR dysfunction leads to severe outcomes.

The patient, a 68-year-old female with a history of hypertension and Type 2 diabetes,
was admitted due to severe chest pain and shortness of breath. Initial evaluations
suggested acute myocardial infarction. She was immediately started on aspirin,
clopidogrel, and intravenous heparin.

Troponin levels were elevated, confirming cardiac injury. During hospitalization,
the patient developed signs of acute kidney injury, possibly related to the high-dose
NSAIDs administered for pain management.

Due to the presence of MRSA in her blood cultures, vancomycin therapy was initiated.
However, after three days, she developed a mild rash and pruritus, suspected to be
a hypersensitivity reaction to vancomycin.

To manage her hyperglycemia, insulin glargine was administered subcutaneously daily.
Additionally, metformin was resumed once renal function stabilized.

The medical team discontinued NSAIDs and switched to acetaminophen. Her condition
improved with supportive care, and she was discharged on dual antiplatelet therapy
and a reduced dose of lisinopril to manage her blood pressure and cardiac risk.
"""

In [None]:
import spacy

# Load the scispaCy NER model (BC5CDR: DISEASE / CHEMICAL entities)
nlp = spacy.load("en_ner_bc5cdr_md")

# ✅ Add the abbreviation detector (best effort: the cell keeps running without it)
try:
    # Imported for its side effect: it registers the "abbreviation_detector" factory.
    from scispacy.abbreviation import AbbreviationDetector

    # Guard against adding the component twice when the cell is re-run.
    if "abbreviation_detector" not in nlp.pipe_names:
        nlp.add_pipe("abbreviation_detector")

    print("✅ Abbreviation detector added.")
except Exception as e:
    print("⚠️ Couldn't add abbreviation detector:", e)

# ✅ Add the UMLS Entity Linker with abbreviation resolution enabled
try:
    if "scispacy_linker" not in nlp.pipe_names:
        nlp.add_pipe("scispacy_linker", config={
            "resolve_abbreviations": True,
            "linker_name": "umls"
        })

    linker = nlp.get_pipe("scispacy_linker")
    print("✅ Entity linker added.")
    print("UMLS KB loaded. Number of concepts:", len(linker.kb.cui_to_entity))
except Exception as e:
    print("⚠️ Couldn't add entity linker:", e)
    # Downstream code checks for None and skips the UMLS lookup.
    linker = None

# ✅ Run NLP pipeline
doc = nlp(text)

# 🔍 Display detected abbreviations
print("\n🔍 Abbreviations Found:")
print("Short Form\t→ Long Form")
abbrev_map = {}  # short form -> long form
if "abbreviation_detector" in nlp.pipe_names:
    # The detector reports one span per occurrence of a short form, so the same
    # pair can show up several times; print each distinct mapping only once.
    for abrv in doc._.abbreviations:
        short_form = abrv.text
        long_form = abrv._.long_form.text
        if abbrev_map.get(short_form) == long_form:
            continue
        print(f"{short_form}\t→ {long_form}")
        abbrev_map[short_form] = long_form
else:
    # Without the component the `abbreviations` extension may not be registered,
    # so reading doc._.abbreviations would raise instead of returning a list.
    print("(abbreviation detector unavailable — skipped)")

# 🧠 Show entities + UMLS concepts + definitions — abbreviation short forms are skipped
print("\n🧠 Named Entities + UMLS Concepts + Definitions:")
linked_long_forms = set()  # entity texts already printed (prevents duplicate prints)

for ent in doc.ents:
    # ❌ Skip abbreviation short forms and entity texts that were already printed
    if ent.text in abbrev_map or ent.text in linked_long_forms:
        continue

    print(f"Entity: {ent.text} (Label: {ent.label_})")
    linked_long_forms.add(ent.text)

    if linker is None:
        continue

    if ent._.kb_ents:
        # Only the first candidate in kb_ents is reported.
        top_id, score = ent._.kb_ents[0]
        cui_entry = linker.kb.cui_to_entity[top_id]
        print(f"  ↳ UMLS CUI: {top_id}")
        print(f"     Canonical Name: {cui_entry.canonical_name}")
        print(f"     Definition: {cui_entry.definition or 'No definition available.'}")
        print(f"     Score: {score:.3f}")
    else:
        print("  - No linked UMLS concepts.")


✅ Abbreviation detector added.
✅ Entity linker added.
UMLS KB loaded. Number of concepts: 3920422

🔍 Abbreviations Found:
Short Form	→ Long Form
SBMA	→ Spinal and bulbar muscular atrophy
SBMA	→ Spinal and bulbar muscular atrophy
AR	→ androgen receptor
AR	→ androgen receptor

🧠 Named Entities + UMLS Concepts + Definitions:
Entity: muscular atrophy (Label: DISEASE)
  ↳ UMLS CUI: C0026846
     Canonical Name: Muscular Atrophy
     Definition: Derangement in size and number of muscle fibers occurring with aging, reduction in blood supply, or following immobilization, prolonged weightlessness, malnutrition, and particularly in denervation.
     Score: 0.993
Entity: inherited motor neuron disease (Label: DISEASE)
  ↳ UMLS CUI: C0085084
     Canonical Name: Motor Neuron Disease
     Definition: Diseases characterized by a selective degeneration of the motor neurons of the spinal cord, brainstem, or motor cortex. Clinical subtypes are distinguished by the major site of degeneration. In AMYOTRO

In [None]:
# Report pairs of entities in the same sentence that are directly connected in
# the dependency parse, i.e. the syntactic root of one entity is attached to the
# syntactic root of the other.
print("\n--- Entity Relationships (Head Verb Connections) ---")
for sent in doc.sents:
    # Entities that lie completely inside this sentence.
    entities_in_sent = sent.ents
    for child_ent in entities_in_sent:
        for head_ent in entities_in_sent:
            # Each directed edge is reported exactly once, from the dependent
            # entity to its head, so the printed label (the dependent's dep_)
            # always describes the edge between the two entities shown.
            if child_ent != head_ent and child_ent.root.head == head_ent.root:
                print(f"Relationship: ({child_ent.text}) <--{child_ent.root.dep_}--> ({head_ent.text})")

# Visualize the dependency structure (shows entities within context)
for sent in doc.sents:
    displacy.render(sent, style="dep", jupyter=True)


--- Entity Relationships (Head Verb Connections) ---
Relationship: (muscular atrophy) <--nsubj--> (SBMA)
Relationship: (SBMA) <--appos--> (muscular atrophy)
Relationship: (hypertension) <--nmod--> (Type 2 diabetes)
Relationship: (Type 2 diabetes) <--conj--> (hypertension)
Relationship: (chest pain) <--nmod--> (shortness of breath)
Relationship: (shortness of breath) <--conj--> (chest pain)
Relationship: (aspirin) <--nmod--> (clopidogrel)
Relationship: (aspirin) <--nmod--> (heparin)
Relationship: (clopidogrel) <--conj--> (aspirin)
Relationship: (heparin) <--conj--> (aspirin)
Relationship: (rash) <--dobj--> (pruritus)
Relationship: (pruritus) <--conj--> (rash)


In [None]:
from collections import defaultdict

# Group DISEASE / CHEMICAL entities by the sentence that fully contains them.
# One pass over the sentences replaces a rescan of every sentence per entity;
# sentences without such entities get no key, and keys stay in document order.
entity_sent_map = defaultdict(list)

for sent in doc.sents:
    for ent in sent.ents:
        if ent.label_ in {"DISEASE", "CHEMICAL"}:
            entity_sent_map[sent].append(ent)

# Pair every disease with every chemical mentioned in the same sentence.
# This is plain co-occurrence: no check is made that the sentence actually
# asserts a relation between the two.
print("\n--- Disease–Chemical Interactions (sentence-based) ---")
interactions = []  # (disease text, chemical text, sentence text) tuples

for sent, ents in entity_sent_map.items():
    sentence_text = sent.text.strip()
    diseases = [e for e in ents if e.label_ == "DISEASE"]
    chemicals = [e for e in ents if e.label_ == "CHEMICAL"]

    for disease in diseases:
        for chemical in chemicals:
            interactions.append((disease.text, chemical.text, sentence_text))
            print(f"[{disease.text}] ↔ [{chemical.text}]")
            print(f'  ↳ In Sentence: "{sentence_text}"\n')



--- Disease–Chemical Interactions (sentence-based) ---
[acute kidney injury] ↔ [high-dose]
  ↳ In Sentence: "During hospitalization, 
the patient developed signs of acute kidney injury, possibly related to the high-dose 
NSAIDs administered for pain management."

[pain] ↔ [high-dose]
  ↳ In Sentence: "During hospitalization, 
the patient developed signs of acute kidney injury, possibly related to the high-dose 
NSAIDs administered for pain management."

[rash] ↔ [vancomycin]
  ↳ In Sentence: "However, after three days, she developed a mild rash and pruritus, suspected to be 
a hypersensitivity reaction to vancomycin."

[pruritus] ↔ [vancomycin]
  ↳ In Sentence: "However, after three days, she developed a mild rash and pruritus, suspected to be 
a hypersensitivity reaction to vancomycin."

[hypersensitivity] ↔ [vancomycin]
  ↳ In Sentence: "However, after three days, she developed a mild rash and pruritus, suspected to be 
a hypersensitivity reaction to vancomycin."

[hyperglycemia] ↔ 