In [2]:
import spacy
import scispacy
from scispacy.linking import EntityLinker

In [3]:
# Load the "en_core_sci_sm" pipeline that every later cell uses as `nlp`.
nlp = spacy.load("en_core_sci_sm")

# Append the scispaCy linker component, configured to resolve abbreviations
# and to link against the "umls" knowledge base.
nlp.add_pipe(
    "scispacy_linker",
    config=dict(resolve_abbreviations=True, linker_name="umls"),
)

# Keep a handle on the linker component so later cells can reach `linker.kb`.
linker = nlp.get_pipe("scispacy_linker")

  deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(  # type: ignore[union-attr]


In [18]:
import json


def process_drug_uses(drugs_data):
    """Map every drug name to the conditions mentioned in its 'uses' paragraphs.

    Each paragraph is run through the notebook-level ``nlp`` pipeline. For every
    detected entity, the top-ranked candidate in ``ent._.kb_ents`` is looked up
    in ``linker.kb.cui_to_entity``. The entity's canonical name is kept when at
    least one of its semantic types is in ``relevant_types``.

    Args:
        drugs_data: Iterable of dicts, each with a 'name' key and a 'uses' key
            holding an iterable of text paragraphs.

    Returns:
        dict mapping drug name to a list of unique canonical names, in order of
        first appearance.

    Side effects:
        Prints one "Types for ..." line per retained entity mention.
    """
    relevant_types = {"T047", "T048", "T184", "T037", "T046", "T061", "T060"}
    semantic_type_descriptions = {
        "T047": "Disease or Syndrome",
        "T184": "Sign or Symptom",
        "T037": "Injury or Poisoning",
        "T046": "Pathologic Function",
        "T061": "Therapeutic or Preventive Procedure",
        "T060": "Diagnostic Procedure",
        "T048": "Mental or Behavioral Dysfunction"
    }

    drug_uses_dict = {}
    for drug in drugs_data:
        drug_name = drug['name']
        conditions_and_diseases = []

        # Process each paragraph in the 'uses' list for this drug
        for uses_text in drug['uses']:
            doc = nlp(uses_text)

            for ent in doc.ents:
                kb_ents = ent._.kb_ents
                if not kb_ents:
                    # The linker produced no candidate for this mention.
                    continue

                cui = kb_ents[0][0]  # Get the top CUI
                umls_entity = linker.kb.cui_to_entity[cui]

                if not any(st in relevant_types for st in umls_entity.types):
                    continue

                # An entity may carry extra semantic types that are not in the
                # description table (e.g. a relevant type plus an unlisted
                # one). Fall back to a placeholder instead of raising KeyError.
                type_labels = [
                    semantic_type_descriptions.get(type_code, "Unknown Type")
                    for type_code in umls_entity.types
                ]
                print(f"Types for {umls_entity.canonical_name}:{type_labels}")
                conditions_and_diseases.append(umls_entity.canonical_name)

        # Remove duplicates while keeping first-seen order, so repeated runs
        # produce identical output.
        drug_uses_dict[drug_name] = list(dict.fromkeys(conditions_and_diseases))

    return drug_uses_dict

# Read the drug records from disk.
json_file_path = 'outputtest.json'  # Update this to your JSON file path
with open(json_file_path, mode='r', encoding='utf-8') as file:
    drugs_data = json.loads(file.read())

# Extract the linked conditions for every drug.
drugs_uses_dict = process_drug_uses(drugs_data)

# Show one "name: [conditions]" line per drug.
for drug in drugs_uses_dict:
    uses = drugs_uses_dict[drug]
    print(f"{drug}: {uses}")

Types for Ulcerative Colitis:['Disease or Syndrome']
Types for Ulcerative Colitis:['Disease or Syndrome']
Types for Ulcerative Colitis:['Disease or Syndrome']
Types for Ulcerative Colitis:['Disease or Syndrome']
Types for Rheumatoid Arthritis:['Disease or Syndrome']
Types for Arthritis:['Disease or Syndrome']
Types for Cancer Pain:['Sign or Symptom']
Types for Acne Vulgaris:['Disease or Syndrome']
Types for Acne Vulgaris:['Disease or Syndrome']
Types for Hypersensitivity:['Pathologic Function']
Types for Thrombus:['Pathologic Function']
Types for Congenital Thrombotic Thrombocytopenic Purpura:['Disease or Syndrome']
Types for Infusion procedures:['Therapeutic or Preventive Procedure']
Types for Intravenous infusion procedures:['Therapeutic or Preventive Procedure']
Types for Blood Coagulation Disorders:['Disease or Syndrome']
Types for Anemia, Hemolytic:['Disease or Syndrome']
Types for Headache:['Sign or Symptom']
Types for Abdominal Pain:['Sign or Symptom']
Types for Digital Arthropa

In [32]:
def process_drug(drug, nlp, linker, text_field='uses'):
    """Collect the linked condition names mentioned in one drug's text paragraphs.

    Args:
        drug: Dict with a 'name' key and a ``text_field`` key holding an
            iterable of text paragraphs.
        nlp: Callable that turns a paragraph into a doc exposing ``.ents``.
        linker: Object exposing ``kb.cui_to_entity``, a CUI -> entity mapping.
        text_field: Key of ``drug`` whose paragraphs are analysed. Defaults to
            'uses', which is what this function has always read.

    Returns:
        Tuple ``(drug_name, names)`` where ``names`` is a list of unique
        canonical entity names in order of first appearance.

    Raises:
        KeyError: If ``drug`` lacks 'name' or ``text_field``, or the top CUI is
            missing from ``linker.kb.cui_to_entity``.
    """
    drug_name = drug['name']

    # Semantic types that qualify an entity for the result. Built once here
    # rather than once per entity.
    relevant_types = {"T047", "T048", "T184", "T037", "T046", "T061", "T060"}

    conditions_and_diseases = []

    # Process each paragraph stored under `text_field` for this drug
    for paragraph in drug[text_field]:
        doc = nlp(paragraph)

        for ent in doc.ents:
            kb_ents = ent._.kb_ents
            if not kb_ents:
                # No candidate was linked to this mention.
                continue

            cui = kb_ents[0][0]  # Get the top CUI
            umls_entity = linker.kb.cui_to_entity[cui]

            if any(st in relevant_types for st in umls_entity.types):
                conditions_and_diseases.append(umls_entity.canonical_name)

    # Remove duplicates while keeping first-seen order so the result is
    # deterministic across runs.
    return drug_name, list(dict.fromkeys(conditions_and_diseases))

In [34]:
# Read the drug records from disk.
json_file_path = 'outputtest.json'  # Update this to your JSON file path
with open(json_file_path, mode='r', encoding='utf-8') as file:
    drugs_data = json.loads(file.read())

# Scan the records and handle only the first one named Melatonin.
for drug in drugs_data:
    if drug['name'] != 'Melatonin':
        continue
    drug_name, conditions_and_diseases = process_drug(drug, nlp, linker)
    print(f"{drug_name} is used for: {conditions_and_diseases}")
    break  # Nothing more to do once Melatonin has been reported

Melatonin is used for: ['Cancer Therapeutic Procedure', 'Sleeplessness', 'Sleep disturbances', 'Jet Lag Syndrome', 'Sleep Disorders']


In [23]:
import json

def process_side_effects(drugs_data, nlp, linker):
    """Build a ``{drug_name: linked_names}`` dict by running ``process_drug`` on every record.

    Args:
        drugs_data: Iterable of drug records accepted by ``process_drug``.
        nlp: Forwarded unchanged to ``process_drug``.
        linker: Forwarded unchanged to ``process_drug``.

    Returns:
        dict keyed by the name that ``process_drug`` returns for each record.
        When two records yield the same name, the later one wins.

    NOTE(review): despite this function's name, which text is analysed is
    decided entirely by ``process_drug`` -- confirm that it reads the intended
    field.
    """
    # process_drug yields (name, names) pairs, which dict() consumes directly.
    return dict(process_drug(record, nlp, linker) for record in drugs_data)

# Load the JSON data
json_file_path = 'outputtest.json'  # Update this to your JSON file path
with open(json_file_path, 'r', encoding='utf-8') as file:
    drugs_data = json.load(file)

# Process every drug. process_side_effects requires the pipeline and the
# linker as explicit arguments; calling it with only `drugs_data` raises
# TypeError.
drugs_uses_dict = process_side_effects(drugs_data, nlp, linker)

# Output the results
for drug, uses in drugs_uses_dict.items():
    print(f"{drug}: {uses}")

Types for Generalized essential telangiectasia (disorder):['Disease or Syndrome']
Types for Hypersensitivity:['Pathologic Function']
Types for Urticaria:['Disease or Syndrome']
Types for Edema:['Pathologic Function']
Types for Fever:['Sign or Symptom']
Types for Pharyngitis:['Disease or Syndrome']
Types for Eye Burns:['Injury or Poisoning']
Types for Pain of skin:['Sign or Symptom']
Types for Exanthema:['Sign or Symptom']
Types for Adverse reaction to drug:['Pathologic Function']
Types for Exanthema:['Sign or Symptom']
Types for Fever:['Sign or Symptom']
Types for Swollen Lymph Node:['Sign or Symptom']
Types for Myalgia:['Sign or Symptom']
Types for Asthenia:['Sign or Symptom']
Types for Contusions:['Injury or Poisoning']
Types for Infections of musculoskeletal system:['Disease or Syndrome']
Types for Infection:['Pathologic Function']
Types for Communicable Diseases:['Disease or Syndrome']
Types for Fever:['Sign or Symptom']
Types for Chills:['Sign or Symptom']
Types for Pharyngitis:['

In [48]:
# Sample paragraphs about mirtazapine used to exercise the entity linker.
lst = [
    "Mirtazapine is an antidepressant. The way this medication works is still not fully understood. It is thought to positively affect communication between nerve cells in the central nervous system and/or restore chemical balance in the brain.",
    "Mirtazapine is used to treat major depressive disorder in adults.",
    "It is not known if mirtazapine is safe and effective for use to treat MDD in children.",
]

In [49]:
def process_melatonin_texts(text_list, nlp, linker):
    """Print every entity found in the given texts together with its linked candidates.

    For each text, each entity's surface text is printed, followed by every
    candidate in ``entity._.kb_ents`` (canonical name and score) and each of
    that candidate's semantic type codes with a human-readable description.
    Codes missing from the local table are reported as "Unknown Type".

    Args:
        text_list: Iterable of strings to analyse.
        nlp: Callable that turns a string into a doc exposing ``.ents``.
        linker: Object exposing ``kb.cui_to_entity``, a CUI -> entity mapping.

    Returns:
        None. All results go to stdout.
    """
    # Human-readable labels for the semantic types of interest; extend as needed.
    type_labels = {
        "T047": "Disease or Syndrome",
        "T184": "Sign or Symptom",
        "T046": "Pathologic Function",
        "T048": "Mental or Behavioral Dysfunction",
    }

    for doc in map(nlp, text_list):
        for mention in doc.ents:
            print("Name: ", mention.text)

            # Each candidate is indexed as [0] -> CUI and [1] -> score.
            for candidate in mention._.kb_ents:
                concept = linker.kb.cui_to_entity[candidate[0]]
                print(f"UMLS Entity: {concept.canonical_name}, Score: {candidate[1]}")

                for type_code in concept.types:
                    label = type_labels.get(type_code, "Unknown Type")
                    print(f"Semantic Type: {type_code}, Description: {label}")

# Print the entities, linked candidates and semantic types for the sample
# paragraphs in `lst`, using the notebook-level pipeline and linker.
process_melatonin_texts(lst, nlp, linker)



Name:  Mirtazapine
UMLS Entity: mirtazapine, Score: 1.0
Semantic Type: T109, Description: Unknown Type
Semantic Type: T121, Description: Unknown Type
Name:  antidepressant
UMLS Entity: Antidepressive Agents, Score: 1.0
Semantic Type: T121, Description: Unknown Type
UMLS Entity: Antidepressant Measurement, Score: 0.8169975876808167
Semantic Type: T059, Description: Unknown Type
UMLS Entity: response to antidepressant, Score: 0.7954068779945374
Semantic Type: T040, Description: Unknown Type
UMLS Entity: Tricyclic Antidepressive Agents, Score: 0.7750267386436462
Semantic Type: T121, Description: Unknown Type
Name:  medication
UMLS Entity: Pharmaceutical Preparations, Score: 1.0
Semantic Type: T121, Description: Unknown Type
UMLS Entity: medication - HL7 publishing domain, Score: 1.0
Semantic Type: T170, Description: Unknown Type
UMLS Entity: Medications, Score: 1.0
Semantic Type: T170, Description: Unknown Type
UMLS Entity: Medication Management, Score: 0.8655552864074707
Semantic Type: T

In [56]:
import requests

# Define the URL to retrieve the XML file
url = "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls/1efe378e-fee1-4ae9-8ea5-0fe2265fe2d8.xml"

try:
    # Send an HTTP GET request to the URL. Without a timeout, requests waits
    # indefinitely on an unresponsive server and the cell would hang. A timeout
    # raises a RequestException subclass, which the handler below reports.
    response = requests.get(url, timeout=30)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Define the file name to save the XML content
        file_name = "output2.xml"

        # Write the raw bytes so the document's own encoding is preserved
        with open(file_name, "wb") as file:
            file.write(response.content)

        print(f"XML content saved to {file_name}")
    else:
        # Handle the case where the request was not successful
        print(f"Request failed with status code {response.status_code}")
except requests.exceptions.RequestException as e:
    # Covers connection errors, timeouts and other request-level failures
    print(f"An error occurred: {e}")



XML content saved to output2.xml


In [55]:
import xml.etree.ElementTree as ET

# Parse the XML file
# NOTE(review): the download cell in this notebook writes "output2.xml", but
# this cell reads "output.xml" -- confirm which file is intended.
tree = ET.parse("output.xml")  # Replace "output.xml" with the actual file name

# Get the root element
root = tree.getroot()

# Define a list to store the extracted ingredient names
ingredient_names = []

# Find all <ingredientSubstance> elements.
# ElementTree matches tags by their fully qualified name, so a bare
# "ingredientSubstance" finds nothing when the document declares a default
# namespace. SPL files presumably declare the HL7 v3 namespace
# ("urn:hl7-org:v3") -- TODO confirm against the downloaded file.
# The "{*}" wildcard (Python 3.8+) matches the tag in any namespace or none.
ingredient_substances = root.findall(".//{*}ingredientSubstance")

# Iterate through <ingredientSubstance> elements and extract the first
# <name> descendant, again in any namespace
for ingredient_substance in ingredient_substances:
    name_element = ingredient_substance.find(".//{*}name")
    if name_element is not None:
        ingredient_name = name_element.text
        ingredient_names.append(ingredient_name)

# Print the extracted ingredient names
for ingredient_name in ingredient_names:
    print("Ingredient Name:", ingredient_name)

