In [21]:
import json
import sys
import spacy
import scispacy
import pandas as pd
from scispacy.umls_linking import UmlsEntityLinker

# Load the spaCy pipeline named "en_ner_bc5cdr_md" once at module level.
# disease_from_tweet() below relies on this pipeline tagging entities with the
# labels "CHEMICAL" and "DISEASE".
# NOTE(review): presumably this is the scispaCy BC5CDR NER model and must be
# installed separately -- confirm against the project's setup instructions.
nlp = spacy.load("en_ner_bc5cdr_md")

In [22]:
# Build the drug lookup table in one chained expression: read the CSV, discard
# the three identifier columns that are not used for lookups, then remove every
# row that still contains a missing value (get_chemical() reads both the
# 'name' and the 'alias' column of each remaining row).
drugs_df = (
    pd.read_csv(r"D:\Events\GE Hackathon\sadrat\additional resources\drug.csv")
    .drop(columns=['drug_id', 'drugbank_id', 'pubchem_cid'])
    .dropna(axis=0)
)
# Preview the first rows as the cell output.
drugs_df.head()

Unnamed: 0,name,alias
0,bivalirudin,Angiomax#BG8967#BG 8967#BG-8967#Hirulog#Hirulo...
1,Goserelin,"Acetate, Goserelin#Goserelin Acetate#ICI118630..."
2,Deamino Arginine Vasopressin,1-Deamino-8-D-arginine Vasopressin#1-Desamino-...
3,cetrorelix,cetrorelix acetate#cetrorelix pamoate#Cetrotid...
4,Felypressin,"Lysine Vasopressin, Phenylalanine#Octapressin#..."


In [23]:
# Parse the drug -> disease mapping from JSON into the module-level
# `drug_disease` object; get_disease() looks drugs up in it by key and sorts
# the value stored under that key.
# NOTE(review): no `encoding` is passed to open(), so the platform default
# encoding is used -- confirm the file decodes correctly on every machine.
with open(r"D:\Events\GE Hackathon\sadrat\additional resources\drug_disease.json", "r") as file:
    drug_disease = json.load(file)

In [24]:
def get_chemical(common_name):
    """   
    This function takes in the common name of any drug as an argument
    and returns its chemical name.

    Every row of the module-level ``drugs_df`` is read as a chemical name
    (``name`` column) plus a '#'-separated list of aliases (``alias`` column).
    The given name is compared, ignoring case and surrounding/repeated
    whitespace, against each individual alias and against the chemical name
    itself. Only whole-name matches count, so a fragment such as "Acetate"
    no longer matches the alias "Goserelin Acetate", and an empty string no
    longer matches every row. The first matching row wins.
  
    Parameters: 
    common_name (str): Common Name of any drug as a string. 
  
    Returns: 
    chemical_name (str): Chemical Name of the given drug, or None when the
    input is not a non-blank string or no row matches. 
  
    """
    if not isinstance(common_name, str):
        return None

    def _normalise(text):
        # Collapse whitespace and fold case so "zestril ", "Zestril" and
        # "ZESTRIL" all compare equal.
        return ' '.join(str(text).split()).casefold()

    wanted = _normalise(common_name)
    if not wanted:
        # A blank query must not match anything.
        return None

    # zip() over the two columns avoids building a row object per iteration.
    for chemical_name, aliases in zip(drugs_df['name'], drugs_df['alias']):
        candidates = [chemical_name] + str(aliases).split('#')
        if any(_normalise(candidate) == wanted for candidate in candidates):
            return chemical_name
    return None

In [25]:
def get_disease(drug):
    """   
    This function takes in the chemical name of any drug as an argument
    and returns all the possible diseases, the drug can be prescribed for.

    Lookups against the module-level ``drug_disease`` mapping are tried from
    most to least precise, and the first hit is returned:
      1. the name exactly as given,
      2. the name with every word capitalised (e.g. "lisinopril" ->
         "Lisinopril"),
      3. the chemical name that get_chemical() resolves for either spelling.
    Returning on the first hit keeps an exact match from being overwritten by
    a fuzzier alias match, and skips the alias scan when it is not needed.
  
    Parameters: 
    drug (str): Chemical Name of any drug as a string. 
  
    Returns: 
    disease (list): Sorted list of all possible diseases, or None when the
    input is not a non-blank string or the drug cannot be found. 
  
    """
    if not isinstance(drug, str) or not drug.strip():
        return None

    title_cased = ' '.join(word.capitalize() for word in drug.split())
    # Keep order (exact spelling first) while dropping a duplicate spelling.
    spellings = [drug] if title_cased == drug else [drug, title_cased]

    # Direct hits on the mapping keys.
    for spelling in spellings:
        if spelling in drug_disease:
            return sorted(drug_disease[spelling])

    # Fall back to resolving a common name / alias to its chemical name.
    for spelling in spellings:
        chemical = get_chemical(spelling)
        if chemical is not None and chemical in drug_disease:
            return sorted(drug_disease[chemical])

    return None

In [26]:
def disease_from_tweet(tweet):
    """   
    This function takes in a tweet or any other string as an argument
    and returns its possible disease related to the tweet.

    The text is run through the module-level ``nlp`` pipeline. Each distinct
    entity text is handled once, in order of first appearance:
      * a "CHEMICAL" entity contributes every disease get_disease() returns
        for it (nothing when get_disease() returns None for an unknown drug),
      * a "DISEASE" entity contributes its own text,
      * entities with any other label contribute nothing.
    The same disease may appear more than once in the result when it comes
    from different entities.
  
    Parameters: 
    tweet (str): A tweet or a string. 
  
    Returns: 
    diseases (list): List of all probable diseases.
  
    """
    doc = nlp(tweet)
    seen_mentions = set()
    diseases = []
    for entity in doc.ents:
        mention = entity.text
        if mention in seen_mentions:
            # Repeated mention of the same text: already accounted for.
            continue
        seen_mentions.add(mention)

        label = entity.label_
        if label == "CHEMICAL":
            # get_disease() returns None for drugs it cannot resolve; treat
            # that as "no diseases" instead of letting extend() raise.
            diseases.extend(get_disease(mention) or [])
        elif label == "DISEASE":
            diseases.append(mention)
    return diseases

In [27]:
# Sample input for disease_from_tweet(): a patient review that mentions
# lisinopril several times. The review text appears twice in the string, the
# second copy following the "... more »" marker.
tweet = 'My doctor prescribed me lisinopril because my BP was a little high.  I took the pill for 2 days and noticed a small mosquito looking bump in the center of my top lip.  About 3 hours later my lip was so big & numb I immediately went to the ER to find out lisinopril was the cause of it.  This is ridiculous Im embarrassed to go any place because of my lip. If any one is thinking about starting a law suite you can count me in. ... more »My doctor prescribed me lisinopril because my BP was a little high.  I took the pill for 2 days and noticed a small mosquito looking bump in the center of my top lip.  About 3 hours later my lip was so big & numb I immediately went to the ER to find out lisinopril was the cause of it.  This is ridiculous Im embarrassed to go any place because of my lip. If any one is thinking about starting a law suite you can count me in.'

In [28]:
# Run the extraction on the sample text; the returned list is shown as the
# cell output.
disease_from_tweet(tweet)

['Abdominal Pain',
 'Acidosis',
 'Acquired angioedema',
 'Acute Kidney Injury',
 'Airway Obstruction',
 'Albuminuria',
 'Anemia, Aplastic',
 'Angioedema',
 'Azotemia',
 'Bipolar Disorder',
 'Bradycardia',
 'Cardiomegaly',
 'Chemical and Drug Induced Liver Injury',
 'Cholestasis, Intrahepatic',
 'Cough',
 'Diabetic Nephropathies',
 'Diabetic Neuropathies',
 'Diabetic Retinopathy',
 'Diarrhea',
 'Dizziness',
 'Dyspnea',
 'Exanthema',
 'Fatigue',
 'Fever',
 'Fibrosis',
 'Glomerulonephritis, IGA',
 'Glomerulosclerosis, Focal Segmental',
 'Glycosuria',
 'Headache',
 'Heart Failure',
 'Hematoma',
 'Hepatic Encephalopathy',
 'Hyperalgesia',
 'Hypercholesterolemia',
 'Hyperkalemia',
 'Hyperlipidemias',
 'Hypertension',
 'Hypertension, Malignant',
 'Hypertriglyceridemia',
 'Hypertrophy, Left Ventricular',
 'Hypotension',
 'Hypotension, Orthostatic',
 'Inappropriate ADH Syndrome',
 'Infant, Premature, Diseases',
 'Intestinal Diseases',
 'Jaundice',
 'Kidney Diseases',
 'Kidney Failure, Chronic',