In [1]:
import pandas as pd
import numpy as np
import re
from wordcloud import WordCloud, STOPWORDS
from collections import Counter
import scispacy
import spacy

If scispacy and spacy are not installed, uncomment the next cell and run it

In [2]:
#This cell will install both scispacy and spacy libraries
# import sys
# !{sys.executable} -m pip install --user scispacy

#### Please change file location in next cell to the location it is stored in your system

In [3]:
#importing
df = pd.read_csv('df_diseases.csv')

In [4]:
#sneaking
# df.head()

In [5]:
# dropping col's
df.drop([df.columns[0],df.columns[2]],axis=1,inplace=True)

In [6]:
# sneaking again
#df.head()

In [7]:
# Filling NaN values with empty string
df.fillna('',inplace=True)

In [8]:
## some pre-processing
for i in range(len(df)):
    df.loc[i,'symptoms']=re.sub('[^A-Za-z0-9., ]+', '', df.loc[i,'symptoms'])
    df.loc[i,'causes']=re.sub('[^A-Za-z0-9., ]+', '', df.loc[i,'causes'])
    df.loc[i,'risk_factor']=re.sub('[^A-Za-z0-9., ]+', '', df.loc[i,'risk_factor'])
    df.loc[i,'overview']=re.sub('[^A-Za-z0-9., ]+', '', df.loc[i,'overview'])
    df.loc[i,'treatment']=re.sub('[^A-Za-z0-9., ]+', '', df.loc[i,'treatment'])
    df.loc[i,'medication']=re.sub('[^A-Za-z0-9., ]+', '', df.loc[i,'medication'])
    df.loc[i,'home_remedies']=re.sub('[^A-Za-z0-9., ]+', '', df.loc[i,'home_remedies'])

In [9]:
##sneaking
# df.head()

In [10]:
#some more preprocessing
df['name']=df['name'].str.lower()
df['symptoms']=df['symptoms'].str.lower()
df['causes']=df['causes'].str.lower()
df['risk_factor']=df['risk_factor'].str.lower()
df['overview']=df['overview'].str.lower()
df['treatment']=df['treatment'].str.lower()
df['medication']=df['medication'].str.lower()
df['home_remedies']=df['home_remedies'].str.lower()

In [11]:
# df.head()

#### The next cell requires 'stopwords.txt' file.

In [12]:
#importing list of stopwords using file handling
f=open("stopwords.txt","r")
var=f.read()
var=re.sub('[^A-Za-z0-9., ]+','' ,var)
sw_list=var.split(',')
sw_list=list(set(sw_list))
for i in range(len(sw_list)):
    sw_list[i]=sw_list[i].replace(' ','')

stopwords = set(STOPWORDS)
## adding stopwords
for i in range(len(sw_list)):
    stopwords.add(sw_list[i])
#the list stopwords now contains all the added stopwords

symptoms_str=''
for i in range(len(df)):
    symptoms_str += df.loc[i,'symptoms']
str_split = symptoms_str.split()
ss = [words for words in str_split if words not in stopwords]

#finding the count of words in descending order
Counter1 = Counter(ss)

#the list 'symptoms_common' contains all the keywords with their frequency
symptoms_common = Counter1.most_common()

In [13]:
#symptoms_common[:100]

## Task 2

#### Installing some required models

In [14]:
# import sys
# !{sys.executable} -m pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz

Uncomment the next cell and simply run it to install the model to be used later:

In [15]:
# import sys
# !{sys.executable} -m pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_md-0.2.4.tar.gz

In [16]:
# !{sys.executable} -m spacy download en_core_web_sm

In [17]:
# import sys
# !{sys.executable} -m spacy download en_core_sci_lg

In [18]:
# !{sys.executable} -m pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_lg-0.2.4.tar.gz

In [19]:
#add a new column 'keywords' to the dataframe
df['keywords']='na'

#### There are 3 pre-trained models, each model containing different amount of medical words. 

##### IF the below cell throws an error even after successful installation of 'en-core-sci-md' model, then it'll start working after restarting the kernel

#### The next two cells take a lot of time to execute, if you don't need them now, comment them

In [20]:
#These models are arranged in ascending order of number of medical words.
# We have tried all three models and now using one of these models for demonstration purposes.
#nlp=spacy.load('en_core_web_sm')
nlp=spacy.load('en_core_sci_md')
#nlp=spacy.load('en_core_sci_lg')

In [21]:
for i in range(len(df)):
    x=df.loc[i,'symptoms']+df.loc[i,'causes']+df.loc[i,'risk_factor']+df.loc[i,'overview']
    temp=nlp(x)
    list1=list(temp.ents)
    for j in range(len(list1)):
        list1[j]=str(list1[j])
    df.loc[i,'keywords']=list(set(list1)) 

In [22]:
keyword_list=[]

In [23]:
for i in range(len(df)):
    keyword_list+=df.loc[i,'keywords']

In [24]:
Counter=Counter(keyword_list)

In [25]:
keywords_common=Counter.most_common()

##### Top k frequently appearing words in entire column 'keywords':

In [26]:
keywords_common[:100]

[('symptoms', 1016),
 ('signs', 890),
 ('people', 778),
 ('risk', 672),
 ('treatment', 618),
 ('increase', 571),
 ('condition', 563),
 ('severe', 513),
 ('age', 477),
 ('your risk', 399),
 ('cases', 399),
 ('pain', 393),
 ('associated with', 377),
 ('medications', 357),
 ('increased', 345),
 ('children', 342),
 ('your body', 337),
 ('factors', 334),
 ('infection', 302),
 ('conditions', 296),
 ('clinic', 296),
 ('women', 284),
 ('skin', 281),
 ('changes', 278),
 ('abnormal', 276),
 ('years', 276),
 ('body', 272),
 ('increases', 269),
 ('family history', 267),
 ('blood', 267),
 ('experience', 266),
 ('child', 263),
 ('disease', 259),
 ('surgery', 248),
 ('affected', 238),
 ('swelling', 238),
 ('problems', 236),
 ('person', 230),
 ('fatigue', 228),
 ('doctor', 228),
 ('cause', 228),
 ('brain', 225),
 ('fever', 223),
 ('appointment', 220),
 ('damage', 217),
 ('inflammation', 214),
 ('men', 211),
 ('nausea', 208),
 ('days', 208),
 ('weeks', 205),
 ('diabetes', 203),
 ('exposure to', 203),
 

### Task 3

In [27]:
#adding a new column named 'class' which will contain the category of the disease
df['class']=-1

#### Higher Risk- Class 1
#### Medium Risk - Class 2
#### Neutral RIsk - Class 3
#### Lower Risk - Class 4

In [29]:
def create_classes(row):
    if((row['medication']=='') and (row['home_remedies']=='')):
        return 1
    elif((row['medication']!='') and (row['home_remedies']=='')):
        return 2
    elif((row['medication']!='') and (row['home_remedies']!='')):
        return 3
    elif((row['medication']=='') and (row['home_remedies']!='')):
        return 4

In [30]:
df['class']=df.apply(create_classes,axis=1)

In [31]:
organ_dict={"muscular system": ["human skeleton", "joints", "ligaments", "muscular system", "tendons"],
 "digestive system": ["mouth", "teeth", "tongue", "salivary glands", "parotid glands", "submandibular glands", "sublingual glands", "pharynx", "esophagus", "stomach", "small intestine", "duodenum", "jejunum", "ileum", "large intestine", "liver", "gallbladder", "mesentery", "pancreas", "anal canal", "anus", "blood cells"],
 "respiratory system": ["nasal cavity", "pharynx", "larynx", "trachea", "bronchi", "lungs", "diaphragm"],
 "urinary system": ["kidneys", "ureter", "bladder", "urethra"],
 "reproductive organs": ["testes", "epididymis", "vas deferens", "seminal vesicles", "prostate", "bulbourethral glands", "ovaries", "fallopian tubes", "uterus", "vagina", "vulva", "clitoris", "placenta", "penis", "scrotum"],
 "endocrine system": ["pituitary gland", "pineal gland", "thyroid gland", "parathyroid glands", "adrenal glands", "pancreas"],
 "circulatory system": ["heart", "patent foramen ovale", "arteries", "veins", "capillaries", "lymphatic vessel", "lymph node", "bone marrow", "thymus", "spleen", "gut-associated lymphoid tissue", "tonsils", "interstitium"],
 "nervous system": ["brain", "cerebrum", "cerebral hemispheres", "diencephalon", "the brainstem", "midbrain", "pons", "medulla oblongata", "cerebellum", "the spinal cord", "the ventricular system", "choroid plexus", "nerves", "cranial nerves", "spinal nerves", "ganglia", "enteric nervous system", "eye", "cornea", "iris", "ciliary body", "lens", "retina", "ear", "outer ear", "earlobe", "eardrum", "middle ear", "ossicles", "inner ear", "cochlea", "vestibule of the ear", "semicircular canals", "olfactory epithelium", "tongue", "taste buds"],
 "integumentary system": ["mammary glands", "skin", "subcutaneous tissue"]}

In [32]:
organ_keys=organ_dict.keys()

In [33]:
organ_keys

dict_keys(['muscular system', 'digestive system', 'respiratory system', 'urinary system', 'reproductive organs', 'endocrine system', 'circulatory system', 'nervous system', 'integumentary system'])

In [34]:
df['organs'] = [[] for _ in range(len(df))]

In [35]:
for i in range(len(df)):
    s=df['symptoms'][i]+df['causes'][i]+df['risk_factor'][i]+df['overview'][i]
    s_tokens=s.split(' ')
    
    for value in organ_keys:
        val=organ_dict.get(value)
        
        for j in val:
            if(j in s_tokens):
                df.loc[i,'organs'].append(j)

In [36]:
df['systems'] = [[] for _ in range(len(df))]

In [37]:
key_list = list(organ_dict.keys()) 
val_list = list(organ_dict.values()) 

In [38]:
for i in range(len(df)):
    for val in df['organs'][i]:
        for k in range(len(val_list)):
            if(val in val_list[k]):
                df['systems'][i].append(key_list[k])

In [39]:
df.head()

Unnamed: 0,name,symptoms,causes,risk_factor,overview,treatment,medication,home_remedies,keywords,class,organs,systems
0,acanthosis nigricans,skin changes are the only signs of acanthosis ...,"acanthosis nigricans has been associated with,...","acanthosis nigricans risk factors include, obe...",acanthosis nigricans is a skin condition that ...,"in many situations, treating the underlying pr...",,,"[texture, type 2 diabetes, acanthosis nigrican...",1,"[stomach, pancreas, pancreas, skin]","[digestive system, digestive system, endocrine..."
1,achalasia,,,,achalasia is a rare disorder that makes it dif...,achalasia treatment focuses on relaxing or for...,,,"[symptoms, loses, your stomach, nerves, result...",1,"[mouth, esophagus, stomach, nerves]","[digestive system, digestive system, digestive..."
2,achilles tendon rupture,although its possible to have no signs or symp...,your achilles tendon helps you point your foot...,factors that may increase your risk of achille...,the achilles tendon is a strong fibrous cord t...,treatment for a ruptured achilles tendon often...,,,"[symptoms, your toes, impair, risk, tennis, to...",1,[tendons],[muscular system]
3,acute coronary syndrome,the signs and symptoms of acute coronary syndr...,acute coronary syndrome usually results from t...,the risk factors for acute coronary syndrome a...,acute coronary syndrome is a term used to desc...,the immediate goals of treatment for acute cor...,"depending on your diagnosis, medications for e...",heart healthy lifestyle changes are an importa...,"[symptoms, discomfort, reduced, prompt, upper ...",3,[heart],[circulatory system]
4,adenomyosis,"sometimes, adenomyosis causes no signs or symp...",the cause of adenomyosis isnt known. there hav...,"risk factors for adenomyosis include, most cas...","with adenomyosis, the same tissue that lines t...","adenomyosis often goes away after menopause, s...",,to ease pelvic pain and cramping related to ad...,"[symptoms, discomfort, estrogen , research, w...",4,[uterus],[reproductive organs]


In [42]:
#apurv

df.at[525,'organs']=['large intestine']
df.at[525,'systems']=['digestive system']

df.at[537,'organs']=['nose','eyes','throat','skin']
df.at[537,'systems']=['respiratory system']

df.at[539,'organs']=['blood']
df.at[539,'systems']=['circulatory system']

df.at[541,'organs']=['brain']
df.at[541,'systems']=['nervous system'] #psycological disorder

df.at[550,'organs']=['blood cells']
df.at[550,'systems']=['circulatory system'] #type of blood cancer

df.at[551,'organs']=['brain']
df.at[551,'systems']=['nervous system'] #psycological disorder

df.at[552,'organs']=['salivary gland'] #infection in salivary glands due to bacteria
df.at[552,'systems']=['digestive system']

df.at[554,'organs']=['muscles']
df.at[554,'systems']=['muscular system']

df.at[555,'organs']=['muscles']
df.at[555,'systems']=['muscular system']

df.at[558,'organs']=['muscles']
df.at[558,'systems']=['muscular system'] #bone marrow cancer

df.at[563,'organs']=['muscles']
df.at[563,'systems']=['muscular system'] #extreme pain in muscles due to various reasons

df.at[564,'organs']=['legs','arms']
df.at[564,'systems']=['muscular system'] #cancer that develops lump in legs or arms

df.at[566,'organs']=['nose','eyes','mouth']
df.at[566,'systems']=['respiratory system'] #cancer developing in nose

df.at[576,'organs']=['nerves'] #tumor nerves in peripheral nervous system
df.at[576,'systems']=['nervous system']

#the disease is classified as a disease of the respiratory sytem, but actually for nose fracture the 
#patient must be recommended to an orthopedic
df.at[593,'organs']=['nose']
df.at[593,'systems']=['respiratory system'] #nose fracture

df.at[594,'organs']=['fat']
df.at[594,'systems']=['integumentary system'] #obesity

df.at[603,'organs']=['brain']
df.at[603,'systems']=['nervous system'] #a type of brain tumor

df.at[609,'organs']=['joints']
df.at[609,'systems']=['muscular system']

df.at[614,'organs']=['joints','bones']
df.at[614,'systems']=['muscular system']

df.at[617,'organs']=['bone']
df.at[617,'systems']=['muscular system'] #bone cancer

df.at[623,'organs']=['thyroid gland']
df.at[623,'systems']=['endocrine system'] #cancer that occurs in the cells of thyroid gland

df.at[637,'organs']=['nerves']
df.at[637,'systems']=['nervous system'] #tumor in certain nerves

df.at[640,'organs']=['salivary glands']
df.at[640,'systems']=['digestive system'] #tumor in salivary glands

df.at[641,'organs']=['pumonary veins']
df.at[641,'systems']=['circulatory system'] #defect in pulmonary veins of the heart

df.at[643,'organs']=['brain']
df.at[643,'systems']=['nervous system'] #mental disorder

df.at[647,'organs']=['chest']
df.at[647,'systems']=['circulatory system'] 

df.at[649,'organs']=['hair','scalp']
df.at[649,'systems']=['integumentary system'] #tiny insects infect the scalp

df.at[670,'organs']=['foot']
df.at[670,'systems']=['muscular system'] 

df.at[679,'organs']=['brain','pineal gland']
df.at[679,'systems']=['nervous system'] #cancer develops in the pineal gland of brain

df.at[689,'organs']=['tissue']
df.at[689,'systems']=['muscular system']

df.at[694,'organs']=['lungs']
df.at[694,'systems']=['respiratory system']

df.at[700,'organs']=['muscles']
df.at[700,'systems']=['muscular system']

df.at[701,'organs']=['rectum','large intestine']
df.at[701,'systems']=['digestive system']

df.at[702,'organs']=['sinus','nose']
df.at[702,'systems']=['respiratory system']

df.at[704,'organs']=['popliteal artery','knee joint']
df.at[704,'systems']=['circulatory system']

df.at[706,'organs']=['knee','joint']
df.at[706,'systems']=['muscular system']

df.at[714,'organs']=['brain']
df.at[714,'systems']=['nervous system'] #psycological disorder

df.at[715,'organs']=['urine']
df.at[715,'systems']=['reproductive system']

df.at[716,'organs']=['thyroid gland']
df.at[716,'systems']=['endocrine system']

df.at[722,'organs']=['None']
df.at[722,'systems']=['Reproductive system']

df.at[728,'organs']=['brain']
df.at[728,'systems']=['nervous system'] #psycological disorder, addiction

df.at[735,'organs']=['adrenal gland']
df.at[735,'systems']=['endocrine system']

df.at[748,'organs']=['muscles']
df.at[748,'systems']=['muscular system']

df.at[750,'organs']=['large intestine']
df.at[750,'systems']=['digestive system'] #swelling of large intestine due to bacteria

df.at[757,'organs']=['pulmonary valve']
df.at[757,'systems']=['cirulatory system']

df.at[758,'organs']=['heart']
df.at[758,'systems']=['ciruclatory system']

df.at[769,'organs']=['body']
df.at[769,'systems']=['nervous system']

df.at[775,'organs']=['brain']
df.at[775,'systems']=['nervous system']

df.at[776,'organs']=['rectum']
df.at[776,'systems']=['reproductive system']

#end of apurv code

#Gargi

#df['name'][270] = 'fibrocystic breasts'
df.at[270,'organs']=['breasts']
df.at[270,'systems']=['reproductive system']

#df['name'][274] = 'fibrous dysplasia'
df.at[274,'organs']=['bone']
df.at[274,'systems']=['muscular system']

#df['name'][281] = 'foot fracture (see: broken foot)'
df.at[281,'organs']=['foot', 'bones']
df.at[281,'systems']=['muscular system']

#df['name'][282] = 'fracture, arm (see: broken arm)'
df.at[282,'organs']=['bone']
df.at[282,'systems']=['muscular system']

#df['name'][298] = 'gastrointestinal stromal tumor (gist)'
df.at[298,'organs']=['stomach', 'intestine']
df.at[298,'systems']=['digestive system']

#df['name'][300] = 'gender identity disorder (see: gender dysphoria)'
df.at[300,'organs']=['reproductive system']
df.at[300,'systems']=['reproductive system']

#df['name'][306] = 'giardia infection (giardiasis)'
df.at[306,'organs']=['stomach', 'intestine']
df.at[306,'systems']=['digestive system']

#df['name'][309] = 'glioblastoma'
df.at[309,'organs']=['brain', 'spine']
df.at[309,'systems']=['nervous system']

#df['name'][318] = 'growing pains'
df.at[318,'organs']=['legs','thighs','knees','calves']
df.at[318,'systems']=['muscular system']

#df['name'][325] = 'hamstring injury'
df.at[325,'organs']=['thigh']
df.at[325,'systems']=['muscular system']

#df['name'][326] = 'hand fracture (see: broken hand)'
df.at[326,'organs']=['hand']
df.at[326,'systems']=['muscular system']

#df['name'][330] = 'head and neck cancers'
df.at[330,'organs']=['mouth', 'sinuses', 'nose', 'throat']
df.at[330,'systems']=['respiratory system']

#df['name'][346] = 'hemifacial spasm
df.at[346,'organs']=['facial muscles']
df.at[346,'systems']=['muscular system']

#df['name'][353] = 'hepatocellular carcinoma'
df.at[353,'organs']=['liver']
df.at[353,'systems']=['digestive system']

#df['name'][361] = 'hip dysplasia'
df.at[361,'organs']=['hip']
df.at[361,'systems']=['muscular system']

#df['name'][363] = 'hip impingement'
df.at[363,'organs']=['hip']
df.at[363,'systems']=['muscular system']

#df['name'][364] = 'hip labral tear'
df.at[364,'organs']=['labrum', 'hip']
df.at[364,'systems']=['muscular system']

#df['name'][374] = 'hydronephrosis'
df.at[374,'organs']=['abdomen', 'kidney']
df.at[374,'systems']=['digestive system', 'urinary system']

#df['name'][386] = 'hypoglycemia, diabetic (see: diabetic hypoglycemia)'
df.at[386,'organs']=['low sugar']
df.at[386,'systems']=['digestive system']

#df['name'][395] = 'idiopathic hypersomnia'
df.at[395,'organs']=['neurological sleep disorder']
df.at[395,'systems']=['nervous system']

#df['name'][402] = 'ncomplete fracture (see: greenstick fractures)'
df.at[402,'organs']=['bones', 'forearm', 'legs']
df.at[402,'systems']=['muscular system']

#df['name'][408] = 'inflammatory bowel disease (ibd)'
df.at[408,'organs']=['digestive tract']
df.at[408,'systems']=['digestive system']

#df['name'][413] = 'ingrown toenails'
df.at[413,'organs']=['toenails', 'nails', 'finger']
df.at[413,'systems']=['integumentary system']

#df['name'][416] = 'intestinal obstruction'
df.at[416,'organs']=['abdomen', 'intestine']
df.at[416,'systems']=['digestive system']

#df['name'][418] = 'intraductal carcinoma (see: ductal carcinoma in situ (dcis))'
df.at[418,'organs']=['breasts']
df.at[418,'systems']=['reproductive system']

#df['name'][419] = 'intussusception'
df.at[419,'organs']=['intestine']
df.at[419,'systems']=['digestive system']

#df['name'][432] = 'Kaposi sarcoma'
df.at[432,'organs']=['skin', 'lymph nodes', 'mucous membranes lining the mouth', 'nose', 'throat']
df.at[432,'systems']=['integumentary system']

#df['name'][436] = 'kidney cysts'
df.at[436,'organs']=['kidney']
df.at[436,'systems']=['urinary system']

#df['name'][438] = 'klatskin tumor (see: hilar cholangiocarcinoma)'
df.at[438,'organs']=['hepatic duct', 'liver']
df.at[438,'systems']=['digestive system']

#df['name'][439] = 'klinefelter syndrome'
df.at[439,'organs']=['genetic', 'reproductive']
df.at[439,'systems']=['reproductive system']

#df['name'][450] = 'leg fracture (see: broken leg)'
df.at[450,'organs']=['leg', 'bone']
df.at[450,'systems']=['muscular system']

#df['name'][451] = 'legg-calve-perthes disease'
df.at[451,'organs']=['femur', 'hip', 'bone', 'thigh bone']
df.at[451,'systems']=['circulatory system']

#df['name'][452] = 'legionnaires disease'
df.at[452,'organs']=['heart' , 'lungs' ,'muscles']
df.at[452,'systems']=['muscular system','respiratory system', 'circulatory system']

#df['name'][453] = 'leiomyosarcoma'
df.at[453,'organs']=['stomach','bladder', 'intestine', 'uterus']
df.at[453,'systems']=['reproductive system','urinary system']

#df['name'][454] = 'leukemia, acute lymphocytic (see: acute lymphocytic leukemia)'
df.at[454,'organs']=['blood', 'bone marrow']
df.at[454,'systems']=['circulatory system']

#df['name'][455] = 'leukemia, acute myelogenous (see: acute myelogenous leukemia)'
df.at[455,'organs']=['blood', 'bone marrow']
df.at[455,'systems']=['circulatory system']

#df['name'][456] = 'leukemia, chronic lymphocytic (see: chronic lymphocytic leukemia)'
df.at[456,'organs']=['blood', 'bone marrow']
df.at[456,'systems']=['circulatory system']

#df['name'][457] = 'leukemia, chronic myelogenous (see: chronic myelogenous leukemia)'
df.at[457,'organs']=['blood', 'bone marrow']
df.at[457,'systems']=['circulatory system']

#df['name'][459] = 'leukemia, hairy cell (see: hairy cell leukemia)'
df.at[459,'organs']=['blood', 'bone marrow']
df.at[459,'systems']=['circulatory system']

#df['name'][469] = 'liposarcoma'
df.at[469,'organs']=['abdomen', 'thigh', 'knee', 'fat cells']
df.at[469,'systems']=['muscular system']

#df['name'][470] = 'listeriosis (see: listeria infection)'
df.at[470,'organs']=['brain', 'spinal cord', 'bloodstream']
df.at[470,'systems']=['nervous system','circulatory system']

#df['name'][476] = 'lobular carcinoma in situ (lcis)'
df.at[476,'organs']=['breasts']
df.at[476,'systems']=['reproductive system']

#df['name'][481] = 'low sex drive in women'
df.at[481,'organs']=['reproductive system']
df.at[481,'systems']=['reproductive system']

#df['name'][501] = 'mammary duct ectasia'
df.at[501,'organs']=['breasts']
df.at[501,'systems']=['reproductive system']

#df['name'][502] = 'manic-depressive illness (see: bipolar disorder)'
df.at[502,'organs']=['nervous system']
df.at[502,'systems']=['nervous system']

#df['name'][507] = 'mcad deficiency'
df.at[507,'organs']=['skeletal- and heart muscle', 'liver', 'brain']
df.at[507,'systems']=['circulatory system', 'digestive system', 'nervous system']

#df['name'][509] = 'medulloblastoma'
df.at[509,'organs']=['cerebellum', 'brain']
df.at[509,'systems']=['nervous system']

#df['name'][523] = 'metatarsalgia'
df.at[523,'organs']=['foot']
df.at[523,'systems']=['muscular system']

#end of gargi code
                     

#pradeumna

df.at[779,'organs']=['anus']
df.at[779,'systems']=['digestive system']

df.at[791,'organs']=['human skeleton']
df.at[791,'systems']=['muscular system']

df.at[808,'organs']=['brain']
df.at[808,'systems']=['nervous system']

df.at[810,'organs']=['stomach']
df.at[810,'systems']=['digestive system']

df.at[811,'organs']=['tendons']
df.at[811,'systems']=['muscular system']

df.at[815,'organs']=['joints']
df.at[815,'systems']=['muscular system']

df.at[823,'organs']=['human skeleton']
df.at[823,'systems']=['nervous system']

df.at[826,'organs']=['brain']
df.at[826,'systems']=['nervous system']

df.at[827,'organs']=['brain']
df.at[827,'systems']=['nervous system']

df.at[831,'organs']=['brain']
df.at[831,'systems']=['nervous system']

df.at[841,'organs']=['skin']
df.at[841,'systems']=['integumentary system']

df.at[848,'organs']=['brain']
df.at[848,'systems']=['nervous system']

df.at[849,'organs']=['brain']
df.at[849,'systems']=['nervous system']

df.at[853,'organs']=['brain']
df.at[853,'systems']=['nervous system']

df.at[859,'organs']=['lungs']
df.at[859,'systems']=['respiratory system']

df.at[867,'organs']=['large intestine']
df.at[867,'systems']=['digestive system']

df.at[876,'organs']=['brain']
df.at[876,'systems']=['nervous system']

df.at[889,'organs']=['brain']
df.at[889,'systems']=['nervous system']

df.at[905,'organs']=['muscular']
df.at[905,'systems']=['muscular system']

df.at[928,'organs']=['vagina']
df.at[928,'systems']=['reproductive system']

df.at[949,'organs']=['muscular system']
df.at[949,'systems']=['muscular system']

df.at[962,'organs']=['brain']
df.at[962,'systems']=['nervous system']

df.at[969,'organs']=['skin']
df.at[969,'systems']=['integumentary system']

df.at[977,'organs']=['joints']
df.at[977,'systems']=['muscular system']

df.at[982,'organs']=['bone marrow']
df.at[982,'systems']=['circulatory system']

df.at[995,'organs']=['muscular system']
df.at[995,'systems']=['muscular system']

df.at[996,'organs']=['joints']
df.at[996,'systems']=['muscular system']

df.at[1006,'organs']=['kidneys']
df.at[1006,'systems']=['urinary system']

df.at[1009,'organs']=['skin']
df.at[1009,'systems']=['integumentary system']

df.at[1014,'organs']=['bone marrow']
df.at[1014,'systems']=['circulatory system']

df.at[1015,'organs']=['blood']
df.at[1015,'systems']=['circulatory system']

df.at[1022,'organs']=['thyroid gland']
df.at[1022,'systems']=['endocrine system']

df.at[1024,'organs']=['thyroid gland']
df.at[1024,'systems']=['endocrine system']

df.at[1029,'organs']=['human skeleton']
df.at[1029,'systems']=['muscular system']

df.at[1042,'organs']=['human skeleton']
df.at[1042,'systems']=['muscular system']

df.at[1045,'organs']=['heart']
df.at[1045,'systems']=['circulatory system']

df.at[1057,'organs']=['brain']
df.at[1057,'systems']=['nervous system']

df.at[1060,'organs']=['vagina']
df.at[1060,'systems']=['reproductive system']

df.at[1061,'organs']=['brain']
df.at[1061,'systems']=['nervous system']

df.at[1080,'organs']=['intestines']
df.at[1080,'systems']=['digestive system']

df.at[1083,'organs']=['anus']
df.at[1083,'systems']=['digestive system']

df.at[1086,'organs']=['muscular system']
df.at[1086,'systems']=['muscular system']

df.at[1124,'organs']=['trachea']
df.at[1124,'systems']=['respiratory system']

df.at[1143,'organs']=['blood']
df.at[1143,'systems']=['circulatory system']

df.at[1147,'organs']=['blood']
df.at[1147,'systems']=['circulatory system']

df.at[1159,'organs']=['blood']
df.at[1159,'systems']=['circulatory system']

df.at[1167,'organs']=['human skeleton']
df.at[1167,'systems']=['muscular system']

df.at[1177,'organs']=['muscular system']
df.at[1177,'systems']=['muscular system']

#end of praduemna code
                     
#prathamesh
df.at[5,'organs']=['brain']
df.at[5,'systems']=['nervous system']

df.at[7,'organs']=['adrenal gland']
df.at[7,'systems']=['endocrine system']

df.at[17,'organs']=['jaw']
df.at[17,'systems']=['muscular system']

df.at[32,'organs']=['uterus','vagina']
df.at[32,'systems']=['reproductive system']

df.at[41,'organs']=['appendix']  ###which system??
df.at[41,'systems']=['digestive system']

df.at[50,'organs']=['lungs']
df.at[50,'systems']=['respiratory system']

df.at[52,'organs']=['brain']
df.at[52,'systems']=['nervous system']

df.at[56,'organs']=['heart']
df.at[56,'systems']=['circulatory system']

df.at[57,'organs']=['brain']
df.at[57,'systems']=['nervous system']

df.at[58,'organs']=['brain']
df.at[58,'systems']=['nervous system']

df.at[60,'organs']=['breasts']#add breast as an organ of the reproductive system in the val_list
df.at[60,'systems']=['reproductive system']

df.at[63,'organs']=['heart']
df.at[63,'systems']=['circulatory system']

df.at[64,'organs']=['skin']
df.at[64,'systems']=['integumentary system']

df.at[72,'organs']=['adrenal gland']
df.at[72,'systems']=['endocrine system']

df.at[74,'organs']=['heart']
df.at[74,'systems']=['circulatory system']

df.at[78,'organs']=['stomach','small intestine','large intestine']
df.at[78,'systems']=['digestive system']

df.at[86,'organs']=['breats']#add breast as an organ of the reproductive system in the val_list
df.at[86,'systems']=['reproductive system']

df.at[87,'organs']=['ligament','joints']
df.at[87,'systems']=['muscular system']

df.at[88,'organs']=['ligament']
df.at[88,'systems']=['muscular system']

df.at[89,'organs']=['human skeleton','joint']
df.at[89,'systems']=['muscular system']

df.at[91,'organs']=['lungs']
df.at[91,'systems']=['respiratory system']

df.at[124,'organs']=['brain']
df.at[124,'systems']=['nervous system']

df.at[130,'organs']=['human skeleton','joints']
df.at[130,'systems']=['muscular system']

df.at[131,'organs']=['brain','pinal nerves']
df.at[131,'systems']=['nervous system']

df.at[132,'organs']=['brain']
df.at[132,'systems']=['nervous system']

df.at[133,'organs']=['brain']
df.at[133,'systems']=['nervous system']

df.at[144,'organs']=['large intestine']
df.at[144,'systems']=['digestive system']

df.at[147,'organs']=['lungs']
df.at[147,'systems']=['respiratory system']

df.at[148,'organs']=['muscular system']
df.at[148,'systems']=['muscular system']

df.at[152,'organs']=['heart']
df.at[152,'systems']=['circulatory system']

df.at[153,'organs']=['muscular system']
df.at[153,'systems']=['muscular system']

df.at[165,'organs']=['brain']
df.at[165,'systems']=['nervous system']

df.at[172,'organs']=['skin']
df.at[172,'systems']=['integumentary system']

df.at[173,'organs']=['UNKNOWN']#vomiting vala problem
df.at[173,'systems']=['digestive system']

df.at[185,'organs']=['brain']
df.at[185,'systems']=['nervous system']

df.at[190,'organs']=['muscular system']
df.at[190,'systems']=['muscular system']

df.at[191,'organs']=['ligament']
df.at[191,'systems']=['muscular system']

df.at[192,'organs']=['skin']
df.at[192,'systems']=['integumentary system']

df.at[200,'organs']=['UNKNOWN']#watery stools due to indigestion
df.at[200,'systems']=['digestive system']

df.at[206,'organs']=['stomach','large intestine']
df.at[206,'systems']=['digestive system']

df.at[208,'organs']=['heart']
df.at[208,'systems']=['circulatory system']

df.at[212,'organs']=['brain']#sleep deprivation
df.at[212,'systems']=['nervous system']

df.at[213,'organs']=['stomach','small intestine','pancreas'] 
df.at[213,'systems']=['digestive system']

df.at[225,'organs']=['brain']
df.at[225,'systems']=['nervous system']

df.at[236,'organs']=['brain']
df.at[236,'systems']=['nervous system']

df.at[247,'organs']=['brain']
df.at[247,'systems']=['nervous system']

df.at[251,'organs']=['bones']
df.at[251,'systems']=['muscular system']

df.at[255,'organs']=['muscular system','human skeleton']
df.at[255,'systems']=['muscular system']

df.at[256,'organs']=['skin']
df.at[256,'systems']=['integumentary system']

df.at[270,'organs']=['breasts']
df.at[270,'systems']=['reproductive system']

#end of prathamesh code

df.at[171,'organs']=['skin']
df.at[171,'systems']=['integumentary system']

df.at[390,'organs']=['parathyroid gland']
df.at[390,'systems']=['endocrine system']

df.at[987,'organs']=['stomach','intestine']
df.at[987,'systems']=['digestive system']

In [43]:
df['final_system'] = [[] for _ in range(len(df))]

In [44]:
from statistics import mode, StatisticsError

In [45]:
for i in range(len(df)):
    if(len(df.loc[i,'systems'])==0):
        df.loc[i,'final_system']='no system'
        continue
    try:
        var=mode(df['systems'][i])
        df.loc[i,'final_system']=var
    except StatisticsError:
        df.loc[i,'final_system']='ambigious'

#### Some final systems are left ambigious for now, because no unique mode was found for such systems in the 'systems' column

In [46]:
df.head(10)

Unnamed: 0,name,symptoms,causes,risk_factor,overview,treatment,medication,home_remedies,keywords,class,organs,systems,final_system
0,acanthosis nigricans,skin changes are the only signs of acanthosis ...,"acanthosis nigricans has been associated with,...","acanthosis nigricans risk factors include, obe...",acanthosis nigricans is a skin condition that ...,"in many situations, treating the underlying pr...",,,"[texture, type 2 diabetes, acanthosis nigrican...",1,"[stomach, pancreas, pancreas, skin]","[digestive system, digestive system, endocrine...",digestive system
1,achalasia,,,,achalasia is a rare disorder that makes it dif...,achalasia treatment focuses on relaxing or for...,,,"[symptoms, loses, your stomach, nerves, result...",1,"[mouth, esophagus, stomach, nerves]","[digestive system, digestive system, digestive...",digestive system
2,achilles tendon rupture,although its possible to have no signs or symp...,your achilles tendon helps you point your foot...,factors that may increase your risk of achille...,the achilles tendon is a strong fibrous cord t...,treatment for a ruptured achilles tendon often...,,,"[symptoms, your toes, impair, risk, tennis, to...",1,[tendons],[muscular system],muscular system
3,acute coronary syndrome,the signs and symptoms of acute coronary syndr...,acute coronary syndrome usually results from t...,the risk factors for acute coronary syndrome a...,acute coronary syndrome is a term used to desc...,the immediate goals of treatment for acute cor...,"depending on your diagnosis, medications for e...",heart healthy lifestyle changes are an importa...,"[symptoms, discomfort, reduced, prompt, upper ...",3,[heart],[circulatory system],circulatory system
4,adenomyosis,"sometimes, adenomyosis causes no signs or symp...",the cause of adenomyosis isnt known. there hav...,"risk factors for adenomyosis include, most cas...","with adenomyosis, the same tissue that lines t...","adenomyosis often goes away after menopause, s...",,to ease pelvic pain and cramping related to ad...,"[symptoms, discomfort, estrogen , research, w...",4,[uterus],[reproductive organs],reproductive organs
5,adjustment disorders,signs and symptoms depend on the type of adjus...,adjustment disorders are caused by significant...,some things may make you more likely to have a...,adjustment disorders are stressrelated conditi...,many people with adjustment disorders find tre...,medications such as antidepressants and antian...,here are some steps you can take to care for y...,"[symptoms, your childs pediatrician, your life...",3,[brain],[nervous system],nervous system
6,adnexal tumors,,,,"the ovaries, fallopian tubes, uterus, cervix a...",,,,"[uterus, cells, location, organs, connective t...",1,"[ovaries, vagina]","[reproductive organs, reproductive organs]",reproductive organs
7,adrenal cancer,"signs and symptoms of adrenal cancer include, ...","its not clear what causes adrenal cancer., adr...",adrenal cancer happens more often in people wi...,"perched atop each of your kidneys, your adrena...",adrenal cancer treatment usually involves surg...,,,"[symptoms, kidneys, shrinking testicles, risk,...",1,[adrenal gland],[endocrine system],endocrine system
8,adrenoleukodystrophy,,,,adrenoleukodystrophy uhdreenohlookohdistruhfee...,"adrenoleukodystrophy has no cure. however, ste...",,,"[symptoms, xlinked ald, leukodystrophy, males,...",1,"[bladder, brain]","[urinary system, nervous system]",ambigious
9,adult still's disease,most people with adult stills disease have a c...,its not certain what causes adult stills disea...,age is the main risk factor for adult stills d...,adult stills disease is a rare type of inflamm...,doctors use a variety of drugs to treat adult ...,,here are ways to make the most of your health ...,"[symptoms, risk factor, prednisone, males, swo...",4,[joints],[muscular system],muscular system


In [None]:
temp=[]

for i in range(len(df)):
    if(len(df.loc[i,'systems'])==0):
        temp.append(True)
    else:
        temp.append(False)

#### Train the model: