In [2]:
import json
import os

file_path = 'Medical_dataset/intents_short.json'

# Check if the file exists
if not os.path.exists(file_path):
    print(f"The file {file_path} does not exist.")
else:
    with open(file_path, 'r') as file:
        data = json.load(file)
        print(data)


The file Medical_dataset/intents_short.json does not exist.


In [None]:
import pandas as pd
import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')


lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package punkt to /home/etd/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/etd/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /home/etd/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
from spacy.lang.en.stop_words import STOP_WORDS
import spacy
nlp = spacy.load('en_core_web_sm')

In [None]:
def preprocess(doc):
    doc=doc.replace("'t",' not')
    nlp_doc=nlp(doc)
    d=[]
    for token in nlp_doc:
        if(not token.text.lower()  in STOP_WORDS and  token.text.isalpha()):
            d.append(token.lemma_.lower() )
    return ' '.join(d)

In [None]:
stp=stopwords.words('english')
stp.remove('not')

In [None]:
def preprocess_sent(sent):
    sent=sent.replace("'t",' not')
    t=nltk.word_tokenize(sent)
    return ' '.join([lemmatizer.lemmatize(w.lower()) for w in t if (w not in stp and w.isalpha())])

In [None]:
preprocess_sent("i can't breath")

'not breath'

In [None]:
sent=[]
app_tag=[]
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        sent.append(preprocess_sent(pattern))
        app_tag.append(tag)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(sent)
feature_names = vectorizer.get_feature_names()
dense = vectors.todense()
denselist = dense.tolist()
df = pd.DataFrame(denselist, columns=feature_names)

In [None]:
vocab=list(df.columns)

In [None]:
def bag_of_words(tokenized_sentence, all_words):
    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in tokenized_sentence:
            bag[idx] = 1.0
    return bag

TEST

In [None]:
# synthetic dataset created for neural network validation
xy_test = [
    (['can',"'t", 'think', 'straight'], 'altered_sensorium'),
    (['suffer', 'from', 'anxeity'], 'anxiety'),
    (['suffer', 'from', 'anxeity'], 'anxiety'),
    (['bloody', 'poop'], 'bloody_stool'),
    (['blurred', 'vision'], 'blurred_and_distorted_vision'),
    (['can', "'t", 'breathe'], 'breathlessness'),
    (['Yellow', 'liquid', 'pimple'], 'yellow_crust_ooze'),
    (['lost', 'weight'], 'weight_loss'),
    (['side', 'weaker'], 'weakness_of_one_body_side'),
    (['watering', 'eyes'], 'watering_from_eyes'),
    (['brief', 'blindness'], 'visual_disturbances'),
    (['throat', 'hurts'], 'throat_irritation'),
    (['extremities', 'swelling'], 'swollen_extremeties'),
    (['swollen', 'lymph', 'nodes'], 'swelled_lymph_nodes'),
    (['dark', 'under', 'eyes'], 'sunken_eyes'),
    (['stomach', 'blood'], 'stomach_bleeding'),
    (['blood', 'urine'], 'spotting_urination'),
    (['sinuses', 'hurt'], 'sinus_pressure'),
    (['watery', 'from', 'nose'], 'runny_nose'),
    (['have', 'to', 'move'], 'restlessness'),
    (['red', 'patches', 'body'], 'red_spots_over_body'),
    (['sneeze'], 'continuous_sneezing'),
    (['coughing'], 'cough'),
    (['skin', 'patches'], 'dischromic_patches'),
    (['skin', 'bruised'], 'bruising'),
    (['burning', 'pee'], 'burning_micturition'),
    (['hurts', 'pee'], 'burning_micturition'),
    (['Burning', 'sensation'], 'burning_micturition'),
    (['chest', 'pressure'], 'chest_pain'),
    (['pain', 'butt'], 'pain_in_anal_region'),
    (['heart', 'bad', 'beat'], 'palpitations'),
    (['fart', 'lot'], 'passage_of_gases'),
    (['cough', 'phlegm'], 'phlegm'),
    (['lot', 'urine'], 'polyuria'),
    (['Veins', 'bigger'], 'prominent_veins_on_calf'),
    (['Veins', 'emphasized'], 'prominent_veins_on_calf'),
    (['yellow', 'pimples'], 'pus_filled_pimples'),
    (['red', 'nose'], 'red_sore_around_nose'),
    (['skin', 'yellow'], 'yellowish_skin'),
    (['eyes', 'yellow'], 'yellowing_of_eyes'),
    (['large', 'thyroid'], 'enlarged_thyroid'),
    (['really', 'hunger'], 'excessive_hunger'),
    (['always', 'hungry'], 'excessive_hunger'),
]

In [None]:
#df.to_csv(r'tfidfsymptoms.csv', index = False)

In [None]:
def preprocess_test(sent):
    return [lemmatizer.lemmatize(w.lower()) for w in sent if (w not in set(stopwords.words('english')) and w.isalpha())]

In [None]:
preprocess_sent(' '.join(xy_test[0][0]))

'not think straight'

In [None]:
y_true=[]
y_pred=[]
for x,y in xy_test:
    y_true.append(y)
    p=preprocess_sent(' '.join(x))
    print(p)
    bow=np.array(bag_of_words(p,vocab))
    #    bow=vectorizer.transform(p).toarray()
    res=cosine_similarity(bow.reshape((1, -1)), df).reshape(-1)
    y_pred.append(app_tag[np.argmax(res)])

not think straight
suffer anxeity
suffer anxeity
bloody poop
blurred vision
not breathe
yellow liquid pimple
lost weight
side weaker
watering eye
brief blindness
throat hurt
extremity swelling
swollen lymph node
dark eye
stomach blood
blood urine
sinus hurt
watery nose
move
red patch body
sneeze
coughing
skin patch
skin bruised
burning pee
hurt pee
burning sensation
chest pressure
pain butt
heart bad beat
fart lot
cough phlegm
lot urine
vein bigger
vein emphasized
yellow pimple
red nose
skin yellow
eye yellow
large thyroid
really hunger
always hungry


In [None]:
y_pred

['altered_sensorium',
 'anxiety',
 'anxiety',
 'constipation',
 'blurred_and_distorted_vision',
 'loss_of_appetite',
 'yellow_crust_ooze',
 'weight_loss',
 'weakness_of_one_body_side',
 'watering_from_eyes',
 'visual_disturbances',
 'patches_in_throat',
 'restlessness',
 'swelled_lymph_nodes',
 'sunken_eyes',
 'belly_pain',
 'spotting_urination',
 'sinus_pressure',
 'runny_nose',
 'restlessness',
 'red_spots_over_body',
 'continuous_sneezing',
 'cough',
 'patches_in_throat',
 'bruising',
 'burning_micturition',
 'burning_micturition',
 'burning_micturition',
 'chest_pain',
 'pain_in_anal_region',
 'palpitations',
 'passage_of_gases',
 'phlegm',
 'polyuria',
 'prominent_veins_on_calf',
 'prominent_veins_on_calf',
 'pus_filled_pimples',
 'red_sore_around_nose',
 'yellowish_skin',
 'yellowing_of_eyes',
 'enlarged_thyroid',
 'excessive_hunger',
 'excessive_hunger']

In [None]:
error=0
for i in range(len(y_pred)):
    if y_pred[i]!=y_true[i]:
        error+=1
    

In [None]:
1-error/len(y_true)

0.8604651162790697

In [None]:
x=['breathe']
p=preprocess_sent(' '.join(x))
bow=np.array(bag_of_words(p,vocab))
res=cosine_similarity(bow.reshape((1, -1)), df).reshape(-1)
app_tag[np.argmax(res)]

'breathlessness'

In [None]:
a=np.argsort(res)[::-1][:2].tolist()

# DEPLOIMENT

In [None]:
df=pd.read_csv('tfidfsymptoms.csv')
vocab=list(df.columns)

In [None]:
import joblib
knn= joblib.load('knn.pkl')  
#knn_from_joblib.predict(X_test) 

In [None]:
def bag_of_words(tokenized_sentence, all_words):
    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in tokenized_sentence:
            bag[idx] = 1.0
    return bag

In [None]:
def predictSym(sym,vocab,app_tag):
    sym=preprocess_sent(sym)
    bow=np.array(bag_of_words(sym,vocab))
    res=cosine_similarity(bow.reshape((1, -1)), df).reshape(-1)
    order=np.argsort(res)[::-1].tolist()
    possym=[]
    for i in order:
        if app_tag[i].replace('_',' ') in sym:
            return app_tag[i],1
        if app_tag[i] not in possym and res[i]!=0:
            possym.append(app_tag[i])
    return possym,0

In [None]:
predictSym('i have skin erumptions',vocab,app_tag)

(['nodal_skin_eruptions',
  'skin_rash',
  'yellowish_skin',
  'silver_like_dusting',
  'dischromic_patches',
  'skin_peeling',
  'sunken_eyes'],
 0)

In [None]:
df_tr=pd.read_csv('Medical_dataset/Training.csv')

In [None]:
disease=df_tr.iloc[:,-1].to_list()
all_symp_col=list(df_tr.columns[:-1])
all_symp=[clean_symp(sym) for sym in (all_symp_col)]

In [None]:
#recoit client_symptoms et renvoit un dataframe avec 1 pour les symptoms associees
def OHV(cl_sym,all_sym):
    l=np.zeros([1,len(all_sym)])
    for sym in cl_sym:
        l[0,all_sym.index(sym)]=1
    return pd.DataFrame(l, columns =all_symp)

def contains(small, big):
    a=True
    for i in small:
        if i not in big:
            a=False
    return a

def possible_diseases(l,disease):
    poss_dis=[]
    for dis in set(disease):
        if contains(l,symVONdisease(df_tr,dis)):
            poss_dis.append(dis)
    return poss_dis

def possible_diseases(l):
    poss_dis=[]
    for dis in set(disease):
        if contains(l,symVONdisease(df_tr,dis)):
            poss_dis.append(dis)
    return poss_dis

#recoit une maladie renvoit tous les sympts
def symVONdisease(df,disease):
    ddf=df[df.prognosis==disease]
    m2 = (ddf == 1).any()
    return m2.index[m2].tolist()
    
def clean_symp(sym):
    return sym.replace('_',' ').replace('.1','').replace('(typhos)','').replace('yellowish','yellow').replace('yellowing','yellow') 

In [None]:
symVONdisease(df_tr,'Allergy')

['continuous_sneezing', 'shivering', 'chills', 'watering_from_eyes']

In [None]:
def getInfo():
    # name=input("Name:")
    print("Your Name \n\t\t\t\t\t\t",end="=>")
    name=input("")
    print("hello ",name)
    return str(name)

In [None]:
import csv

severityDictionary=dict()
description_list = dict()
precautionDictionary=dict()

def getDescription():
    global description_list
    with open('symptom_Description.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            _description={row[0]:row[1]}
            description_list.update(_description)




def getSeverityDict():
    global severityDictionary
    with open('symptom_severity.csv') as csv_file:

        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        try:
            for row in csv_reader:
                _diction={row[0]:int(row[1])}
                severityDictionary.update(_diction)
        except:
            pass


def getprecautionDict():
    global precautionDictionary
    with open('symptom_precaution.csv') as csv_file:

        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            _prec={row[0]:[row[1],row[2],row[3],row[4]]}
            precautionDictionary.update(_prec)

def calc_condition(exp,days):
    sum=0
    for item in exp:
         sum=sum+severityDictionary[item]
    if((sum*days)/(len(exp))>13):
        return 1
        print("You should take the consultation from doctor. ")
    else:
        return 0
        print("It might not be that bad but you should take precautions.")


In [None]:
getSeverityDict()
getprecautionDict()
getDescription()

In [None]:
def main_sp(name):
    #main Idea: At least two initial sympts to start with
    
    #get the 1st syp ->> process it ->> check_pattern ->>> get the appropriate one (if check_pattern==1 == similar syntaxic symp found)
    print("Hi Mr/Ms "+name+", can you describe you main symptom ?  \n\t\t\t\t\t\t",end="=>")
    sym1 = input("")
    psym1,find=predictSym(sym1,vocab,app_tag)
    if find==1:
        sym1=psym1
    else:
        i=0
        while True and i<len(psym1):
            print('Do you experience '+psym1[i].replace('_',' '))
            rep=input("")
            if str(rep)=='yes':
                sym1=psym1[i]
                break
            else:
                i=i+1

    print("Is there any other symtom Mr/Ms "+name+"  \n\t\t\t\t\t\t",end="=>")
    sym2=input("")
    psym2,find=predictSym(sym2,vocab,app_tag)
    if find==1:
        sym2=psym2
    else:
        i=0
        while True and i<len(psym2):
            print('Do you experience '+psym2[i].replace('_',' '))
            rep=input("")
            if str(rep)=='yes':
                sym2=psym2[i]
                break
            else:
                i=i+1
    
    #create patient symp list
    all_sym=[sym1,sym2]
    #predict possible diseases
    diseases=possible_diseases(all_sym)
    stop=False
    print("Are you experiencing any ")
    for dis in diseases:
        if stop==False:
            for sym in symVONdisease(df_tr,dis):
                if sym not in all_sym:
                    print(clean_symp(sym)+' ?')
                    while True:
                        inp=input("")
                        if(inp=="yes" or inp=="no"):
                            break
                        else:
                            print("provide proper answers i.e. (yes/no) : ",end="")
                    if inp=="yes":
                        all_sym.append(sym)
                        dise=possible_diseases(all_sym)
                        if len(dise)==1:
                            stop=True 
                            break
                    else:
                        continue
    return knn.predict(OHV(all_sym,all_symp_col)),all_sym

In [None]:
def chat_sp():
    a=True
    while a:
        name=getInfo()
        result,sym=main_sp(name)
        if result == None :
            ans3=input("can you specify more what you feel or tap q to stop the conversation")
            if ans3=="q":
                a=False
            else:
                continue

        else:
            print("you may have "+result[0])
            print(description_list[result[0]])
            an=input("how many day do you feel those symptoms ?")
            if calc_condition(sym,int(an))==1:
                print("you should take the consultation from doctor")
            else : 
                print('Take following precautions : ')
                for e in precautionDictionary[result[0]]:
                    print(e)
            print("do you need another medical consultation (yes or no)? ")
            ans=input()
            if ans!="yes":
                a=False
                print("!!!!! thanks for using ower application !!!!!! ")


In [None]:
chat_sp()