In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sea
import os
from tqdm import tqdm
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import defaultdict
import pandas as pd
import re

In [37]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\akhil\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\akhil\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\akhil\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


True

In [46]:
data = 'C:/Users/akhil/OneDrive/Documents/School/Summer 2024/MSDS 458/Final Project/Symptom2Disease.csv'
df = pd.read_csv(data)

In [47]:
stop_words = set(stopwords.words('english'))

In [48]:
def extract_symptoms_pos(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    words = word_tokenize(text)
    filtered_words = [word for word in words if word not in stop_words]
    pos_tags = nltk.pos_tag(filtered_words)
    symptoms = [word for word, pos in pos_tags if pos in ('NN', 'NNS', 'JJ', 'JJR', 'JJS')]
    return symptoms

In [49]:
symptom_disease_map = defaultdict(set)

In [51]:
for index, row in df.iterrows():
    symptoms = extract_symptoms_pos(row['text'])
    for symptom in symptoms:
        symptom_disease_map[symptom].add(row['label'])

In [52]:
symptom_disease_map = {k: list(v) for k, v in symptom_disease_map.items()}

In [53]:
for symptom, diseases in list(symptom_disease_map.items())[:50]:
    print(f"Symptom: {symptom}, Diseases: {diseases}")

Symptom: skin, Diseases: ['Impetigo', 'Jaundice', 'Psoriasis', 'Dengue', 'Varicose Veins', 'Acne', 'allergy', 'drug reaction', 'Chicken pox', 'Fungal infection', 'diabetes']
Symptom: rash, Diseases: ['Impetigo', 'Jaundice', 'Psoriasis', 'Dengue', 'Varicose Veins', 'Acne', 'Chicken pox', 'Fungal infection']
Symptom: arms, Diseases: ['Impetigo', 'Psoriasis', 'Dengue', 'Chicken pox', 'Fungal infection', 'Cervical spondylosis']
Symptom: torso, Diseases: ['allergy', 'Psoriasis']
Symptom: past, Diseases: ['Impetigo', 'peptic ulcer disease', 'Bronchial Asthma', 'Psoriasis', 'Varicose Veins', 'Chicken pox']
Symptom: weeks, Diseases: ['Varicose Veins', 'Psoriasis']
Symptom: itchy, Diseases: ['Impetigo', 'Jaundice', 'Psoriasis', 'Dengue', 'Varicose Veins', 'Acne', 'allergy', 'Dimorphic Hemorrhoids', 'drug reaction', 'Chicken pox', 'Fungal infection', 'Common Cold']
Symptom: dry, Diseases: ['Impetigo', 'Bronchial Asthma', 'Psoriasis', 'Varicose Veins', 'allergy', 'drug reaction', 'Cervical spondy

In [75]:
def forward_chaining(extracted_symptoms):
    possible_diagnoses = defaultdict(float)
    
    for symptom in extracted_symptoms:
        if symptom in symptom_disease_map:
            diseases = symptom_disease_map[symptom]
            for disease in diseases:
                possible_diagnoses[disease] += 1
    
    total_symptoms = len(extracted_symptoms)
    diagnosis_confidence = {disease: (count / total_symptoms) for disease, count in possible_diagnoses.items()}
    return diagnosis_confidence

In [76]:
def backward_chaining(goal_disease, extracted_symptoms):
    relevant_symptoms = set()
    
    for symptom in extracted_symptoms:
        if symptom in symptom_disease_map and goal_disease in symptom_disease_map[symptom]:
            relevant_symptoms.add(symptom)
    
    confidence = len(relevant_symptoms) / len(extracted_symptoms)
    return confidence if relevant_symptoms else 0.0

In [83]:
def diagnose(sentence, method="forward", goal_disease=None):
    extracted_symptoms = extract_symptoms_pos(sentence)
    print(f"Extracted Symptoms: {extracted_symptoms}")
    
    if method == "forward":
        diagnosis_confidence = forward_chaining(extracted_symptoms)
    elif method == "backward":
        if not goal_disease:
            goal_disease = input("Enter the disease you want to verify (goal): ")
        confidence = backward_chaining(goal_disease, extracted_symptoms)
        diagnosis_confidence = {goal_disease: confidence}
    else:
        print("Invalid method. Choose 'forward' or 'backward'.")
        return
    
    if diagnosis_confidence:
        sorted_diagnoses = sorted(diagnosis_confidence.items(), key=lambda x: x[1], reverse=True)
        print("\nPossible diagnoses with confidence levels:")
        for diagnosis, confidence in sorted_diagnoses:
            print(f"{diagnosis}: {confidence:.2f} confidence")
    else:
        print("No matching diagnosis found.")

In [90]:
userInput = input('Please input your symptoms!')
diagnose(userInput)

Extracted Symptoms: ['sick', 'heart', 'pain']

Possible diagnoses with confidence levels:
Pneumonia: 1.00 confidence
drug reaction: 1.00 confidence
Impetigo: 0.67 confidence
Jaundice: 0.67 confidence
Malaria: 0.67 confidence
urinary tract infection: 0.67 confidence
Hypertension: 0.67 confidence
Typhoid: 0.67 confidence
Dengue: 0.67 confidence
allergy: 0.67 confidence
gastroesophageal reflux disease: 0.67 confidence
Chicken pox: 0.67 confidence
Common Cold: 0.67 confidence
Bronchial Asthma: 0.33 confidence
diabetes: 0.33 confidence
peptic ulcer disease: 0.33 confidence
Psoriasis: 0.33 confidence
Varicose Veins: 0.33 confidence
Dimorphic Hemorrhoids: 0.33 confidence
Cervical spondylosis: 0.33 confidence


In [91]:
diagnose(userInput, method="backward")

Extracted Symptoms: ['sick', 'heart', 'pain']

Possible diagnoses with confidence levels:
diabetes: 0.33 confidence
