In [7]:
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
# Author: Henry Moore
# Date: July 7, 2024
# Assignment: Health-care assistance via probabilistic graphical modeling, Math 76.01

# Disclaimers:
# I was strugging with the pgmpy packaging given the size that the TabularCPDs would have to be /
# and the number of symptoms i'd have to input into evidence={}
# I am using "!" after names of diseases to differentiate from symptoms of the same name

#Initializes nodes/edges in network
file = open("/Users/henrymoore/Desktop/mathai 2/correlations.txt","r")
correlationdata = file.readlines()
file.close()
relationships = {}
correlationdata.remove(correlationdata[0])
for line in correlationdata:
    splitline = line.lower().strip('\n').split("\t")
    if splitline[1] not in relationships:
        relationships[splitline[1] + "!"] = {}
    relationships[splitline[1] + "!"][splitline[0]] = int(splitline[2])
    #model.add_edge(splitline[1] + "!", splitline[0])



#parse symptoms, count total
file = open("/Users/henrymoore/Desktop/mathai 2/symptoms.txt","r")
symptomsdata = file.readlines()
file.close()
symptomsdata.remove(symptomsdata[0])
symptoms = {}
symptomoccurences = 0
for line in symptomsdata:
    splitline = line.lower().strip('\n').split("\t")
    symptoms[splitline[0]] = int(splitline[1])
    symptomoccurences += int(splitline[1])

#parse diseases, counts total
file = open("/Users/henrymoore/Desktop/mathai 2/diseases.txt","r")
diseasesdata = file.readlines()
file.close()
diseasesdata.remove(diseasesdata[0])
diseases = {}
diseaseoccurences = 0
for line in diseasesdata:
    splitline = line.lower().strip('\n').split("\t")
    if splitline[0] + "!" in relationships:
        diseases[splitline[0] + "!"] = int(splitline[1])
    diseaseoccurences += int(splitline[1])

# bayes theorem for a disease given a symptom
def conditional_prob(disease, symptom):
    if symptom in relationships[disease]:
        return (relationships[disease][symptom]/diseases[disease])*(diseases[disease]/diseaseoccurences)/(symptoms[symptom]/symptomoccurences)
    else:
        return .0000001

# bayes theorem for a disease given the lack of a symptom
def not_conditional_prob(disease, symptom):
    if symptom in relationships[disease]:
        return (1-(relationships[disease][symptom]/diseases[disease]))*(diseases[disease]/diseaseoccurences)/(1-(symptoms[symptom]/symptomoccurences))
    else:
        return diseases[disease]/diseaseoccurences


# calculates most likely disease given list of symptoms
def most_likely_disease(user_symptoms):
    for symptom in user_symptoms:
        if symptom not in symptoms:
            return "unknown symptom"
    bestprob = 0
    bestdisease = ""
    for disease in diseases:
        tempprob = 1 
        for symptom in user_symptoms:
                tempprob *= conditional_prob(disease, symptom)
        for symptom in relationships[disease]:
            if symptom not in user_symptoms:
                tempprob *= not_conditional_prob(disease, symptom)
        if tempprob > bestprob:
            bestprob = tempprob
            bestdisease = disease
    return bestdisease, bestprob

def main_interface():
    print("Welcome to disease predictor!")
    print("Please enter your symptoms, separated by commas.")
    user_input = input("Symptoms: ")
    observed_symptoms = [symptom.strip().lower() for symptom in user_input.split(",")]
    print(observed_symptoms)
    print(most_likely_disease(observed_symptoms))
main_interface()

1e-07
1e-07
1
Welcome to disease predictor!
Please enter your symptoms, separated by commas.
['headache', 'diarrhea', 'fever']
('colitis, microscopic!', 1.2805802451716924e-18)
