In [10]:
import pandas as pd
from collections import Counter

# conditional probability present-present
def count_pp(symptom, path='dataset_disease.csv'):
  """Count the rows in which ``symptom`` is marked present (value 1).

  Args:
    symptom: Column name of the symptom in the CSV, e.g. ``'S01'``.
    path: CSV file to read. Defaults to the file this function has always
      read, so existing one-argument calls behave as before.

  Returns:
    The number of rows whose ``symptom`` column equals 1, as a plain int
    (0 when no row has the symptom).

  Raises:
    KeyError: If ``symptom`` is not a column of the CSV.
  """
  df = pd.read_csv(path)
  # value_counts() orders its result by frequency, so taking position [1]
  # returned the count of whichever value happened to be rarer, and raised
  # IndexError when the column held a single distinct value. Count the 1s
  # explicitly instead.
  return int((df[symptom] == 1).sum())

# conditional probability present-absent
def count_pa(symptom, path='dataset_disease_2.csv'):
  """Count the rows in which ``symptom`` is marked present (value 1).

  Reads the second dataset file, which the surrounding code uses for the
  "present-absent" conditional probability.

  Args:
    symptom: Column name of the symptom in the CSV, e.g. ``'S01'``.
    path: CSV file to read. Defaults to the file this function has always
      read, so existing one-argument calls behave as before.

  Returns:
    The number of rows whose ``symptom`` column equals 1, as a plain int
    (0 when no row has the symptom).

  Raises:
    KeyError: If ``symptom`` is not a column of the CSV.
  """
  df = pd.read_csv(path)
  # value_counts() orders its result by frequency, so taking position [1]
  # returned the count of whichever value happened to be rarer, and raised
  # IndexError when the column held a single distinct value. Count the 1s
  # explicitly instead.
  return int((df[symptom] == 1).sum())

# prior
def get_prior(symptom):
  """Look up the hard-coded prior probability for a symptom code.

  Returns the prior as a float for 'S01'..'S05', or None for any other
  value. The match is an exact comparison, so it is case-sensitive.
  """
  known_priors = (
      ('S01', 0.9),
      ('S02', 0.8),
      ('S03', 0.8),
      ('S04', 0.7),
      ('S05', 0.6),
  )
  for code, value in known_priors:
    if symptom == code:
      return value
  # Unrecognized code: no prior is defined.
  return None

def get_disease(symptom):
  """Return the disease association for a symptom code.

  The result is a one-element list holding a tuple made of the symptom
  code followed by the disease code(s) linked to it. Returns None when the
  code is not one of 'S01'..'S05' (exact, case-sensitive match).
  """
  associations = (
      ('S01', 'D01', 'D02'),
      ('S02', 'D01', 'D02'),
      ('S03', 'D02'),
      ('S04', 'D01'),
      ('S05', 'D01', 'D02'),
  )
  for row in associations:
    if symptom == row[0]:
      # Wrap in a fresh list so callers get the same shape as before.
      return [row]
  return None


def get_disease_name(code_disease):
  """Translate a disease code into its display name.

  'D01' maps to 'Osteoporosis' and 'D02' to 'Osteoarthritis'; any other
  value yields None.
  """
  names = (
      ('D01', 'Osteoporosis'),
      ('D02', 'Osteoarthritis'),
  )
  for code, label in names:
    if code_disease == code:
      return label
  return None


# main program
def main():
  """Prompt for symptom codes and print the most probable disease.

  Reads one line of whitespace-separated symptom codes (case-insensitive),
  computes a posterior for each recognized symptom from its prior and the
  two dataset counts, averages the posteriors per disease and prints the
  disease with the highest average as a percentage.

  Unknown codes are reported and skipped. If no code is recognized, a
  message is printed and the function returns without a diagnosis.

  Returns:
    None. All results are written to standard output.
  """
  # Row count assumed for both CSV files when turning counts into
  # conditional probabilities.
  sample_size = 20

  print('''
  Code Symptoms:
  S01. Had a broken bone
  S02. Lack of exercise
  S03. Swelling in the joints
  S04. Pain in the joints
  S05. Stiffness in joints

  Select the symptoms.
  ''')
  input_symptom = input('''Enter the symptoms: ''')
  # split() with no argument ignores leading, trailing and repeated
  # whitespace, so stray spaces no longer produce empty "symptoms".
  list_symptom = [token.upper() for token in input_symptom.split()]

  # disease code -> posteriors contributed by the selected symptoms,
  # in order of first appearance (dicts preserve insertion order).
  posteriors = {}
  for symptom in list_symptom:
    rows = get_disease(symptom)
    if rows is None:
      print(f'  Ignoring unknown symptom code: {symptom}')
      continue

    # conditional probability
    pp = count_pp(symptom) / sample_size
    pa = count_pa(symptom) / sample_size

    # Joint Probability Distribution
    prior = get_prior(symptom)
    jpd_pp = pp * prior
    jpd_pa = pa * (1 - prior)

    # posterior; when the symptom never occurs in either dataset there is
    # no evidence at all, so report 0 instead of dividing by zero.
    evidence = jpd_pp + jpd_pa
    res = jpd_pp / evidence if evidence else 0.0

    for row in rows:
      # NOTE(review): only the first disease code of each association
      # (row[1]) is credited, as before; any further codes in the tuple are
      # not used. Confirm whether that is intended.
      posteriors.setdefault(row[1], []).append(res)

  if not posteriors:
    print('''
  No symptoms were recognized, so no diagnosis can be made.
  ''')
    return

  # Probabilistic Inference: average posterior per disease, as a percentage.
  z = [(code, sum(values) / len(values) * 100)
       for code, values in posteriors.items()]
  final = max(z, key=lambda x: x[1])

  # Fixed-point formatting instead of slicing str(float), which broke for
  # values rendered in scientific notation.
  print(f'''
  Possible disease: {get_disease_name(final[0])} with a percentage of: {final[1]:.2f}%
  ''')

# Run the interactive prompt (main()) when this code is executed directly.
if __name__ == '__main__':    
    main()


  Code Symptoms:
  S01. Had a broken bone
  S02. Lack of exercise
  S03. Swelling in the joints
  S04. Pain in the joints
  S05. Stiffness in joints

  Select the symptoms.
  
Enter the symptoms: s01

  Possible disease: Osteoporosis with a percentage of: 95.74%
  


In [9]:
# Load the disease dataset and display it for inspection.
# NOTE(review): the 'Unnamed: 0' column in the displayed frame suggests the
# CSV was saved with its index; reading with index_col=0 would drop it — confirm.
df = pd.read_csv('dataset_disease.csv')
df

Unnamed: 0,DISEASE,S01,S02,S03,S04,S05
0,Osteoporosis,1,1,0,1,0
1,Osteoporosis,1,0,0,1,0
2,Osteoporosis,1,1,0,0,0
3,Osteoporosis,0,0,0,1,0
4,Osteoporosis,0,1,0,0,0
5,Osteoporosis,0,1,0,1,0
6,Osteoporosis,1,0,0,0,0
7,Osteoporosis,0,0,0,1,0
8,Osteoporosis,0,0,0,1,0
9,Osteoporosis,0,1,0,0,0
