In [1]:
import warnings
import logging
import pandas as pd 
import bnlearn as bn
from pgmpy.global_vars import logger
from utils import * # Helper functions

In [2]:
# Keep the notebook output readable: ignore Python warnings and only let
# ERROR-level records through from the pgmpy logger and the 'pandas' logger.
warnings.filterwarnings(action='ignore')
for noisy_logger in (logger, logging.getLogger('pandas')):
    noisy_logger.setLevel(logging.ERROR)

# Dementia Queries
---

In [3]:
# Load the pre-processed dementia dataset; the path is relative to the
# directory the notebook is run from.
dementia_df = pd.read_csv('data/dementia_data_processed.csv')

In [4]:
# Run the `discretise` helper (presumably provided by the star import from
# utils) on a copy of the frame with method='freedman-diaconis'.
# The result is bound back to `dementia_df`, so from here on that name refers
# to the discretised data; re-run the load cell to get the raw values back.
dementia_df = discretise(dementia_df.copy(), method='freedman-diaconis')

## Training
---

### Structure Learning
---

In [5]:
# Learn the network structure from the discretised data with bnlearn's 'tan'
# method type, using 'Group' as the class node.
dementia_structure = bn.structure_learning.fit(dementia_df, methodtype='tan', class_node='Group')

[bnlearn] >Computing best DAG using [tan]


Building tree:   0%|          | 0/66.0 [00:00<?, ?it/s]

Building tree:   0%|          | 0/66.0 [00:00<?, ?it/s]

[bnlearn] >Compute structure scores for model comparison (higher is better).


### Parameter Learning
---

In [6]:
# Estimate the conditional probability tables for the learned structure from
# the same discretised frame, using methodtype='bayes'.
dementia_model = bn.parameter_learning.fit(dementia_structure, dementia_df, methodtype='bayes')

[bnlearn] >Parameter learning> Computing parameters using [bayes]
[bnlearn] >Converting [<class 'pgmpy.base.DAG.DAG'>] to BayesianNetwork model.
[bnlearn] >Converting adjmat to BayesianNetwork.
[bnlearn] >CPD of eTIV:
+----------+----------------------+----------------------+
| Group    | Group(0.0)           | Group(1.0)           |
+----------+----------------------+----------------------+
| eTIV(0)  | 0.048650568181818184 | 0.05007102272727273  |
+----------+----------------------+----------------------+
| eTIV(1)  | 0.05433238636363636  | 0.05149147727272727  |
+----------+----------------------+----------------------+
| eTIV(2)  | 0.05575284090909091  | 0.057173295454545456 |
+----------+----------------------+----------------------+
| eTIV(3)  | 0.06711647727272728  | 0.06995738636363637  |
+----------+----------------------+----------------------+
| eTIV(4)  | 0.09410511363636363  | 0.07137784090909091  |
+----------+----------------------+----------------------+
| eTIV(5)  | 0.

## Inference
---

In [7]:
# Evidence for the first dementia query, given as plain feature values
# (they are passed through discretise_query before inference).
query_1 = dict(
    Visit=2,
    Age=88,
    EDUC=14,
    SES=2,
    MMSE=30,
    CDR=0,
    eTIV=2004,
    nWBV=0.681,
    ASF=0.876,
)

# Evidence for the second dementia query; note that it carries no 'SES' entry.
query_2 = dict(
    Visit=3,
    Age=80,
    EDUC=12,
    MMSE=22,
    CDR=0.5,
    eTIV=1698,
    nWBV=0.701,
    ASF=1.034,
)

In [8]:
# Map both query dicts through discretise_query with the same method that was
# used for the training frame.
# NOTE(review): `dementia_df` is the already-discretised frame at this point
# (it was reassigned by the discretise call); confirm discretise_query expects
# binned rather than raw reference data.
query_1, query_2 = (
    discretise_query(raw_query, dementia_df, method='freedman-diaconis')
    for raw_query in (query_1, query_2)
)

In [9]:
# bn.predict takes a DataFrame, so wrap each query dict as a one-row frame.
# The names are rebound, so this cell is not meant to be run twice in a row.
query_1, query_2 = (pd.DataFrame([record]) for record in (query_1, query_2))

### Query 1
---

In [10]:
# Infer 'Group' for the single-row query frame. The following cell reads the
# probability from column 'p' of the returned frame.
prediction = bn.predict(dementia_model, query_1, 'Group')

[bnlearn]> Remaining columns for inference: 9


100%|██████████| 1/1 [00:00<00:00, 448.73it/s]


In [11]:
# bn.predict reports the most likely state of 'Group' together with that
# state's probability in column 'p'. 'p' is therefore P(dementia) only when
# the predicted state is the dementia one; otherwise it is P(no dementia).
# NOTE(review): assumes Group == 1 encodes dementia (0 = no dementia) --
# confirm against the preprocessing behind dementia_data_processed.csv.
dementia_state = 1
predicted_state = prediction.loc[0, 'Group']
predicted_prob = prediction.loc[0, 'p']

# 'Group' has exactly two states, so the other state's probability is the
# complement of the reported one.
prob_dementia = predicted_prob if predicted_state == dementia_state else 1 - predicted_prob
prob_no_dementia = 1 - prob_dementia

In [12]:
# Report each outcome's probability as a fraction and as a percentage.
for outcome, value in (("Dementia", prob_dementia), ("no Dementia", prob_no_dementia)):
    print(f"Probability of {outcome}: {value:.6f} (or {value * 100:.2f}%)")

Probability of Dementia: 0.817611 (or 81.76%)
Probability of no Dementia: 0.182389 (or 18.24%)


### Query 2
---

In [13]:
# Infer 'Group' for the second query frame; this rebinds `prediction`, so the
# result for the first query is no longer available after this cell.
prediction = bn.predict(dementia_model, query_2, 'Group')

[bnlearn]> Remaining columns for inference: 8


100%|██████████| 1/1 [00:00<00:00, 427.99it/s]


In [14]:
# bn.predict reports the most likely state of 'Group' together with that
# state's probability in column 'p'. 'p' is therefore P(dementia) only when
# the predicted state is the dementia one; otherwise it is P(no dementia).
# NOTE(review): assumes Group == 1 encodes dementia (0 = no dementia) --
# confirm against the preprocessing behind dementia_data_processed.csv.
dementia_state = 1
predicted_state = prediction.loc[0, 'Group']
predicted_prob = prediction.loc[0, 'p']

# 'Group' has exactly two states, so the other state's probability is the
# complement of the reported one.
prob_dementia = predicted_prob if predicted_state == dementia_state else 1 - predicted_prob
prob_no_dementia = 1 - prob_dementia

In [15]:
# Report each outcome's probability as a fraction and as a percentage.
for outcome, value in (("Dementia", prob_dementia), ("no Dementia", prob_no_dementia)):
    print(f"Probability of {outcome}: {value:.6f} (or {value * 100:.2f}%)")

Probability of Dementia: 0.797139 (or 79.71%)
Probability of no Dementia: 0.202861 (or 20.29%)


# Parkinson's Disease Queries
---

In [16]:
# Load the pre-processed Parkinson's dataset; the path is relative to the
# directory the notebook is run from.
parkinsons_df_int = pd.read_csv('data/parkinsons_data_processed.csv')

In [17]:
# Run the `discretise` helper on a copy of the frame with method='sturges'.
# The result is bound back to `parkinsons_df_int`, so from here on that name
# refers to the discretised data rather than the values read from the CSV.
parkinsons_df_int = discretise(parkinsons_df_int.copy(), method='sturges')

In [18]:
# Object-dtype copy of the discretised frame; this is the frame used for
# structure and parameter learning, while `parkinsons_df_int` is kept for
# discretise_query.
parkinsons_df = parkinsons_df_int.astype(object)

## Training
---

### Structure Learning
---

In [19]:
# Learn the network structure from the discretised data with bnlearn's 'tan'
# method type, using 'Status' as the class node.
parkinsons_structure = bn.structure_learning.fit(parkinsons_df, methodtype='tan', class_node='Status')

[bnlearn] >Computing best DAG using [tan]


Building tree:   0%|          | 0/253.0 [00:00<?, ?it/s]

Building tree:   0%|          | 0/253.0 [00:00<?, ?it/s]

[bnlearn] >Compute structure scores for model comparison (higher is better).


### Parameter Learning
---

In [20]:
# Estimate the conditional probability tables for the learned structure from
# the same object-dtype frame, using methodtype='bayes'.
parkinsons_model = bn.parameter_learning.fit(parkinsons_structure, parkinsons_df, methodtype='bayes')

[bnlearn] >Parameter learning> Computing parameters using [bayes]
[bnlearn] >Converting [<class 'pgmpy.base.DAG.DAG'>] to BayesianNetwork model.
[bnlearn] >Converting adjmat to BayesianNetwork.
[bnlearn] >CPD of Shimmer:APQ3:
+-----------------+---------------------+---------------------+
| Status          | Status(0)           | Status(1)           |
+-----------------+---------------------+---------------------+
| Shimmer:APQ3(0) | 0.2156963764382621  | 0.15232697922033317 |
+-----------------+---------------------+---------------------+
| Shimmer:APQ3(1) | 0.1755109050317706  | 0.13532543362527907 |
+-----------------+---------------------+---------------------+
| Shimmer:APQ3(2) | 0.09204877211059594 | 0.12141507813841662 |
+-----------------+---------------------+---------------------+
| Shimmer:APQ3(3) | 0.08741198694830846 | 0.1136871028679375  |
+-----------------+---------------------+---------------------+
| Shimmer:APQ3(4) | 0.08586639189421262 | 0.1028679374892667  |
+-----

## Inference
---

In [21]:
# Evidence for the first Parkinson's query, listed as (feature, value) pairs
# in the order they should appear in the resulting dict.
query_3 = dict([
    ('MDVP:Fo(Hz)', 197.076),
    ('MDVP:Fhi(Hz)', 206.896),
    ('MDVP:Flo(Hz)', 192.055),
    ('MDVP:Jitter(%)', 0.00289),
    ('MDVP:Jitter(Abs)', 0.00001),
    ('MDVP:RAP', 0.00166),
    ('MDVP:PPQ', 0.00168),
    ('Jitter:DDP', 0.00498),
    ('MDVP:Shimmer', 0.01098),
    ('MDVP:Shimmer(dB)', 0.097),
    ('Shimmer:APQ3', 0.00563),
    ('Shimmer:APQ5', 0.0068),
    ('MDVP:APQ', 0.00802),
    ('Shimmer:DDA', 0.01689),
    ('NHR', 0.00339),
    ('HNR', 26.775),
])

# Evidence for the second Parkinson's query; same sixteen features.
query_4 = dict([
    ('MDVP:Fo(Hz)', 162.568),
    ('MDVP:Fhi(Hz)', 198.346),
    ('MDVP:Flo(Hz)', 77.63),
    ('MDVP:Jitter(%)', 0.00502),
    ('MDVP:Jitter(Abs)', 0.00003),
    ('MDVP:RAP', 0.0028),
    ('MDVP:PPQ', 0.00253),
    ('Jitter:DDP', 0.00841),
    ('MDVP:Shimmer', 0.01791),
    ('MDVP:Shimmer(dB)', 0.168),
    ('Shimmer:APQ3', 0.00793),
    ('Shimmer:APQ5', 0.01057),
    ('MDVP:APQ', 0.01799),
    ('Shimmer:DDA', 0.0238),
    ('NHR', 0.0117),
    ('HNR', 25.678),
])

In [22]:
# Map both query dicts through discretise_query with the same method that was
# used for the training frame.
# NOTE(review): `parkinsons_df_int` is the already-discretised frame at this
# point, and the recorded output shows every feature of both queries mapped
# to 4 -- confirm discretise_query is meant to receive binned reference data.
query_3, query_4 = (
    discretise_query(raw_query, parkinsons_df_int, method='sturges')
    for raw_query in (query_3, query_4)
)

In [23]:
# Wrap each query dict as a one-row, object-dtype frame (the training frame
# was also cast to object). The names are rebound, so this cell is not meant
# to be run twice in a row.
query_3, query_4 = (
    pd.DataFrame([record], dtype=object) for record in (query_3, query_4)
)

In [24]:
# Show both discretised query frames, one after the other.
print(query_3, query_4, sep='\n')

  MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs)  \
0           4            4            4              4                4   

  MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) Shimmer:APQ3  \
0        4        4          4            4                4            4   

  Shimmer:APQ5 MDVP:APQ Shimmer:DDA NHR HNR  
0            4        4           4   4   4  
  MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs)  \
0           4            4            4              4                4   

  MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) Shimmer:APQ3  \
0        4        4          4            4                4            4   

  Shimmer:APQ5 MDVP:APQ Shimmer:DDA NHR HNR  
0            4        4           4   4   4  


In [25]:
# Debug dump of the object returned by bn.parameter_learning.fit; the recorded
# output shows a dict with at least the keys 'model' and 'adjmat'.
print(parkinsons_model)

{'model': <pgmpy.models.BayesianNetwork.BayesianNetwork object at 0x7f1ebf8dcc20>, 'adjmat': target            Shimmer:APQ3  Shimmer:DDA  MDVP:Shimmer    HNR  MDVP:APQ  \
source                                                                       
Shimmer:APQ3             False         True          True   True     False   
Shimmer:DDA              False        False         False  False      True   
MDVP:Shimmer             False        False         False  False     False   
HNR                      False        False         False  False     False   
MDVP:APQ                 False        False         False  False     False   
MDVP:Shimmer(dB)         False        False         False  False     False   
Shimmer:APQ5             False        False         False  False     False   
MDVP:Fo(Hz)              False        False         False  False     False   
MDVP:RAP                 False        False         False  False     False   
NHR                      False        False      

In [26]:
# List the states each CPD knows about, to compare them with the values in
# the discretised queries.
bayes_net = parkinsons_model['model']
for node in bayes_net.nodes():
    node_cpd = bayes_net.get_cpds(node)
    # Skip anything that does not expose state names.
    if not hasattr(node_cpd, 'state_names'):
        continue
    print(f"Possible states for {node}: {node_cpd.state_names}")

Possible states for Shimmer:APQ3: {'Shimmer:APQ3': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'Status': [0, 1]}
Possible states for Shimmer:DDA: {'Shimmer:DDA': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'Shimmer:APQ3': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'Status': [0, 1]}
Possible states for MDVP:Shimmer: {'MDVP:Shimmer': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'Shimmer:APQ3': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'Status': [0, 1]}
Possible states for HNR: {'HNR': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'Shimmer:APQ3': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'Status': [0, 1]}
Possible states for MDVP:APQ: {'MDVP:APQ': [0, 1, 2, 3, 4, 5, 6, 9], 'Shimmer:DDA': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'Status': [0, 1]}
Possible states for MDVP:Shimmer(dB): {'MDVP:Shimmer(dB)': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'MDVP:Shimmer': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'Status': [0, 1]}
Possible states for Shimmer:APQ5: {'Shimmer:APQ5': [0, 1, 2, 3, 4, 5, 6, 9], 'MDVP:Shimmer': [0, 1, 2, 3, 4, 5, 6, 7, 9], 'Status': [0, 1]}
Possible states for MDVP:Fo(Hz): {'MDVP:Fo(Hz)': [0, 1, 2, 3, 4, 5, 

### Query 3
---

In [27]:
# Infer 'Status' for the first Parkinson's query frame.
# NOTE(review): the recorded run of this cell ends in `KeyError: 4`. The query
# frame printed earlier holds 4 for every feature; presumably at least one
# variable has no state 4 in the fitted model -- verify against the state
# lists printed above and against how discretise_query bins the query.
prediction = bn.predict(parkinsons_model, query_3, 'Status')

[bnlearn]> Remaining columns for inference: 16


  0%|          | 0/1 [00:00<?, ?it/s]


KeyError: 4

In [None]:
# bn.predict reports the most likely state of 'Status' together with that
# state's probability in column 'p'. 'p' is therefore P(Parkinson's) only when
# the predicted state is the Parkinson's one.
# NOTE(review): assumes Status == 1 encodes Parkinson's disease (0 = healthy)
# -- confirm against the preprocessing behind parkinsons_data_processed.csv.
parkinsons_state = 1
predicted_state = prediction.loc[0, 'Status']
predicted_prob = prediction.loc[0, 'p']

# 'Status' has exactly two states, so the other state's probability is the
# complement of the reported one.
prob_parkinsons = predicted_prob if predicted_state == parkinsons_state else 1 - predicted_prob
# Complement of this section's own probability, not of `prob_dementia`, which
# still holds a value from the dementia section.
prob_no_parkinsons = 1 - prob_parkinsons

In [None]:
# Report each outcome's probability as a fraction and as a percentage.
for outcome, value in (
    ("Parkinson's Disease", prob_parkinsons),
    ("no Parkinson's Disease", prob_no_parkinsons),
):
    print(f"Probability of {outcome}: {value:.6f} (or {value * 100:.2f}%)")

### Query 4
---

In [None]:
# Infer 'Status' for the second Parkinson's query frame.
# NOTE(review): the recorded run of this cell ends in `KeyError: 4`. The query
# frame printed earlier holds 4 for every feature; presumably at least one
# variable has no state 4 in the fitted model -- verify against the state
# lists printed above and against how discretise_query bins the query.
prediction = bn.predict(parkinsons_model, query_4, 'Status')

[bnlearn]> Remaining columns for inference: 16


  0%|          | 0/1 [00:00<?, ?it/s]


KeyError: 4

In [None]:
# bn.predict reports the most likely state of 'Status' together with that
# state's probability in column 'p'. 'p' is therefore P(Parkinson's) only when
# the predicted state is the Parkinson's one.
# NOTE(review): assumes Status == 1 encodes Parkinson's disease (0 = healthy)
# -- confirm against the preprocessing behind parkinsons_data_processed.csv.
parkinsons_state = 1
predicted_state = prediction.loc[0, 'Status']
predicted_prob = prediction.loc[0, 'p']

# 'Status' has exactly two states, so the other state's probability is the
# complement of the reported one.
prob_parkinsons = predicted_prob if predicted_state == parkinsons_state else 1 - predicted_prob
# Complement of this section's own probability, not of `prob_dementia`, which
# still holds a value from the dementia section.
prob_no_parkinsons = 1 - prob_parkinsons

In [None]:
# Report each outcome's probability as a fraction and as a percentage.
for outcome, value in (
    ("Parkinson's Disease", prob_parkinsons),
    ("no Parkinson's Disease", prob_no_parkinsons),
):
    print(f"Probability of {outcome}: {value:.6f} (or {value * 100:.2f}%)")