In [48]:
import pandas as pd
import numpy as np

In [49]:
dataset=pd.read_csv('playTennis.csv',names=['outlook','temperature','humidity','wind','class'])
print(dataset)

     outlook temperature  humidity   wind class
0    outlook        temp  humidity  windy  play
1      sunny         hot      high  false    no
2      sunny         hot      high   true    no
3   overcast         hot      high  false   yes
4      rainy        mild      high  false   yes
5      rainy        cool    normal  false   yes
6      rainy        cool    normal   true    no
7   overcast        cool    normal   true   yes
8      sunny        mild      high  false    no
9      sunny        cool    normal  false   yes
10     rainy        mild    normal  false   yes
11     sunny        mild    normal   true   yes
12  overcast        mild      high   true   yes
13  overcast         hot    normal  false   yes
14     rainy        mild      high   true    no


In [50]:
def entropy(target_col):
    elements,counts=np.unique(target_col,return_counts=True)
    entropy=np.sum([(-counts[i]/np.sum(counts))*np.log2(counts[i]/np.sum(counts)) for i in range(len(elements))])
    return entropy

In [51]:
def InfoGain(data,split_attribute_name,target_name="class"):
    total_entropy=entropy(data[target_name])
    vals,counts=np.unique(data[split_attribute_name],return_counts=True)
    Weighted_Entropy=np.sum([(counts[i]/np.sum(counts))*entropy(data.where(data[split_attribute_name]==vals[i]).dropna()[target_name]) for i in range(len(vals))])
    Information_Gain=total_entropy-Weighted_Entropy
    return Information_Gain

In [52]:
def ID3(data,originaldata,features,target_attribute_name="class",parent_node_class=None):
    if len(np.unique(data[target_attribute_name]))<=1:
        return np.unique(data[target_attribute_name])[0]
    elif len(data)==0:
        return np.unique(originaldata[target_attribute_name])[np.argmax(np.unique(originaldata[target_attribute_name],return_counts=True)[1])]
    elif len(features)==0:
        return parent_node_class
    else:
        parent_node_class=np.unique(data[target_attribute_name])[np.argmax(np.unique(data[target_attribute_name],return_counts=True)[1])]
        item_values=[InfoGain(data,feature,target_attribute_name) for feature in features]
        best_feature_index=np.argmax(item_values)
        best_feature=features[best_feature_index]
        tree={best_feature:{}}
        features=[i for i in features if i!=best_feature]
        for value in np.unique(data[best_feature]):
            value=value
            sub_data=data.where(data[best_feature]==value).dropna()
            subtree=ID3(sub_data,dataset,features,target_attribute_name,parent_node_class)
            tree[best_feature][value]=subtree
        return (tree)
    

In [53]:
tree=ID3(dataset,dataset,dataset.columns[:-1])
print('\n Display Tree:\n',tree)


 Display Tree:
 {'outlook': {'outlook': 'play', 'overcast': 'yes', 'rainy': {'wind': {'false': 'yes', 'true': 'no'}}, 'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}}}}


In [3]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.preprocessing import KBinsDiscretizer

# Load the dataset
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease']
heartDisease = pd.read_csv('heart.csv', names = names)
heartDisease = heartDisease.replace('?', np.nan)

# Replace missing values marked as '?' with NaN and handle them
heartDisease = heartDisease.apply(pd.to_numeric, errors='coerce')
heartDisease.fillna(heartDisease.mean(), inplace=True)

# Discretize continuous variables
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
heartDisease['age'] = discretizer.fit_transform(heartDisease[['age']])
heartDisease['trestbps'] = discretizer.fit_transform(heartDisease[['trestbps']])
heartDisease['chol'] = discretizer.fit_transform(heartDisease[['chol']])
heartDisease['thalach'] = discretizer.fit_transform(heartDisease[['thalach']])


# Define the structure of the Bayesian Network
model = BayesianNetwork([
    ('age', 'trestbps'), 
    ('age', 'fbs'), 
    ('sex', 'trestbps'), 
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'), 
    ('fbs', 'heartdisease'), 
    ('heartdisease', 'restecg'), 
    ('heartdisease', 'thalach'), 
    ('heartdisease', 'chol')
])

# Fit the model using Maximum Likelihood Estimator
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Perform inference
infer = VariableElimination(model)

# Example: Diagnose heart disease for a patient with specific attributes
# Discretize the age value to match the model
age_value = 37
discretized_age = int(discretizer.transform([[age_value]])[0][0])

evidence = {'age': discretized_age}

try:
    q = infer.query(variables=['heartdisease'], evidence=evidence)

    # Extract the probability of having heart disease
    prob_heart_disease = q.values[1]  # Assuming 1 indicates presence of heart disease

    # Use if-else statements to make decisions based on the probability
    if prob_heart_disease > 0.5:
        print(f"High risk of heart disease with probability: {prob_heart_disease:.2f}")
    else:
        print(f"Low risk of heart disease with probability: {prob_heart_disease:.2f}")

    # Print the entire distribution for more information
    print(q)
except Exception as e:
    print(f"An error occurred: {e}")


Low risk of heart disease with probability: 0.00
+----------------------------------+---------------------+
| heartdisease                     |   phi(heartdisease) |
| heartdisease(0.0)                |              0.4414 |
+----------------------------------+---------------------+
| heartdisease(0.5445544554455446) |              0.0000 |
+----------------------------------+---------------------+
| heartdisease(1.0)                |              0.5586 |
+----------------------------------+---------------------+


