- This notebook builds a Bayesian network model that predicts the probability of children playing outdoors from four weather conditions: outlook, temperature, humidity, and windiness.
- Using the pgmpy library in Python, the notebook performs inference tasks to compute the probabilities of playing outdoors under different weather conditions and presents the results in a clear and concise tabular format.

**Read in data**

In [20]:
import pandas as pd

# The weather observations, written one record per row in the same order
# as the printed table: (Outlook, Temperature, Humidity, Windy, Play).
weather_columns = ['Outlook', 'Temperature', 'Humidity', 'Windy', 'Play']
weather_rows = [
    ('Sunny',    'Hot',  'High',   False, 'No'),
    ('Sunny',    'Hot',  'High',   True,  'No'),
    ('Overcast', 'Hot',  'High',   False, 'Yes'),
    ('Rainy',    'Mild', 'High',   False, 'Yes'),
    ('Rainy',    'Cool', 'Normal', False, 'Yes'),
    ('Rainy',    'Cool', 'Normal', True,  'No'),
    ('Overcast', 'Cool', 'Normal', True,  'Yes'),
    ('Sunny',    'Mild', 'High',   False, 'No'),
    ('Sunny',    'Cool', 'Normal', False, 'Yes'),
    ('Rainy',    'Mild', 'Normal', False, 'Yes'),
    ('Sunny',    'Mild', 'Normal', True,  'Yes'),
    ('Overcast', 'Mild', 'High',   True,  'Yes'),
    ('Overcast', 'Hot',  'Normal', False, 'Yes'),
    ('Rainy',    'Mild', 'High',   True,  'No'),
]

# Transpose the records into the column-name -> list-of-values mapping
data = {
    name: list(values)
    for name, values in zip(weather_columns, zip(*weather_rows))
}

# Build the DataFrame from the column mapping
df = pd.DataFrame(data)

# Show the full table to confirm it was entered correctly
print(df)

     Outlook Temperature Humidity  Windy Play
0      Sunny         Hot     High  False   No
1      Sunny         Hot     High   True   No
2   Overcast         Hot     High  False  Yes
3      Rainy        Mild     High  False  Yes
4      Rainy        Cool   Normal  False  Yes
5      Rainy        Cool   Normal   True   No
6   Overcast        Cool   Normal   True  Yes
7      Sunny        Mild     High  False   No
8      Sunny        Cool   Normal  False  Yes
9      Rainy        Mild   Normal  False  Yes
10     Sunny        Mild   Normal   True  Yes
11  Overcast        Mild     High   True  Yes
12  Overcast         Hot   Normal  False  Yes
13     Rainy        Mild     High   True   No


**Define the structure of the Bayesian network based on the variables in the dataset**

In [21]:
from pgmpy.models import BayesianNetwork

# Network structure: each weather attribute is a direct parent of 'Play'.
# The edges are generated from a single parent list, in the same order as
# before, so the node ordering of the network is unchanged.
model = BayesianNetwork(
    [(parent, 'Play') for parent in ('Outlook', 'Temperature', 'Humidity', 'Windy')]
)

# Show which nodes ended up in the network
print("Nodes of the Bayesian network:", model.nodes())

Nodes of the Bayesian network: ['Outlook', 'Play', 'Temperature', 'Humidity', 'Windy']


**Estimate the parameters from the dataset and add the resulting conditional probability distributions (CPDs) to the nodes**

In [22]:
from pgmpy.estimators import MaximumLikelihoodEstimator

# Learn the CPD of every node from the data with Maximum Likelihood
# Estimation and attach the CPDs to the model.
# The estimator class is passed to fit() explicitly: the estimator instance
# that used to be constructed here was never handed to fit(), so it had no
# effect on the learned parameters.
# NOTE(review): the CPD of 'Play' has 3 * 3 * 2 * 2 = 36 parent
# configurations while the dataset has only 14 rows, so most configurations
# have no supporting observations. Confirm how pgmpy fills those columns and
# consider a smoothing estimator if the defaults are not acceptable.
model.fit(df, estimator=MaximumLikelihoodEstimator)

# Display the learned CPDs (last expression of the cell)
model.get_cpds()

[<TabularCPD representing P(Outlook:3) at 0x21e0682eaf0>,
 <TabularCPD representing P(Play:2 | Humidity:2, Outlook:3, Temperature:3, Windy:2) at 0x21e7cd87eb0>,
 <TabularCPD representing P(Temperature:3) at 0x21e065a25e0>,
 <TabularCPD representing P(Humidity:2) at 0x21e04a86400>,
 <TabularCPD representing P(Windy:2) at 0x21e04a86eb0>]

**Actual Prediction Task**

In [23]:
from pgmpy.inference import VariableElimination

# Exact-inference engine built on the fitted network
inference = VariableElimination(model)

# Evidence to condition on, keyed by the label shown in the results table
evidences = {
    'Sunny': {'Outlook': 'Sunny'},
    'Overcast': {'Outlook': 'Overcast'},
    'Rainy': {'Outlook': 'Rainy'}
}

# P(Play = 'Yes' | Outlook) for each outlook.
# The probability is looked up by state name instead of by position
# (values[1]), so the result does not depend on the order in which pgmpy
# happens to store the 'No' / 'Yes' states.
results = {
    condition: inference.query(variables=['Play'], evidence=evidence).get_value(Play='Yes')
    for condition, evidence in evidences.items()
}

# Print the results in a tabular format
print("Weather Condition   Probability of Playing")
print("-----------------------------------------")
for condition, probability in results.items():
    print(f"{condition:<20} {probability:.2f}")


Weather Condition   Probability of Playing
-----------------------------------------
Sunny                0.45
Overcast             0.66
Rainy                0.59


In [24]:
# Evidence to condition on, keyed by the label shown in the results table
temperature_evidences = {
    'Hot': {'Temperature': 'Hot'},
    'Mild': {'Temperature': 'Mild'},
    'Cool': {'Temperature': 'Cool'}
}

# P(Play = 'Yes' | Temperature) for each temperature.
# The probability is looked up by state name instead of by position
# (values[1]), so the result does not depend on the order in which pgmpy
# happens to store the 'No' / 'Yes' states.
temperature_results = {
    condition: inference.query(variables=['Play'], evidence=evidence).get_value(Play='Yes')
    for condition, evidence in temperature_evidences.items()
}

# Print the results in a tabular format
print("Temperature Condition   Probability of Playing")
print("----------------------------------------------")
for condition, probability in temperature_results.items():
    print(f"{condition:<23} {probability:.2f}")

Temperature Condition   Probability of Playing
----------------------------------------------
Hot                     0.49
Mild                    0.58
Cool                    0.59


In [25]:
# Evidence to condition on, keyed by the label shown in the results table
humidity_evidences = {
    'High': {'Humidity': 'High'},
    'Normal': {'Humidity': 'Normal'}
}

# P(Play = 'Yes' | Humidity) for each humidity level.
# The probability is looked up by state name instead of by position
# (values[1]), so the result does not depend on the order in which pgmpy
# happens to store the 'No' / 'Yes' states.
humidity_results = {
    condition: inference.query(variables=['Play'], evidence=evidence).get_value(Play='Yes')
    for condition, evidence in humidity_evidences.items()
}

# Print the results in a tabular format
print("Humidity Condition   Probability of Playing")
print("-------------------------------------------")
for condition, probability in humidity_results.items():
    print(f"{condition:<20} {probability:.2f}")

Humidity Condition   Probability of Playing
-------------------------------------------
High                 0.47
Normal               0.65


In [26]:
# Evidence to condition on, keyed by the label shown in the results table.
# The 'Windy' column holds booleans, so the evidence states are booleans too.
windy_evidences = {
    'Windy': {'Windy': True},
    'Not Windy': {'Windy': False}
}

# P(Play = 'Yes' | Windy) for each wind condition.
# The probability is looked up by state name instead of by position
# (values[1]), so the result does not depend on the order in which pgmpy
# happens to store the 'No' / 'Yes' states.
windy_results = {
    condition: inference.query(variables=['Play'], evidence=evidence).get_value(Play='Yes')
    for condition, evidence in windy_evidences.items()
}

# Print the results in a tabular format
print("Windy Condition   Probability of Playing")
print("---------------------------------------")
for condition, probability in windy_results.items():
    print(f"{condition:<16} {probability:.2f}")

Windy Condition   Probability of Playing
---------------------------------------
Windy            0.50
Not Windy        0.60
