In [151]:
import itertools
from functools import reduce
from operator import mul
import matplotlib.pyplot as plt
import networkx as nx
from daft import PGM
from pgmpy.models import BayesianModel, BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator, ParameterEstimator
from pgmpy.factors.discrete import TabularCPD, JointProbabilityDistribution, DiscreteFactor
from pgmpy.inference import VariableElimination
import pandas as pd
import numpy as np
from pgmpy.independencies import Independencies

**Problem 5**
We are given the following joint distribution over the random variables A, B, C, and D. 

| A | B | C | D | P(A,B,C,D) |
|---|---|---|---|------------|
| T | T | T | T | 0.0448     |
| T | T | T | F | 0.0252     |
| T | T | F | T | 0.0112     |
| T | T | F | F | 0.0588     |
| T | F | T | T | 0.0144     |
| T | F | T | F | 0.0144     |
| T | F | F | T | 0.096      |
| F | F | F | F | 0.216      |
| F | T | T | T | 0.1024     |
| F | T | T | F | 0.0576     |
| F | T | F | T | 0.0256     |
| F | T | F | F | 0.1344     |
| F | F | T | T | 0.1152     |
| F | F | T | F | 0.1152     |
| F | F | F | T | 0.0768     |
| F | F | F | F | 0.1728     |

 1. Are A and B independent? 
 2. Are A and C independent?
 3. Are A and C independent given B?
 4. Are A and D independent?
 5. Are A and D independent given C?


**Using JointProbabilityDistribution in pgmpy library**

In [152]:
# Define the variables and their states
variables = ['A', 'B', 'C', 'D']
cardinality = [2, 2, 2, 2]  # Two states for each variable: True and False

probabilities = [
    0.0448, 0.0252, 0.0112, 0.0588,
    0.0144, 0.0144, 0.0096, 0.0216,
    0.1024, 0.0576, 0.0256, 0.1344,
    0.1152, 0.1152, 0.0768, 0.1728
]

jpd_table = JointProbabilityDistribution(variables, cardinality, np.array(probabilities))

# Check for independence between variables
independence_results = {
    "A and B independent": jpd_table.check_independence(['A'], ['B']),
    "A and C independent": jpd_table.check_independence(['A'], ['C']),
    "A and C independent given B": jpd_table.check_independence(['A'], ['C'], ('B',), condition_random_variable=True),
    "A and D independent": jpd_table.check_independence(['A'], ['D']),
    "A and D independent given C": jpd_table.check_independence(['A'], ['D'], ('C',), condition_random_variable=True)
}

for question, result in independence_results.items():
    print(f"{question}: {'Yes' if result else 'No'}")

A and B independent: No
A and C independent: No
A and C independent given B: Yes
A and D independent: Yes
A and D independent given C: No


**Using Pandas and TabularCPD in pgmpy library**

In [153]:
import pandas as pd

# Define your JPD data
jpd_data = [
    ('A(0)', 'B(0)', 'C(0)', 'D(0)', 0.0448),
    ('A(0)', 'B(0)', 'C(0)', 'D(1)', 0.0252),
    ('A(0)', 'B(0)', 'C(1)', 'D(0)', 0.0112),
    ('A(0)', 'B(0)', 'C(1)', 'D(1)', 0.0588),
    ('A(0)', 'B(1)', 'C(0)', 'D(0)', 0.0144),
    ('A(0)', 'B(1)', 'C(0)', 'D(1)', 0.0144),
    ('A(0)', 'B(1)', 'C(1)', 'D(0)', 0.0096),
    ('A(0)', 'B(1)', 'C(1)', 'D(1)', 0.0216),
    ('A(1)', 'B(0)', 'C(0)', 'D(0)', 0.1024),
    ('A(1)', 'B(0)', 'C(0)', 'D(1)', 0.0576),
    ('A(1)', 'B(0)', 'C(1)', 'D(0)', 0.0256),
    ('A(1)', 'B(0)', 'C(1)', 'D(1)', 0.1344),
    ('A(1)', 'B(1)', 'C(0)', 'D(0)', 0.1152),
    ('A(1)', 'B(1)', 'C(0)', 'D(1)', 0.1152),
    ('A(1)', 'B(1)', 'C(1)', 'D(0)', 0.0768),
    ('A(1)', 'B(1)', 'C(1)', 'D(1)', 0.1728)
]

# Create a DataFrame from the JPD data
columns = ['A', 'B', 'C', 'D', 'P(A,B,C,D)']
df = pd.DataFrame(jpd_data, columns=columns)
print(df)

       A     B     C     D  P(A,B,C,D)
0   A(0)  B(0)  C(0)  D(0)      0.0448
1   A(0)  B(0)  C(0)  D(1)      0.0252
2   A(0)  B(0)  C(1)  D(0)      0.0112
3   A(0)  B(0)  C(1)  D(1)      0.0588
4   A(0)  B(1)  C(0)  D(0)      0.0144
5   A(0)  B(1)  C(0)  D(1)      0.0144
6   A(0)  B(1)  C(1)  D(0)      0.0096
7   A(0)  B(1)  C(1)  D(1)      0.0216
8   A(1)  B(0)  C(0)  D(0)      0.1024
9   A(1)  B(0)  C(0)  D(1)      0.0576
10  A(1)  B(0)  C(1)  D(0)      0.0256
11  A(1)  B(0)  C(1)  D(1)      0.1344
12  A(1)  B(1)  C(0)  D(0)      0.1152
13  A(1)  B(1)  C(0)  D(1)      0.1152
14  A(1)  B(1)  C(1)  D(0)      0.0768
15  A(1)  B(1)  C(1)  D(1)      0.1728


In [182]:
model = BayesianNetwork([('A', 'P(A,B,C,D)'), ('B', 'P(A,B,C,D)'), ('C', 'P(A,B,C,D)'), ('D','P(A,B,C,D)')])

# Initialize dictionaries to hold sums for each variable level
sums_A = {'A(0)': 0, 'A(1)': 0}
sums_B = {'B(0)': 0, 'B(1)': 0}
sums_C = {'C(0)': 0, 'C(1)': 0}
sums_D = {'D(0)': 0, 'D(1)': 0}

# Iterate over rows in the DataFrame and sum up probabilities for A, B, C, and D
for _, row in df.iterrows():
    a_val = row['A']
    b_val = row['B']
    c_val = row['C']
    d_val = row['D']
    probability = row['P(A,B,C,D)']

    # Sum probabilities for A
    sums_A[a_val] += probability

    # Sum probabilities for B
    sums_B[b_val] += probability

    # Sum probabilities for C
    sums_C[c_val] += probability

    # Sum probabilities for D
    sums_D[d_val] += probability

# Print the sums
print("Sum of probabilities for A:")
for key, value in sums_A.items():
    print(f"{key}: {value}")

print("\nSum of probabilities for B:")
for key, value in sums_B.items():
    print(f"{key}: {value}")

print("\nSum of probabilities for C:")
for key, value in sums_C.items():
    print(f"{key}: {value}")

print("\nSum of probabilities for D:")
for key, value in sums_D.items():
    print(f"{key}: {value}")


# Define the Conditional Probability Distributions (CPDs) with computed values
cpd_A = TabularCPD(variable='A', variable_card=2, values=[[sums_A['A(0)']], [sums_A['A(1)']]])
cpd_B = TabularCPD(variable='B', variable_card=2, values=[[sums_B['B(0)']], [sums_B['B(1)']]])
cpd_C = TabularCPD(variable='C', variable_card=2, values=[[sums_C['C(0)']], [sums_C['C(1)']]])
cpd_D = TabularCPD(variable='D', variable_card=2, values=[[sums_D['D(0)']], [sums_D['D(1)']]])
cpd_P = TabularCPD(variable='P(A,B,C,D)', variable_card=2,
                   values=[[0.9552, 0.9748, 0.9888, 0.9412,
                            0.9856, 0.9856, 0.9904, 0.9784,
                            0.8976, 0.9424, 0.9744, 0.8656,
                            0.8848, 0.8848, 0.9232, 0.8272],
                           [0.0448, 0.0252, 0.0112, 0.0588,
                            0.0144, 0.0144, 0.0096, 0.0216,
                            0.1024, 0.0576, 0.0256, 0.1344,
                            0.1152, 0.1152, 0.0768, 0.1728]
                           ],
                   evidence=['A', 'B', 'C', 'D'],
                   evidence_card=[2, 2, 2, 2])

model.add_cpds(cpd_P,cpd_A,cpd_B,cpd_C,cpd_D)

Sum of probabilities for A:
A(0): 0.2
A(1): 0.8

Sum of probabilities for B:
B(0): 0.45999999999999996
B(1): 0.54

Sum of probabilities for C:
C(0): 0.48919999999999997
C(1): 0.5107999999999999

Sum of probabilities for D:
D(0): 0.4
D(1): 0.5999999999999999


In [183]:
# check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
# defined and sum to 1.
model.check_model()

True

In [184]:
infer = VariableElimination(model)

**Are A and B independent?**
A and B are (unconditionally) independent if and only if
Is P(A) × P(B) = P(A and B)?
Is P(A|B) = P(A)?
Is P(B|A) = P(B)?

In [190]:
# Perform inference to get the marginal distribution of A
# A_dist = infer.query(['A'])
#rint(cpd_B.state_names)
# Perform inference to get the distribution of A given B=1
AgivenB_dist = infer.query(['A'], evidence={'B': 1})


print("\nConditional distribution of A given B=1:")
print(AgivenB_dist)


Conditional distribution of A given B=1:
+------+----------+
| A    |   phi(A) |
| A(0) |   0.2000 |
+------+----------+
| A(1) |   0.8000 |
+------+----------+


**Are A and C independent?**

In [156]:
AC_dist = infer.query(['A', 'C'])

filtered_data_AC = df[(df['A'] == 'A(1)') & (df['C'] == 'C(1)')]

# Calculate the sum over column 'P(A,B,C,D)' for the filtered rows
sum_p_AC = filtered_data_AC['P(A,B,C,D)'].sum()

# Calculate the product of probabilities for the filtered rows
product_p_AC = filtered_data_AC['P(A,B,C,D)'].prod()

# Check if the sum and product match
if sum_p_AC == product_p_AC:
    print("A and C are independent.")
else:
    print("A and C are dependent.")

A and C are dependent.


**Are A and C independent given B?**

In [157]:
# Calculate P(B=false)
p_b_false = infer.query(['B']).get_value(B=0)
print("P(B=false)=", p_b_false)

# Calculate P(A=false, B=false)
p_a_false_b_false = df[(df['A'] == 'A(0)') & (df['B'] == 'B(0)')]['P(A,B,C,D)'].sum()
print("P(A=true, B=true)=", p_a_false_b_false)
print("P(A|B=false)=", p_a_false_b_false / p_b_false)
print()

# Calculate P(C=false, B=false)
p_c_false_b_false = df[(df['C'] == 'C(0)') & (df['B'] == 'B(0)')]['P(A,B,C,D)'].sum()
print("P(C=false, B=false)=", p_c_false_b_false)
print("P(C|B=false)=", p_c_false_b_false / p_b_false)

# Check independence
independence_product = (p_c_false_b_false / p_b_false) * (p_a_false_b_false / p_b_false)
print("P(A|B=false) * P(C|B=false)=", independence_product)
print()

# Calculate P(A=false, C=false, B=false) 
p_a_false_c_false_b_false = df[(df['A'] == 'A(0)') & (df['C'] == 'C(0)') & (df['B'] == 'B(0)')]['P(A,B,C,D)'].sum()
p_b_false = df[df['B'] == 'B(0)']['P(A,B,C,D)'].sum()

# Calculate P(A=false, C=false | B=false)
p_a_false_c_false_given_b_false = p_a_false_c_false_b_false / p_b_false
print("P(A=false, C=false | B=false)=", p_a_false_c_false_given_b_false)

# Check for independence
if np.isclose(p_a_false_c_false_given_b_false, independence_product):
    print("A and C are independent given B.")
else:
    print("A and C are dependent given B.")


P(B=false)= 0.45999999999999996
P(A=true, B=true)= 0.14
P(A|B=false)= 0.3043478260869566

P(C=false, B=false)= 0.22999999999999998
P(C|B=false)= 0.5
P(A|B=false) * P(C|B=false)= 0.1521739130434783

P(A=false, C=false | B=false)= 0.15217391304347827
A and C are independent given B.
