## Distribution generation
---

For the DAG Z -> A -> M -> Y with hidden common causes U_AY and U_MY.

$P_{ZAMY}$ as a dictionary

In [1]:
import itertools
import random

def generate_probabilities():
    """Draw a random table over binary (Z, A, M, Y) with Z ⊥ M | A by construction.

    The table is built as P(Z|A) * P(M|A) * P(Y) * P(A), where P(A) is fixed
    at 0.5 and Y is drawn independently of everything else (a deliberate
    simplification). Every cell is rounded to 3 decimals, so the returned
    values only approximately sum to 1 and only approximately factorise.

    Returns:
        dict mapping ``(z, a, m, y)`` tuples to the rounded probability.
    """
    binary = [0, 1]
    pairs = list(itertools.product(binary, repeat=2))

    # Unnormalised weights keyed by (value, a); Z table is drawn first, then M.
    z_weights = {pair: random.random() for pair in pairs}
    m_weights = {pair: random.random() for pair in pairs}

    # Per-A normalising constants so each conditional sums to 1 over its value.
    z_totals = {a: sum(z_weights[(z, a)] for z in binary) for a in binary}
    m_totals = {a: sum(m_weights[(m, a)] for m in binary) for a in binary}
    z_given_a = {(z, a): w / z_totals[a] for (z, a), w in z_weights.items()}
    m_given_a = {(m, a): w / m_totals[a] for (m, a), w in m_weights.items()}

    # Y gets its own marginal, unrelated to Z, A and M.
    y_weights = {y: random.random() for y in binary}
    y_total = sum(y_weights.values())
    y_marginal = {y: w / y_total for y, w in y_weights.items()}

    uniform_a = 0.5  # P(A) is assumed uniform
    return {
        (z, a, m, y): round(
            z_given_a[(z, a)] * m_given_a[(m, a)] * y_marginal[y] * uniform_a, 3
        )
        for z, a, m, y in itertools.product(binary, repeat=4)
    }

# Generate the distribution
# P_obs_dict = generate_probabilities()
# P_obs_dict

List of pre-processed distributions with valid conditional independences (the last entry is a deliberately invalid example):

In [2]:
# Hard-coded candidate tables. Keys are (Z, A, M, Y) tuples of binary values;
# values are the probability mass, (Z, A, M, Y) -> P(Z, A, M, Y).

# NOTE(review): the 16 entries of P_obs1 sum to 2.0 (the Y=0 entries and the
# Y=1 entries each sum to 1.0), so this is not a normalised joint.
# It looks like P(Z, A, M | Y) stacked over Y -- confirm this is intended.
P_obs1 = {(0, 0, 0, 0): 0.336,
 (0, 0, 1, 0): 0.084,
 (0, 1, 0, 0): 0.056,
 (0, 1, 1, 0): 0.224,
 (1, 0, 0, 0): 0.096,
 (1, 0, 1, 0): 0.024,
 (1, 1, 0, 0): 0.036,
 (1, 1, 1, 0): 0.144,
 (0, 0, 0, 1): 0.144,
 (0, 0, 1, 1): 0.036,
 (0, 1, 0, 1): 0.024,
 (0, 1, 1, 1): 0.096,
 (1, 0, 0, 1): 0.224,
 (1, 0, 1, 1): 0.056,
 (1, 1, 0, 1): 0.084,
 (1, 1, 1, 1): 0.336}
# NOTE(review): P_obs2 also sums to about 2.0 (each Y slice sums to 1.002),
# so the same normalisation question applies -- confirm.
P_obs2 = {(0, 0, 0, 0): 0.015,
(0, 0, 1, 0): 0.087,
(0, 1, 0, 0): 0.176,
(0, 1, 1, 0): 0.2,
(1, 0, 0, 0): 0.038,
(1, 0, 1, 0): 0.221,
(1, 1, 0, 0): 0.124,
(1, 1, 1, 0): 0.141,
(0, 0, 0, 1): 0.016,
(0, 0, 1, 1): 0.094,
(0, 1, 0, 1): 0.19,
(0, 1, 1, 1): 0.216,
(1, 0, 0, 1): 0.035,
(1, 0, 1, 1): 0.205,
(1, 1, 0, 1): 0.115,
(1, 1, 1, 1): 0.131}
# P_obs3 sums to 1.0.
P_obs3 = {(0, 0, 0, 0): 0.097,
 (0, 0, 0, 1): 0.08,
 (0, 0, 1, 0): 0.052,
 (0, 0, 1, 1): 0.043,
 (0, 1, 0, 0): 0.02,
 (0, 1, 0, 1): 0.016,
 (0, 1, 1, 0): 0.078,
 (0, 1, 1, 1): 0.065,
 (1, 0, 0, 0): 0.081,
 (1, 0, 0, 1): 0.067,
 (1, 0, 1, 0): 0.044,
 (1, 0, 1, 1): 0.036,
 (1, 1, 0, 0): 0.035,
 (1, 1, 0, 1): 0.029,
 (1, 1, 1, 0): 0.141,
 (1, 1, 1, 1): 0.116}
# NOTE(review): P_obs4 is entry-for-entry identical to P_obs3 -- confirm
# whether a different table was meant here.
P_obs4 = {(0, 0, 0, 0): 0.097,
 (0, 0, 0, 1): 0.08,
 (0, 0, 1, 0): 0.052,
 (0, 0, 1, 1): 0.043,
 (0, 1, 0, 0): 0.02,
 (0, 1, 0, 1): 0.016,
 (0, 1, 1, 0): 0.078,
 (0, 1, 1, 1): 0.065,
 (1, 0, 0, 0): 0.081,
 (1, 0, 0, 1): 0.067,
 (1, 0, 1, 0): 0.044,
 (1, 0, 1, 1): 0.036,
 (1, 1, 0, 0): 0.035,
 (1, 1, 0, 1): 0.029,
 (1, 1, 1, 0): 0.141,
 (1, 1, 1, 1): 0.116}


# Invalid example: P_obs5 sums to 1.0 but does not satisfy Z ⊥ M | A
# (e.g. P(Z=0, M=0 | A=0) is about 0.249 while P(Z=0 | A=0) * P(M=0 | A=0)
# is about 0.192).
P_obs5 =  {(0, 0, 0, 0): 0.062,
 (0, 0, 0, 1): 0.067,
 (0, 0, 1, 0): 0.018,
 (0, 0, 1, 1): 0.124,
 (0, 1, 0, 0): 0.036,
 (0, 1, 0, 1): 0.019,
 (0, 1, 1, 0): 0.012,
 (0, 1, 1, 1): 0.099,
 (1, 0, 0, 0): 0.051,
 (1, 0, 0, 1): 0.011,
 (1, 0, 1, 0): 0.1,
 (1, 0, 1, 1): 0.086,
 (1, 1, 0, 0): 0.075,
 (1, 1, 0, 1): 0.117,
 (1, 1, 1, 0): 0.103,
 (1, 1, 1, 1): 0.02}

In [3]:
# Candidate tables in order; `num` is a 0-based index into this list.
P_obs_lst = [P_obs1, P_obs2, P_obs3, P_obs4, P_obs5]
num = 0  # choose which distribution to use (0 selects P_obs1)
P_obs_dict = P_obs_lst[num]

Testing conditional independences:

In [4]:
def test_conditional_independence(distribution, ndigits=3):
    """Check whether Z ⊥ M | A holds in a table over (Z, A, M, Y).

    Args:
        distribution: Mapping from ``(z, a, m, y)`` tuples to probability
            mass. Combinations that are absent from the mapping are treated
            as having zero mass, so sparse tables (for example ones built
            from sample counts) are accepted. The table does not have to be
            normalised, because every quantity is conditioned on A.
        ndigits: Number of decimals both sides of the factorisation are
            rounded to before they are compared. The default of 3 matches
            the precision the tables in this notebook are stored with.

    Returns:
        True when, for every value of A with positive mass and every observed
        value of Z and M, ``round(P(z, m | a), ndigits)`` equals
        ``round(P(z | a) * P(m | a), ndigits)``; False otherwise. Values of A
        with zero mass are skipped, since conditioning on them is undefined.
    """
    mass_a = {}
    mass_za = {}
    mass_ma = {}
    mass_zma = {}

    # One pass over the table accumulates every marginal that is needed, and
    # never indexes a cell that the mapping does not contain.
    for (z, a, m, _y), prob in distribution.items():
        mass_a[a] = mass_a.get(a, 0) + prob
        mass_za[(z, a)] = mass_za.get((z, a), 0) + prob
        mass_ma[(m, a)] = mass_ma.get((m, a), 0) + prob
        mass_zma[(z, m, a)] = mass_zma.get((z, m, a), 0) + prob

    z_values = {z for z, _ in mass_za}
    m_values = {m for m, _ in mass_ma}

    for a, p_a in mass_a.items():
        if not p_a:
            # P(. | A=a) is undefined when A=a carries no mass.
            continue
        for z in z_values:
            p_z = mass_za.get((z, a), 0) / p_a
            for m in m_values:
                p_m = mass_ma.get((m, a), 0) / p_a
                p_zm = mass_zma.get((z, m, a), 0) / p_a
                if round(p_zm, ndigits) != round(p_z * p_m, ndigits):
                    return False
    return True


# The name starts with ``test_``, so test runners that collect ``test_*``
# callables (e.g. pytest) would otherwise try to run this helper as a test
# case and fail on its required ``distribution`` argument.
test_conditional_independence.__test__ = False

# Example usage with a given distribution dictionary

# Verify the distribution
# print(f"Respects conditional independence Z ⊥ M | A: {test_conditional_independence(P_obs_dict)}")

In [5]:
# Report the Z ⊥ M | A check for the selected table; `num` is the 0-based
# index into P_obs_lst, so "Dist number 0" refers to P_obs1.
print(f"Dist number {num} respects Z ⊥ M | A: {test_conditional_independence(P_obs_dict)}")

Dist number 0 respects Z ⊥ M | A: True


getting $P_{AMY|Z}$ from $P_{ZAMY}$:

In [6]:
def calculate_p_amy_given_z(joint_distribution):
    """Condition a table over (Z, A, M, Y) on Z.

    Args:
        joint_distribution: Mapping from ``(z, a, m, y)`` tuples, with z in
            {0, 1}, to probability mass.

    Returns:
        dict keyed by ``(a, m, y, z)`` (note that Z moves to the last
        position) holding ``P(a, m, y | z)`` rounded to 3 decimals.
    """
    # Marginal mass of each Z value, summed over A, M and Y.
    marginal_z = {0: 0, 1: 0}
    for (z, _a, _m, _y), mass in joint_distribution.items():
        marginal_z[z] += mass

    # Divide every cell by the mass of its Z value and re-key it.
    return {
        (a, m, y, z): round(mass / marginal_z[z], 3)
        for (z, a, m, y), mass in joint_distribution.items()
    }

# Show the selected table next to its Z-conditioned version, each under its
# own heading line.
print("P_ZAMY:", P_obs_dict, sep="\n")
print("P_AMY|Z:", calculate_p_amy_given_z(P_obs_dict), sep="\n")

P_ZAMY:
{(0, 0, 0, 0): 0.336, (0, 0, 1, 0): 0.084, (0, 1, 0, 0): 0.056, (0, 1, 1, 0): 0.224, (1, 0, 0, 0): 0.096, (1, 0, 1, 0): 0.024, (1, 1, 0, 0): 0.036, (1, 1, 1, 0): 0.144, (0, 0, 0, 1): 0.144, (0, 0, 1, 1): 0.036, (0, 1, 0, 1): 0.024, (0, 1, 1, 1): 0.096, (1, 0, 0, 1): 0.224, (1, 0, 1, 1): 0.056, (1, 1, 0, 1): 0.084, (1, 1, 1, 1): 0.336}
P_AMY|Z:
{(0, 0, 0, 0): 0.336, (0, 1, 0, 0): 0.084, (1, 0, 0, 0): 0.056, (1, 1, 0, 0): 0.224, (0, 0, 0, 1): 0.096, (0, 1, 0, 1): 0.024, (1, 0, 0, 1): 0.036, (1, 1, 0, 1): 0.144, (0, 0, 1, 0): 0.144, (0, 1, 1, 0): 0.036, (1, 0, 1, 0): 0.024, (1, 1, 1, 0): 0.096, (0, 0, 1, 1): 0.224, (0, 1, 1, 1): 0.056, (1, 0, 1, 1): 0.084, (1, 1, 1, 1): 0.336}


In [7]:
import itertools
import random

def generate_probabilities_with_independence():
    """Draw a random table over binary (Z, A, M, Y) with Z, M, Y independent given A.

    The table is built as P(Z|A) * P(M|A) * P(Y|A) * P(A) from randomly drawn,
    normalised factors. Every cell is rounded to 3 decimals, so the returned
    values only approximately sum to 1 and only approximately factorise.

    Returns:
        dict mapping ``(z, a, m, y)`` tuples to the rounded probability.
    """
    binary = [0, 1]

    def draw_table():
        # Unnormalised weights keyed by (value, a).
        return {
            pair: random.random() for pair in itertools.product(binary, repeat=2)
        }

    def normalise_per_a(weights):
        # Rescale so that, for each a, the entries sum to 1 over the value.
        totals = {a: sum(weights[(v, a)] for v in binary) for a in binary}
        return {(v, a): w / totals[a] for (v, a), w in weights.items()}

    # Marginal of A is drawn first, then the Z, M and Y tables in that order.
    a_weights = {a: random.random() for a in binary}
    a_total = sum(a_weights.values())
    marginal_a = {a: a_weights[a] / a_total for a in binary}

    z_weights = draw_table()
    m_weights = draw_table()
    y_weights = draw_table()

    z_given_a = normalise_per_a(z_weights)
    m_given_a = normalise_per_a(m_weights)
    y_given_a = normalise_per_a(y_weights)

    # Multiply the factors cell by cell.
    return {
        (z, a, m, y): round(
            z_given_a[(z, a)] * m_given_a[(m, a)] * y_given_a[(y, a)] * marginal_a[a],
            3,
        )
        for z, a, m, y in itertools.product(binary, repeat=4)
    }

# Draw a fresh random table and list its cells, one "(z, a, m, y): p" per line.
distribution = generate_probabilities_with_independence()

for key, value in distribution.items():
    print(key, value, sep=": ")

(0, 0, 0, 0): 0.086
(0, 0, 0, 1): 0.109
(0, 0, 1, 0): 0.101
(0, 0, 1, 1): 0.129
(0, 1, 0, 0): 0.01
(0, 1, 0, 1): 0.005
(0, 1, 1, 0): 0.004
(0, 1, 1, 1): 0.002
(1, 0, 0, 0): 0.109
(1, 0, 0, 1): 0.14
(1, 0, 1, 0): 0.129
(1, 0, 1, 1): 0.164
(1, 1, 0, 0): 0.006
(1, 1, 0, 1): 0.003
(1, 1, 1, 0): 0.002
(1, 1, 1, 1): 0.001


In [8]:
def calculate_conditional_probabilities(distribution, ndigits=3):
    """Check Z ⊥ Y | A and Z ⊥ M | A on a table over (Z, A, M, Y).

    Args:
        distribution: Mapping from ``(z, a, m, y)`` tuples to probability
            mass. Absent combinations count as zero mass, and the table does
            not have to be normalised because everything is conditioned on A.
        ndigits: Number of decimals both sides of each factorisation are
            rounded to before they are compared (default 3).

    Returns:
        Tuple ``(z_y_independence, z_m_independence)``. Each flag is True when
        the rounded conditional joint equals the rounded product of the two
        conditional marginals for every observed value combination and every
        value of A with positive mass. Values of A with zero mass are
        skipped, since conditioning on them is undefined.
    """
    mass_a = {}
    mass_za = {}
    mass_ma = {}
    mass_ya = {}
    mass_zya = {}
    mass_zma = {}

    # Accumulate every marginal in a single pass instead of rescanning the
    # whole table once per cell.
    for (z, a, m, y), prob in distribution.items():
        for table, key in (
            (mass_a, a),
            (mass_za, (z, a)),
            (mass_ma, (m, a)),
            (mass_ya, (y, a)),
            (mass_zya, (z, y, a)),
            (mass_zma, (z, m, a)),
        ):
            table[key] = table.get(key, 0) + prob

    z_values = {z for z, _ in mass_za}
    m_values = {m for m, _ in mass_ma}
    y_values = {y for y, _ in mass_ya}

    def factorises(joint_mass, left_mass, right_mass, p_a):
        # Compare P(u, v | a) with P(u | a) * P(v | a) at the given precision.
        joint = joint_mass / p_a
        product = (left_mass / p_a) * (right_mass / p_a)
        return round(joint, ndigits) == round(product, ndigits)

    z_y_independence = True
    z_m_independence = True

    for a, p_a in mass_a.items():
        if not p_a:
            # P(. | A=a) is undefined when A=a carries no mass.
            continue
        for z in z_values:
            z_mass = mass_za.get((z, a), 0)
            for y in y_values:
                if not factorises(
                    mass_zya.get((z, y, a), 0), z_mass, mass_ya.get((y, a), 0), p_a
                ):
                    z_y_independence = False
            for m in m_values:
                if not factorises(
                    mass_zma.get((z, m, a), 0), z_mass, mass_ma.get((m, a), 0), p_a
                ):
                    z_m_independence = False

    return z_y_independence, z_m_independence

# Run both checks on `distribution`, the P(Z, A, M, Y) table produced by
# generate_probabilities_with_independence() in the generation cell.
# NOTE(review): that table is rounded to 3 decimals per cell, so a check can
# presumably fail even though the factors were drawn independently -- confirm.
z_y_independence, z_m_independence = calculate_conditional_probabilities(distribution)

print(
    f"Z ⊥ Y | A holds: {z_y_independence}",
    f"Z ⊥ M | A holds: {z_m_independence}",
    sep="\n",
)


Z ⊥ Y | A holds: True
Z ⊥ M | A holds: False


In [12]:
# from pgmpy.models import BayesianNetwork
# from pgmpy.factors.discrete import TabularCPD
# from pgmpy.sampling import BayesianModelSampling
# import numpy as np

# def normalize_cpt(values):
#     """Normalize the CPT values to ensure they sum to 1."""
#     return values / values.sum(axis=0, keepdims=True)

# # Define the structure of the Bayesian Network
# model = BayesianNetwork([('A', 'Z'), ('A', 'M'), ('A', 'Y')])

# # Define the CPTs with random but normalized probabilities
# cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.5], [0.5]])
# cpd_z = TabularCPD(variable='Z', variable_card=2, 
#                    values=normalize_cpt(np.random.rand(2,2)), # Normalized probabilities
#                    evidence=['A'], 
#                    evidence_card=[2])
# cpd_m = TabularCPD(variable='M', variable_card=2, 
#                    values=normalize_cpt(np.random.rand(2,2)), # Normalized probabilities
#                    evidence=['A'], 
#                    evidence_card=[2])
# cpd_y = TabularCPD(variable='Y', variable_card=2, 
#                    values=normalize_cpt(np.random.rand(2,2)), # Normalized probabilities
#                    evidence=['A'], 
#                    evidence_card=[2])

# # Add the CPTs to the model
# model.add_cpds(cpd_a, cpd_z, cpd_m, cpd_y)

# # Verify the model
# assert model.check_model()

# # Sampling from the model
# sampler = BayesianModelSampling(model)
# samples = sampler.forward_sample(size=1000)



# # Assuming `samples` is the DataFrame obtained from the sampler.forward_sample method
# # First, let's convert the samples DataFrame into the required dictionary format.
# def samples_to_prob_dict(samples):
#     # Count the occurrences of each unique combination
#     combination_counts = samples.value_counts(normalize=True)
    
#     # Convert the Series to a dictionary with tuple keys
#     prob_dict = {tuple(index): prob for index, prob in combination_counts.items()}
#     return prob_dict

# # Convert the samples to the probability dictionary
# P_ZAMY = samples_to_prob_dict(samples)
# P_ZAMY

  0%|          | 0/4 [00:00<?, ?it/s]

{(1, 0, 0, 1): 0.222,
 (0, 0, 1, 1): 0.145,
 (1, 1, 0, 1): 0.139,
 (0, 0, 0, 1): 0.138,
 (0, 1, 0, 1): 0.098,
 (0, 1, 1, 1): 0.081,
 (1, 0, 1, 1): 0.066,
 (1, 0, 0, 0): 0.03,
 (1, 1, 0, 0): 0.029,
 (1, 1, 1, 1): 0.028,
 (1, 0, 1, 0): 0.009,
 (0, 0, 1, 0): 0.004,
 (0, 1, 0, 0): 0.004,
 (0, 1, 1, 0): 0.003,
 (0, 0, 0, 0): 0.002,
 (1, 1, 1, 0): 0.002}