In [139]:
import numpy as np
import pandas as pd
import math

#Exercise A

dataset = np.array([
    [0, 1],
    [0, 0],
    [0, 0],
    [0, 0],
    [1, 1],
    [1, 1],
    [1, 0],
    [1, 0],
    [1, 0],
    [1, 0]
])


#P(Y|Xo), P(Y|X1)
#P(X|Yo). P(X|Y1)

def conditional_probability(data):
    prob_y_giv_x = np.zeros((2,2))
    for x in [0,1]:
        for y in [0, 1]:
            prob_y_giv_x[y, x] = np.mean(data[data[:, 1] == x][:, 0] == y)
    prob_x_giv_y = np.zeros((2,2))
    for y in [0,1]:
        for x in [0,1]:
            prob_x_giv_y[x, y] = np.mean(data[data[:, 0] == y][:, 1] == x)
    
    return prob_x_giv_y, prob_y_giv_x


p_xy, p_yx = conditional_probability(dataset)

def compute_matrix(y_giv_x, x_giv_y):
    A_matrix = np.zeros((2, 2))
    for y in [0, 1]:
        for y_prime in [0, 1]:
            A_matrix[y, y_prime] = np.sum(p_yx[y, :] * p_xy[:, y_prime])
    
    return A_matrix

compute_matrix(p_xy, p_yx)

array([[0.4047619, 0.3968254],
       [0.5952381, 0.6031746]])

In [131]:
import random
#Q17 --> Causal Graph
# x-->z1<--y    |     x-->z3-->y

#P(z3 = 0) = 0.6, P(z3 = 1) = 0.4
total_samples = int(10e5)
prob_1_z3 = 0.6
z3 = np.asarray([0 if random.random() > prob_1_z3 else 1 for _ in range(total_samples)])

#P(x == 0 | z3 == 0) = 0.8, P(x==0 | z3 == 1) = 0.7   ##do random values for y
#P(x == 1 | z3 == 0) = 0.2, P(x==1 | z3 == 1) = 0.3
#P(y == 0 | z3 == 0) = 0.9, P(y == 1 | z3 == 1) = 0.5
#P(y == 0 | z3 == 0) = 0.1, P(y == 1 | z3 == 1) = 0.5 

prob_x_giv_z3 = [[0.8, 0.2], [0.7, 0.3]]
prob_y_giv_z3 = [[0.9, 0.1], [0.5, 0.5]]

def get_synthetic_values(prob_list, cond_prob, multiple = False):
    x = []
    for _ in range(total_samples):
        z3_instance = random.choices([0, 1], [cond_prob, 1-cond_prob])[0]
        # print(z3_instance)
        if z3_instance == 0:
            updater = random.choices([0, 1], [prob_list[0][0], prob_list[0][1]])[0]
            x.append(updater)
        elif z3_instance == 1:
            updater = random.choices([0, 1], [prob_list[1][0], prob_list[1][1]])[0]
            x.append(updater)
    return x

x, y = np.asarray(get_synthetic_values(prob_x_giv_z3, prob_1_z3)), np.asarray(get_synthetic_values(prob_y_giv_z3, prob_1_z3))

#P(z1 == 0 | x == 1, y == 1) = 0.1, P(z1 == 1 | x == 1, y == 1) = 0.9
#P(z1 == 0 | x == 0, y == 0) = 0.35, P(z1 == 1 | x == 0, y == 0) = 0.65
#P(z1 == 0 | x == 0, y == 1) = 0.25, P(z1 == 1 | x == 0, y == 1) = 0.75
#P(z1 == 0 | x == 1, y == 0) = 0.85, P(z1 == 1| x == 0, y == 1) = 0.15

def multi_get_synthetic_values(prob_list, cond_prob):
    z1 = []
    for _ in range(total_samples):
        z3_instance = random.choices([0, 1], [cond_prob, 1-cond_prob])[0]
        if z3_instance == 0:
            x_cond_prob = prob_list[0]
            y_cond_prob = prob_list[2]
            x = random.choices([0, 1], x_cond_prob)[0]
            y = random.choices([0, 1], y_cond_prob)[0]
        elif z3_instance == 1:
            x_cond_prob = prob_list[1]
            y_cond_prob = prob_list[3]
            x = random.choices([0, 1], x_cond_prob)[0]
            y = random.choices([0, 1], y_cond_prob)[0]
        
        if x == 0 and y == 0:
            z1_instance = random.choices([0, 1], [0.35, 0.65])[0]
        elif x == 0 and y == 1:
            z1_instance = random.choices([0, 1], [0.25, 0.75])[0]
        elif x == 1 and y == 0:
            z1_instance = random.choices([0, 1], [0.85, 0.15])[0]
        elif x == 1 and y == 1:
            z1_instance = random.choices([0, 1], [0.1, 0.9])[0]
        z1.append(z1_instance)
    
    return z1

prob_lists = prob_x_giv_z3 + prob_y_giv_z3
z1 = np.asarray(multi_get_synthetic_values(prob_lists, prob_1_z3))

In [132]:
df = pd.DataFrame({'z3':z3, 'x':x, 'y':y, 'z1':z1})
df.head()

Unnamed: 0,z3,x,y,z1
0,1,1,0,1
1,0,0,0,0
2,0,0,0,0
3,0,1,1,1
4,0,1,1,1


In [138]:
x, z1, y = 1, 1, 1 #change for differenting conditions

s_num = 0
s_den = 0

for z3_instance in [0, 1]:
    z1_giv_x_z3 = len([i for idx, i in enumerate(df['z1']) if i == z1 and df['x'][idx]==x and df['z3'][idx]==z3_instance])
    x_z3 = len([i for idx, i in enumerate(df['x']) if i == x and df['z3'][idx] == z3_instance])
    x_z3_z1 = len([i for idx, i in enumerate(df['x']) if i == x and df['z3'][idx] == z3_instance and df['z1'][idx] == z1])
    y_giv_x_z3_z1 = len([i for idx, i in enumerate(df['y']) if i == y and df['x'][idx]==x and df['z3'][idx]==z3_instance and df['z1'][idx]==z1])
    prob_z3 = len([i for i in df['z3'] if i == z3_instance]) / total_samples

    s_num += (z1_giv_x_z3 / x_z3) * (y_giv_x_z3_z1 / x_z3_z1) * prob_z3
    s_den += (y_giv_x_z3_z1 / x_z3_z1) * prob_z3

print('EQ1', s_num/s_den)

s_num = len([i for idx, i in enumerate(df['z1']) if i == z1 and df['x'][idx] == x and df['y'][idx] == y]) / len([i for idx, i in enumerate(df['x']) if i == x and df['y'][idx] == y])
s_num *= len([i for i in df['y'] if i == y])

print('EQ2', s_num/(s_den*total_samples))

EQ1 0.6027593534711564
EQ2 0.60741619756423
