# Multivariate Probabilty

## Joint probability

### Dice example (2 random variables)

|  | 1 | 2  | 3 | 4  | 5 | 6 |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| $\mathrm{A}$  | 0  | 1  | 0  | 1  | 0  | 1 |
| $\mathrm{B}$  | 0  | 1  | 1  | 0  | 1  | 0 |

In [1]:
import numpy as np
from __future__ import division

In [2]:
def isPrime(n):
    if n == 1: return False
    for i in range(2, n//2 + 1):
        if n%i == 0:
            return False
    return True

In [3]:
f_A = lambda x: 1 if x%2==0 else 0
f_B = lambda x: 1 if isPrime(x) else 0

In [4]:
sides = 6

In [5]:
N = 100000

In [6]:
A_samples = []
B_samples = []
JAB_samples = []

for i in range(N):
    x = np.random.randint(1, sides+1) # Stochastic
    a = f_A(x) # Deterministic
    b = f_B(x) # Deterministic
    jAB = (a, b)
    
    A_samples.append(a)
    B_samples.append(b)
    JAB_samples.append(jAB)

In [7]:
from collections import Counter

def get_P(samples):
    counter = dict(Counter(samples))
    P = dict.fromkeys(counter.keys())
    for key, value in counter.iteritems():
        P[key] = value/len(samples)
    return P

In [8]:
P_A = get_P(A_samples)
P_B = get_P(B_samples)
P_JAB = get_P(JAB_samples)

In [9]:
A = list(set(P_A))
B = list(set(P_B))

In [10]:
P_JAB

{(0, 0): 0.16881, (0, 1): 0.33029, (1, 0): 0.33374, (1, 1): 0.16716}

In [11]:
P_JAB[(0, 0)]/P_A[0]

0.3382288118613504

## Marginal probability

In [12]:
sum([P_JAB[(0, b)] for b in B])

0.4991

### Dice example (3 random variables)

|  | 1 | 2  | 3 | 4  | 5 | 6 |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| $\mathrm{A}$  | 0  | 1  | 0  | 1  | 0  | 1 |
| $\mathrm{B}$  | 0  | 1  | 1  | 0  | 1  | 0 |
| $\mathrm{C}$  | 0  | 0  | 1  | 0  | 0  | 1 |

In [13]:
f_C = lambda x: 1 if x%3==0 else 0

In [14]:
C_samples = []
JABC_samples = []

for i in range(N):
    x = np.random.randint(1, sides+1)
    a = f_A(x)
    b = f_B(x)
    c = f_C(x)
    jABC = (a, b, c)
    
    C_samples.append(c)
    JABC_samples.append(jABC)

In [15]:
P_C = get_P(C_samples)
P_JABC = get_P(JABC_samples)

In [16]:
C = list(set(P_C))

In [17]:
import itertools

In [18]:
for a, b, c in itertools.product(A, B, C):
    if (a, b, c) not in P_JABC.keys():
        P_JABC[(a, b, c)] = 0.

In [19]:
P_JABC

{(0, 0, 0): 0.16526,
 (0, 0, 1): 0.0,
 (0, 1, 0): 0.16741,
 (0, 1, 1): 0.16556,
 (1, 0, 0): 0.16883,
 (1, 0, 1): 0.16702,
 (1, 1, 0): 0.16592,
 (1, 1, 1): 0.0}

In [20]:
sum([P_JABC[(0, b, c)] for b, c in itertools.product(B, C)])

0.49823000000000006

In [21]:
sum([P_JABC[(a, b, 1)] for b, c in itertools.product(A, B)])

0.33404

## Chain rule of conditional probability

In [22]:
JAC_samples = []
JBC_samples = []

for i in range(N):
    x = np.random.randint(1, sides+1)
    a, b, c = f_A(x), f_B(x), f_C(x)
    
    jAC = (a, c)
    jBC = (b, c)
    
    JAC_samples.append(jAC)
    JBC_samples.append(jBC)
    
P_JAC = get_P(JAC_samples)
P_JBC = get_P(JBC_samples)

In [23]:
# Probability of getting a multiple of 3 knowing that is not prime but multiple of 2
P_JABC[(1, 0, 1)]/P_JAB[(1, 0)]

0.5004494516689639

## Independence

In [24]:
A_samples = []
B_samples = []
JAB_samples = []

for i in range(N):
    x = np.random.randint(1, sides+1)
    a = f_A(x)
    
    y = np.random.randint(1, sides+1)
    b = f_B(y)
    jAB = (a, b)
    
    A_samples.append(a)
    B_samples.append(b)
    JAB_samples.append(jAB)

In [25]:
P_A = get_P(A_samples)
P_B = get_P(B_samples)
P_JAB = get_P(JAB_samples)

In [26]:
P_JAB[(0, 0)]

0.25172

In [27]:
P_A[0] * P_B[0]

0.2516821663