In [87]:
#!pip install prettytable

In [88]:
import numpy as np
import matplotlib.pyplot as plt
import prettytable as pt
from mpl_toolkits.mplot3d import Axes3D
from IPython.display import set_matplotlib_formats
import tensorflow as tf
set_matplotlib_formats('retina')
%matplotlib inline

# Exercise 1

In [89]:
xs = np.array([0, 1])
ys = np.array([10, 20, 30])
f = np.array([[0.2, 0.15, 0.25], [0.1, 0.2, 0.1]])
f_cum = np.cumsum(f)

In [97]:
class discrete_bijoint:

    def __init__(self, f, xs, ys):
        '''initialization
        -----------------
        parameters:
        f: the bivariate joint probability matrix
        xs: values of x vector
        ys: values of y vector
        '''
        self.f, self.xs, self.ys = f, xs, ys

    def legitimatePD(self):
        '''provide a check in the class to make sure that it is a legitimate joint distribution.'''
        if np.cumsum(f)[-1]==1:
            print("it is a legitimate joint distribution.")

        else:
            print("it is not a legitimate joint distribution.")

    def joint_tb(self):
        '''print the joint distribution table'''
        xs = self.xs
        ys = self.ys
        f = self.f
        jtb = pt.PrettyTable()
        jtb.field_names = ['x_value/y_value', *ys, 'marginal sum for x']
        for i in range(len(xs)):
            jtb.add_row([xs[i], *f[i, :], np.sum(f[i, :])])
        jtb.add_row(['marginal_sum for y', *np.sum(f, 0), np.sum(f)])
        print("\nThe joint probability distribution for x and y\n", jtb)
        self.jtb = jtb

    def joint_draw(self, n):
        '''draw random numbers
        ----------------------
        parameters:
        n: number of random numbers to draw
        '''
        xs = self.xs
        ys = self.ys
        f_cum = np.cumsum(self.f)
        p = np.random.rand(n)
        self.p = p
        x = np.empty([2, p.shape[0]])
        lf = len(f_cum)
        lx = len(xs)-1
        ly = len(ys)-1
        for i in range(lf):
            x[0, p < f_cum[lf-1-i]] = xs[lx]
            x[1, p < f_cum[lf-1-i]] = ys[ly]
            if ly == 0:
                lx -= 1
                ly = len(ys)-1
            else:
                ly -= 1

        print(n, "x's and y's drawn from the joint distribution:\n", list(zip(x[0], x[1])))    

    def marginal_draw(self, n):
        '''draw random numbers from marginal distribution
        ----------------------
        parameters:
        n: number of random numbers to draw
        '''
        xs = self.xs
        ys = self.ys
        f = self.f

        p = np.random.rand(n)
        point = np.empty([2, p.shape[0]])

        mx = np.empty([1, len(xs)])
        my = np.empty([1, len(ys)])

        for i in range(max(len(xs), len(ys))):
            if i < len(xs):
                mx[0,i] = np.sum(f[i,:])

            if i < len(ys):
                my[0,i] = np.sum(f[:,i])

        fx_cum = np.cumsum(mx[0])
        fy_cum = np.cumsum(my[0])


        for i in range(len(xs)):
            point[0, p < fx_cum[len(xs)-i-1]] = xs[len(xs)-i-1]

        for i in range(len(ys)):
            point[1, p < fy_cum[len(ys)-i-1]] = ys[len(ys)-i-1]

        self.x = point
        print("\nrandom numbers from marginal distribution of x\n", point[0])
        print("\nrandom numbers from marginal distribution of y\n", point[1])

    def conditional_draw(self, n):
        '''draw random numbers from conditional distribution
        ----------------------
        parameters:
        n: number of random numbers to draw
        '''
        xs = self.xs
        ys = self.ys
        f = self.f
        tf = np.transpose(f)

        p = np.random.rand(n)

        mx = np.empty([1, len(xs)])
        my = np.empty([1, len(ys)])

        for i in range(max(len(xs), len(ys))):
            if i < len(xs):
                mx[0, i] = np.sum(f[i, :])

            if i < len(ys):
                my[0, i] = np.sum(f[:, i])

        for i in range(len(ys)):    #y different
            pointx = np.empty([1, p.shape[0]])   #generate n points
            for j in range(len(xs)):    #change the value of x
                pointx[0, p < (np.cumsum(f[:,i])[len(xs) - j - 1])/np.sum(f[:,i])   ] = xs[len(xs) - j - 1]
            print("The sample drawn from P(x | y=", ys[i] , ") is\n" , pointx)

        for i in range(len(xs)):    #y different
            pointy = np.empty([1, p.shape[0]])   #generate n points
            for j in range(len(ys)):    #change the value of x
                pointy[0, p < (np.cumsum(f[i,:])[len(ys) - j - 1])/np.sum(f[i,:])   ] = ys[len(ys) - j - 1]
            print("The sample drawn from P(y | x=", xs[i] , ") is\n" , pointy)

    def marginal_pop(self):
        '''print the population marginal distribution table'''
        xs = self.xs
        ys = self.ys
        f = self.f

        # print output
        xmtb = pt.PrettyTable()
        ymtb = pt.PrettyTable()
        xmtb.field_names = ['x_value', 'x_prob']
        ymtb.field_names = ['y_value', 'y_prob']
        for i in range(max(len(xs), len(ys))):
            if i < len(xs):
                xmtb.add_row([xs[i], np.sum(f[i,])])
            if i < len(ys):
                ymtb.add_row([ys[i], np.sum(f[:,i])])

        print("\nmarginal distribution for x\n", xmtb)
        print("\nmarginal distribution for y\n", ymtb)

    def conditional_pop(self):
        '''conditional distribution'''
        xs = self.xs
        ys = self.ys
        f = self.f
        tf = np.transpose(f)

        xcp = np.empty([len(ys), len(xs)])
        ycp = np.empty([len(xs), len(ys)])

        for i in range(len(ys)):
            for j in range(len(xs)):
                xcp[i,j] = f[j,i]/np.sum(tf[i,:])

        for i in range(len(xs)):
            for j in range(len(ys)):
                ycp[i,j] = f[i,j] / np.sum(tf[:, i])

        # print output
        xctb = pt.PrettyTable()
        yctb = pt.PrettyTable()
        xctb.field_names = ['x_value', *xs]
        yctb.field_names = ['y_value', *ys]
        for i in range(max(len(xs), len(ys))):
            if i < len(ys):
                xctb.add_row([    ys[i], *xcp[i]    ])
            if i < len(xs):
                yctb.add_row([    xs[i], *ycp[i]    ])
        print("\nconditional distribution for x\n", xctb)
        print("\nconditional distribution for y\n", yctb)

    def joint_draw(self, n):
        '''draw random numbers
        ----------------------
        parameters:
        n: number of random numbers to draw
        '''
        xs = self.xs
        ys = self.ys
        f_cum = np.cumsum(self.f)
        p = np.random.rand(n)
        self.p = p
        x = np.empty([2, p.shape[0]])
        lf = len(f_cum)
        lx = len(xs)-1
        ly = len(ys)-1
        for i in range(lf):
            x[0, p < f_cum[lf-1-i]] = xs[lx]
            x[1, p < f_cum[lf-1-i]] = ys[ly]
            if ly == 0:
                lx -= 1
                ly = len(ys)-1
            else:
                ly -= 1

        print(n, "x's and y's drawn from the joint distribution:\n", list(zip(x[0], x[1])))

    def marginal_draw(self, n):
        '''draw random numbers from marginal distribution
        ----------------------
        parameters:
        n: number of random numbers to draw
        '''
        xs = self.xs
        ys = self.ys
        f = self.f

        p = np.random.rand(n)
        point = np.empty([2, p.shape[0]])

        mx = np.empty([1, len(xs)])
        my = np.empty([1, len(ys)])

        for i in range(max(len(xs), len(ys))):
            if i < len(xs):
                mx[0,i] = np.sum(f[i,:])

            if i < len(ys):
                my[0,i] = np.sum(f[:,i])

        fx_cum = np.cumsum(mx[0])
        fy_cum = np.cumsum(my[0])


        for i in range(len(xs)):
            point[0, p < fx_cum[len(xs)-i-1]] = xs[len(xs)-i-1]

        for i in range(len(ys)):
            point[1, p < fy_cum[len(ys)-i-1]] = ys[len(ys)-i-1]

        self.x = point
        print("\nrandom numbers from marginal distribution of x\n", point[0])
        print("\nrandom numbers from marginal distribution of y\n", point[1])

    def conditional_draw(self, n):
        '''draw random numbers from conditional distribution
        ----------------------
        parameters:
        n: number of random numbers to draw
        '''
        xs = self.xs
        ys = self.ys
        f = self.f
        tf = np.transpose(f)

        p = np.random.rand(n)

        mx = np.empty([1, len(xs)])
        my = np.empty([1, len(ys)])

        for i in range(max(len(xs), len(ys))):
            if i < len(xs):
                mx[0, i] = np.sum(f[i, :])

            if i < len(ys):
                my[0, i] = np.sum(f[:, i])

        for i in range(len(ys)):    #y different
            pointx = np.empty([1, p.shape[0]])   #generate n points
            for j in range(len(xs)):    #change the value of x
                pointx[0, p < (np.cumsum(f[:,i])[len(xs) - j - 1])/np.sum(f[:,i])   ] = xs[len(xs) - j - 1]
            print("The sample drawn from P(x | y=", ys[i] , ") is\n" , pointx)

        for i in range(len(xs)):    #y different
            pointy = np.empty([1, p.shape[0]])   #generate n points
            for j in range(len(ys)):    #change the value of x
                pointy[0, p < (np.cumsum(f[i,:])[len(ys) - j - 1])/np.sum(f[i,:])   ] = ys[len(ys) - j - 1]
            print("The sample drawn from P(y | x=", xs[i] , ") is\n" , pointy)

In [98]:
d = discrete_bijoint(f, xs, ys)
d

<__main__.discrete_bijoint at 0x7ff3170b21f0>

In [99]:
d.legitimatePD()

it is not a legitimate joint distribution.


In [100]:
d.joint_tb()


The joint probability distribution for x and y
 +--------------------+---------------------+------+------+--------------------+
|  x_value/y_value   |          10         |  20  |  30  | marginal sum for x |
+--------------------+---------------------+------+------+--------------------+
|         0          |         0.2         | 0.15 | 0.25 |        0.6         |
|         1          |         0.1         | 0.2  | 0.1  |        0.4         |
| marginal_sum for y | 0.30000000000000004 | 0.35 | 0.35 | 0.9999999999999999 |
+--------------------+---------------------+------+------+--------------------+


In [101]:
#conditional distribution
d.marginal_pop()


marginal distribution for x
 +---------+--------+
| x_value | x_prob |
+---------+--------+
|    0    |  0.6   |
|    1    |  0.4   |
+---------+--------+

marginal distribution for y
 +---------+---------------------+
| y_value |        y_prob       |
+---------+---------------------+
|    10   | 0.30000000000000004 |
|    20   |         0.35        |
|    30   |         0.35        |
+---------+---------------------+


In [102]:
#conditional distribution
d.conditional_pop()


conditional distribution for x
 +---------+--------------------+---------------------+
| x_value |         0          |          1          |
+---------+--------------------+---------------------+
|    10   | 0.6666666666666666 |  0.3333333333333333 |
|    20   | 0.4285714285714286 |  0.5714285714285715 |
|    30   | 0.7142857142857143 | 0.28571428571428575 |
+---------+--------------------+---------------------+

conditional distribution for y
 +---------+---------------------+------+--------------------+
| y_value |          10         |  20  |         30         |
+---------+---------------------+------+--------------------+
|    0    | 0.33333333333333337 | 0.25 | 0.4166666666666667 |
|    1    |         0.25        | 0.5  |        0.25        |
+---------+---------------------+------+--------------------+


In [95]:
#draw
d.joint_draw(50)
d.marginal_draw(50)
d.conditional_draw(50)

50 x's and y's drawn from the joint distribution:
 [(0.0, 30.0), (1.0, 10.0), (0.0, 20.0), (0.0, 30.0), (1.0, 20.0), (0.0, 30.0), (0.0, 10.0), (0.0, 20.0), (0.0, 30.0), (0.0, 30.0), (1.0, 20.0), (1.0, 20.0), (0.0, 20.0), (1.0, 30.0), (0.0, 20.0), (1.0, 10.0), (1.0, 10.0), (1.0, 20.0), (1.0, 20.0), (0.0, 20.0), (0.0, 30.0), (0.0, 10.0), (0.0, 30.0), (0.0, 20.0), (0.0, 30.0), (0.0, 30.0), (1.0, 10.0), (1.0, 20.0), (1.0, 30.0), (1.0, 20.0), (0.0, 30.0), (0.0, 10.0), (1.0, 20.0), (0.0, 10.0), (0.0, 30.0), (1.0, 20.0), (1.0, 20.0), (0.0, 10.0), (0.0, 30.0), (1.0, 30.0), (1.0, 30.0), (0.0, 20.0), (1.0, 20.0), (1.0, 20.0), (1.0, 20.0), (0.0, 10.0), (0.0, 20.0), (0.0, 30.0), (0.0, 10.0), (1.0, 10.0)]

random numbers from marginal distribution of x
 [1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1.
 0. 1. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 1.]

random numbers from marginal distribution of y
 [20. 30. 30. 10. 30. 20. 20. 20. 30. 30. 30. 3