In [697]:
#!cp 0-mean_cov.py multinormal.py

In [3]:
# Add execute permission to every .py file in the current directory.
!chmod +x *.py

In [497]:
#!/usr/bin/env python3
""" mean cov """
import numpy as np


def mean_cov(X):
    """
    Calculate the mean and covariance of a data set.

    Args:
        X: numpy.ndarray of shape (n, d) holding n data points of
           dimension d, one data point per row.

    Returns:
        mean: numpy.ndarray of shape (1, d), the per-column mean of X.
        cov: numpy.ndarray of shape (d, d), the unbiased sample
             covariance of X (normalised by n - 1).

    Raises:
        TypeError: if X is not a 2D numpy.ndarray.
        ValueError: if X holds fewer than two data points.
    """
    if type(X) is not np.ndarray or len(X.shape) != 2:
        raise TypeError("X must be a 2D numpy.ndarray")
    n = X.shape[0]
    if n < 2:
        # n - 1 would be zero and the covariance undefined
        raise ValueError("X must contain multiple data points")
    mean = X.mean(axis=0, keepdims=True)
    centered = X - mean
    # Use the centred data on BOTH sides of the product: multiplying by the
    # raw X is only algebraically equivalent and amplifies rounding error
    # when the data sit far from the origin.
    cov = np.dot(centered.T, centered.conj()) / (n - 1)
    return mean, cov

In [498]:
#!/usr/bin/env python3

if __name__ == '__main__':
    import numpy as np
    # When run as a standalone script the function is loaded with:
    # mean_cov = __import__('0-mean_cov').mean_cov

    # Parameters of the distribution the sample is drawn from.
    true_mean = [12, 30, 10]
    true_cov = [[36, -30, 15],
                [-30, 100, -20],
                [15, -20, 25]]

    np.random.seed(0)
    X = np.random.multivariate_normal(true_mean, true_cov, 10000)
    mean, cov = mean_cov(X)

    print(mean)
    # numpy's own estimate first, then ours and its transpose so the three
    # matrices can be compared by eye.
    for matrix in (np.cov(X.T), cov, cov.T):
        print(matrix)
    

[[12.04341828 29.92870885 10.00515808]]
[[ 36.2007391  -29.79405239  15.37992641]
 [-29.79405239  97.77730626 -20.67970134]
 [ 15.37992641 -20.67970134  24.93956823]]
[[ 36.2007391  -29.79405239  15.37992641]
 [-29.79405239  97.77730626 -20.67970134]
 [ 15.37992641 -20.67970134  24.93956823]]
[[ 36.2007391  -29.79405239  15.37992641]
 [-29.79405239  97.77730626 -20.67970134]
 [ 15.37992641 -20.67970134  24.93956823]]


In [695]:
def correlation(C):
    """
    Calculate a correlation matrix from a covariance matrix.

    Args:
        C: numpy.ndarray of shape (d, d), the covariance matrix.

    Returns:
        numpy.ndarray of shape (d, d) where entry (i, j) is
        C[i, j] / sqrt(C[i, i] * C[j, j]).

    Raises:
        TypeError: if C is not a numpy.ndarray.
        ValueError: if C is not a 2D square array.
    """
    if type(C) is not np.ndarray:
        raise TypeError("C must be a numpy.ndarray")
    # Check the rank first: indexing shape[1] on a 0-D or 1-D array would
    # raise IndexError instead of the intended ValueError.
    if len(C.shape) != 2 or C.shape[0] != C.shape[1]:
        raise ValueError("C must be a 2D square")
    variances = np.diag(C)
    # outer(v, v)[i, j] == C[i, i] * C[j, j], so this is the per-entry
    # normaliser computed for the whole matrix at once.
    return C / np.sqrt(np.outer(variances, variances))
    

In [696]:
#!/usr/bin/env python3

if __name__ == '__main__':
    import numpy as np
    # When run as a standalone script the function is loaded with:
    # correlation = __import__('1-correlation').correlation

    C = np.array([
        [36, -30, 15],
        [-30, 100, -20],
        [15, -20, 25],
    ])
    Co = correlation(C)
    # Covariance matrix first, then the correlation matrix derived from it.
    print(C, Co, sep='\n')

[[ 36 -30  15]
 [-30 100 -20]
 [ 15 -20  25]]
[[ 1.  -0.5  0.5]
 [-0.5  1.  -0.4]
 [ 0.5 -0.4  1. ]]


In [774]:
#!/usr/bin/env python3
""" mean cov """
import numpy as np


def mean_cov(X):
    """
    Calculate the mean and covariance of a data set.

    Args:
        X: numpy.ndarray of shape (n, d) holding n data points of
           dimension d, one data point per row.

    Returns:
        mean: numpy.ndarray of shape (1, d), the per-column mean of X.
        cov: numpy.ndarray of shape (d, d), the unbiased sample
             covariance of X (normalised by n - 1).

    Raises:
        TypeError: if X is not a 2D numpy.ndarray.
        ValueError: if X holds fewer than two data points.
    """
    if type(X) is not np.ndarray or len(X.shape) != 2:
        raise TypeError("X must be a 2D numpy.ndarray")
    # Compare the number of data points itself (not n - 1) against 2, so a
    # two-point data set is accepted.
    n = X.shape[0]
    if n < 2:
        raise ValueError("X must contain multiple data points")
    mean = X.mean(axis=0, keepdims=True)
    centered = X - mean
    # Use the centred data on BOTH sides of the product: multiplying by the
    # raw X is only algebraically equivalent and amplifies rounding error
    # when the data sit far from the origin.
    cov = np.dot(centered.T, centered.conj()) / (n - 1)
    return mean, cov


class MultiNormal():
    """
    Multivariate Normal distribution fitted to a data set.

    Attributes:
        mean: numpy.ndarray of shape (d, 1), the mean of the data.
        cov: numpy.ndarray of shape (d, d), the unbiased sample
             covariance of the data (normalised by n - 1).
    """

    def __init__(self, data):
        """
        Fit the distribution to data.

        Args:
            data: numpy.ndarray of shape (d, n) holding n data points of
                  dimension d, one data point per column.

        Raises:
            TypeError: if data is not a 2D numpy.ndarray.
            ValueError: if data holds fewer than two data points.
        """
        # Require exactly two dimensions; higher-rank arrays are rejected
        # here with the message that names `data`.
        if type(data) is not np.ndarray or len(data.shape) != 2:
            raise TypeError("data must be a 2D numpy.ndarray")
        n = data.shape[1]
        if n < 2:
            raise ValueError("data must contain multiple data points")

        self.mean = data.mean(axis=1, keepdims=True)
        # Covariance is computed here from the centred data so that every
        # data set with at least two points is accepted.
        centered = data - self.mean
        self.cov = np.dot(centered, centered.conj().T) / (n - 1)

    def pdf(self, x):
        """
        Calculate the PDF at a data point.

        Args:
            x: numpy.ndarray of shape (d, 1), the point to evaluate.

        Returns:
            The value of the probability density function at x.

        Raises:
            TypeError: if x is not a numpy.ndarray.
            ValueError: if x does not have the shape (d, 1).
        """
        if type(x) is not np.ndarray:
            raise TypeError("x must be a numpy.ndarray")
        d = self.mean.shape[0]
        if len(x.shape) != 2 or x.shape[1] != 1 or x.shape[0] != d:
            # Positional field: a named "{d}" placeholder with a positional
            # argument raises KeyError instead of producing the message.
            raise ValueError("x must have the shape ({}, 1)".format(d))
        x_m = x - self.mean
        sqrt = np.sqrt((2 * np.pi)**d * np.linalg.det(self.cov))
        # solve() gives cov^-1 (x - mean) without forming the inverse.
        exp = np.exp(-(np.linalg.solve(self.cov, x_m).T.dot(x_m)) / 2)
        # The product is a (1, 1) array; unwrap it to a scalar.
        return (1 / (sqrt) * exp)[0][0]


In [776]:
#!/usr/bin/env python3

if __name__ == '__main__':
    import numpy as np
    # When run as a standalone script the class is loaded with:
    # from multinormal import MultiNormal

    # Parameters of the distribution the samples are drawn from.
    mu = [12, 30, 10]
    sigma = [[36, -30, 15],
             [-30, 100, -20],
             [15, -20, 25]]

    np.random.seed(0)
    # Transpose so that each column is one data point.
    data = np.random.multivariate_normal(mu, sigma, 10000).T
    mn = MultiNormal(data)

    # One further draw from the same generator, used as the query point.
    x = np.random.multivariate_normal(mu, sigma, 1).T
    print(x)
    print(mn.pdf(x))

[[ 8.20311936]
 [32.84231319]
 [ 9.67254478]]
0.00022930236202143217


In [703]:
if __name__ == '__main__':
    import numpy as np
    # When run as a standalone script the class is loaded with:
    # from multinormal import MultiNormal

    # Parameters of the distribution the sample is drawn from.
    mu = [12, 30, 10]
    sigma = [[36, -30, 15],
             [-30, 100, -20],
             [15, -20, 25]]

    np.random.seed(0)
    # Transpose so that each column is one data point.
    data = np.random.multivariate_normal(mu, sigma, 10000).T
    mn = MultiNormal(data)

    # Show the fitted parameters: mean first, then covariance.
    print(mn.mean)
    print(mn.cov)

[[12.04341828]
 [29.92870885]
 [10.00515808]]
[[ 36.2007391  -29.79405239  15.37992641]
 [-29.79405239  97.77730626 -20.67970134]
 [ 15.37992641 -20.67970134  24.93956823]]


In [None]:
# NOTE(review): `y` is not defined in any visible cell and this cell has no
# execution count, so this looks like a pasted signature reminder rather
# than a runnable call -- confirm before executing.
np.trapz(y, x=None, dx=1.0, axis=-1) # definite integral (trapezoidal rule)

In [773]:
# Count the characters in a fragment of the pdf value printed earlier: the
# string is the first 17 digits after "0." in 0.00022930236202143217.
len('00022930236202143')

17