In [1]:
# imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp

from scipy import array, linalg, dot
from sklearn import preprocessing

from scipy.stats import t
from scipy.stats import norm
from scipy.stats import chi2
from scipy.stats import multivariate_normal

from pandas.tools.plotting import scatter_matrix


In [2]:
# Motivational example: Smirnov's theorem 
X = norm.rvs(0,1,10000)
U = norm.cdf(X)
plt.hist(X, bins='auto', alpha=0.5)
plt.hist(U, bins='auto', alpha=0.5)
plt.show()

In [869]:
# settings
m = 2
k = m+1
n = 1000
# generate covariance matrix
P = np.random.uniform(0,1,m*k).reshape(k,m)
P = preprocessing.scale(P)
P = dot(P.T,P)/k
P0 = P
P

array([[ 1.        ,  0.05842025],
       [ 0.05842025,  1.        ]])

In [870]:
# check positive definetness
np.linalg.eigvals(P)

array([ 1.05842025,  0.94157975])

In [871]:
# Cholesky decomposition of P
L = linalg.cholesky(P, lower=True)

In [872]:
# check decomposition
dot(L,L.T)

array([[ 1.        ,  0.05842025],
       [ 0.05842025,  1.        ]])

In [793]:
# simulation loop (gaussian copula)
U = np.zeros((n,m))
for i in range(0,n):    
    Z = norm.rvs(0,1,m)
    X = dot(L,Z)
    U[i,] = norm.cdf(X)

In [794]:
# simulation loop (gaussian copula) - easy way
U = np.zeros((n,m))
for i in range(0,n):
    Z = multivariate_normal.rvs(np.zeros(m),P0)
    U[i,] = norm.cdf(Z)

In [881]:
# simulation loop (t-copula)
nu = 1
U = np.zeros((n,m))
for i in range(0,n):
    c = chi2.rvs(nu,size=1)/nu
    Z = multivariate_normal.rvs(np.zeros(m),P0)
    X = Z/np.sqrt(c)
    U[i,] = t.cdf(X, nu)

In [882]:
# scatter matrix plot for U
df = pd.DataFrame(U)
scatter_matrix(df, alpha=0.2,diagonal='hist')
plt.show()

In [883]:
# dependent normal r.v. simulation
V = np.zeros((n,m))
for i in range(0,n):
    V[i,] = norm.ppf(U[i,])

In [843]:
# dependent Student's r.v. simulation
V = np.zeros((n,m))
for i in range(0,n):
    V[i,] = t.ppf(U[i,], nu)

In [884]:
# scatter matrix plot for V
dfV = pd.DataFrame(V)
scatter_matrix(dfV, alpha=0.2,diagonal='hist')
plt.show()

In [885]:
# simulate multivariate normal
rv = multivariate_normal([0, 0], P0)
W = np.zeros((n,2))
for i in range(0,n):
    W[i,] = rv.rvs()
dfW = pd.DataFrame(W)
plt.scatter(dfW[0],dfW[1])
plt.show()

In [834]:
# simulate multivariate Student's
rv = multivariate_normal([0, 0], P0)
W = np.zeros((n,2))
for i in range(0,n):
    c = chi2.rvs(nu,size=1)/nu
    Z = multivariate_normal.rvs(np.zeros(m),P0)
    W[i,] = Z/np.sqrt(c)
dfW = pd.DataFrame(W)
plt.scatter(dfW[0],dfW[1])
plt.show()

In [886]:
# multivariate distrbution via covariation and via copula
if m==2:
    I1 = ['r']*n
    I2 = ['b']*n
    colors = np.hstack((I1, I2))
    Z = np.vstack((V,W))
    x = Z[:,0]
    y = Z[:,1]    
    plt.scatter(x,y,c=colors,alpha=0.5)
    plt.show()