# CCA

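Canonical correlation analysis (CCA) finds weight vectors $a$ and $b$ such that the linear combinations $a^\top X$ and $b^\top Y$ are maximally correlated. With $S_{11}$ and $S_{22}$ the covariance matrices of $X$ and $Y$, and $S_{12} = S_{21}^\top$ their cross-covariance, the weights solve: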

\begin{align*}
S_{11}^{-1}S_{12}S_{22}^{-1}S_{21}a=\rho^2a \\
S_{22}^{-1}S_{21}S_{11}^{-1}S_{12}b=\rho^2b
\end{align*}
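Solve these two eigenvalue problems: the eigenvalues are the squared canonical correlations $\rho^2$, and the eigenvectors $a$ and $b$ are the canonical weight vectors in the $X$ and $Y$ spaces.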

In [91]:
import numpy as np
from numpy import dot
import scipy.linalg as la
from sklearn.cross_decomposition import CCA

In [2]:
def cov(X, Y):
    """
    Return the joint covariance matrix of X and Y.

    Both arguments are forwarded unchanged to ``np.cov``, which stacks the
    variables of X on top of those of Y and returns the covariance matrix
    of the stacked set.
    """
    joint_covariance = np.cov(X, Y)
    return joint_covariance

In [None]:
def var_single(X):
    """
    Return the variance of a single variable as a 1x1 matrix.

    ``np.cov`` collapses a single variable to a 0-d array, which cannot be
    passed to matrix routines such as eigendecomposition or inversion.
    This wrapper restores the two-dimensional shape.

    Input:
    X: observations of one variable (a 1-D sequence, or a 2-D array with a
       single row where every column is one data point)

    Output:
    2-D array of shape (1, 1) holding the sample variance of X
    """
    # The previous implementation built np.matrix('v,1;1,v'), which parses
    # the *string* and never substitutes the computed variance, so it raised.
    return np.atleast_2d(np.cov(X))
    

In [19]:
# Small toy inputs: x holds two rows of three values, y a single row of three.
x = [[-2.1, -1,  4.3],[3,8,2]]
y = [3,  1.1,  0.12]

In [30]:
# np.matrix parses a string literal: ',' separates entries, ';' separates rows.
print(np.matrix('1,2;2,1'))

[[1 2]
 [2 1]]


In [203]:
def _inv_sqrt(sym):
    """Return V diag(1/sqrt(w)) V^T from the eigendecomposition of sym."""
    evalue, evector = la.eig(sym)
    return dot(dot(evector, np.diag(1 / np.sqrt(evalue))), np.transpose(evector))


def cca(X, Y):
    """
    Canonical Correlation Analysis

    Input:
    X: observation matrix X, every column is one data point
    Y: observation matrix Y, every column is one data point
    (a single variable may be given as a one-row matrix or a 1-D sequence)

    Output:
    r1, r2: the results of ``scipy.linalg.eig`` on the symmetrised X-side
    and Y-side CCA matrices. Each is a tuple ``(eigenvalues, eigenvectors)``;
    the eigenvalues are the squared canonical correlations (returned with a
    complex dtype by ``la.eig``), and the eigenvectors are expressed in the
    whitened X / Y coordinates.
    """
    X = np.atleast_2d(X)
    Y = np.atleast_2d(Y)
    n = X.shape[0]

    # One joint covariance matrix provides every block and is always 2-D,
    # so a single-row X or Y needs no special handling.
    joint = np.cov(X, Y)
    cov_xx = joint[:n, :n]
    cov_yy = joint[n:, n:]
    cov_xy = joint[:n, n:]
    cov_yx = np.transpose(cov_xy)

    # Inverse square roots whiten each space, which makes the matrices
    # handed to the final eigen-solver symmetric in exact arithmetic.
    cov_xx_isqrt = _inv_sqrt(cov_xx)
    cov_yy_isqrt = _inv_sqrt(cov_yy)
    cov_xx_inv = la.inv(cov_xx)
    cov_yy_inv = la.inv(cov_yy)

    # Xmat = Sxx^-1/2 Sxy Syy^-1 Syx Sxx^-1/2 (and the mirror image for Ymat)
    Xmat = dot(dot(dot(dot(cov_xx_isqrt, cov_xy), cov_yy_inv), cov_yx), cov_xx_isqrt)
    Ymat = dot(dot(dot(dot(cov_yy_isqrt, cov_yx), cov_xx_inv), cov_xy), cov_yy_isqrt)

    r1 = la.eig(Xmat)
    r2 = la.eig(Ymat)

    return r1, r2
    

In [177]:
# Nine observation rows (a..i) of eleven values each. Column order, as
# unpacked from `data` further down: run100, long_jump, shot, high_jump,
# run400, hurdle, discus, pole_vault, javelin, run1500, score.
a=[-11.25,7.43, 15.48, 2.27, -48.90, -15.13, 49.28, 4.7, 61.32, -268.95, 8488]
b=[-10.87, 7.45, 14.97, 1.97, -47.71, -14.46, 44.36, 5.1, 61.76, -273.02, 8399]
c=[-11.18, 7.44, 14.20, 1.97, -48.29, -14.81, 43.66, 5.2, 64.16, -263.20, 8328]
d=[-10.62, 7.38, 15.02, 2.03, -49.06, -14.72, 44.80, 4.9, 64.04, -285.11, 8306]
e=[-11.02, 7.43, 12.92, 1.97, -47.44, -14.40, 41.20, 5.2, 57.46, -256.64, 8286]
f=[-10.83, 7.72, 13.58, 2.12, -48.34, -14.18, 43.06, 4.9, 52.18, -274.07, 8272]
g=[-11.18, 7.05, 14.12, 2.06, -49.34, -14.39, 41.68, 5.7, 61.60, -291.20, 8216]
h=[-11.05, 6.95, 15.34, 2.00, -48.21, -14.36, 41.32, 4.8, 63.00, -265.86, 8189]
i=[-11.15, 7.12, 14.52, 2.03, -49.15, -14.66, 42.36,4.9, 66.46, -269.62, 8180]

In [45]:
# Stack the nine observation rows, then split the table column by column:
# each row of data.T is one column of data.
data = np.vstack([a, b, c, d, e, f, g, h, i])
(
    run100,
    long_jump,
    shot,
    high_jump,
    run400,
    hurdle,
    discus,
    pole_vault,
    javelin,
    run1500,
    score,
) = data.T

X: shot, discus, javelin, pole_vault

Y: run100,run400,run1500,hurdle,long_jump,high_jump

In [171]:
# Each row is one variable and each column one observation, matching the
# layout cca() expects.
X = np.array([shot, discus, javelin, pole_vault])
Y = np.array([run100, run400, run1500, hurdle, long_jump, high_jump])

In [205]:
# r1 and r2 are the eigen-solver results for the X-side and Y-side matrices.
r1,r2=cca(X,Y)

In [206]:
# First element of the eigen-solver result: the eigenvalues (rho^2).
r1[0]

array([ 0.60595938+0.j,  0.94686012+0.j,  1.00000000+0.j,  1.00000000+0.j])

In [207]:
# Square roots of the X-side eigenvalues, i.e. the rho values.
np.sqrt(r1[0])

array([ 0.77843392+0.j,  0.97306738+0.j,  1.00000000+0.j,  1.00000000+0.j])

In [208]:
# Square roots of the Y-side eigenvalues, i.e. the rho values.
np.sqrt(r2[0])

array([  7.78433925e-01 +0.00000000e+00j,
         9.73067377e-01 +0.00000000e+00j,
         0.00000000e+00 +7.82039080e-09j,
         1.42187762e-08 +0.00000000e+00j,
         1.00000000e+00 +0.00000000e+00j,   1.00000000e+00 +0.00000000e+00j])