# HDDA. Lab 4. Canonical Correlation Analysis.

- Chatfield, C. and A.J. Collins. Introduction to multivariate analysis. Chapman & Hall.

- Krzanowski, W.J. Principles of multivariate analysis. Oxford, 2000.

- Johnson, R.A. and D.W. Wichern. Applied multivariate statistical analysis. Prentice Hall.

- Dubrov, A.M., Mkhitaryan, V.S., Troshin L.I. Multidimensional statistical methods. Finance and Statistics.

### A Little Book of Python for Multivariate Analysis

[book](https://python-for-multivariate-analysis.readthedocs.io/a_little_book_of_python_for_multivariate_analysis.html)

[ipynb](https://github.com/gatsoulis/a_little_book_of_python_for_multivariate_analysis/blob/master/a_little_book_of_python_for_multivariate_analysis.ipynb)

### Wiki

- [Probability density function](https://en.wikipedia.org/wiki/Probability_density_function)

- [Expected value](https://en.wikipedia.org/wiki/Expected_value)

- [Variance](https://en.wikipedia.org/wiki/Variance)

- [Covariance and correlation](https://en.wikipedia.org/wiki/Covariance_and_correlation)

- [Positive semi-definite matrix](https://en.wikipedia.org/wiki/Positive-definite_matrix)

- [Unbiased estimation](https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation)

- [Quadratic form](https://en.wikipedia.org/wiki/Quadratic_form)

In [None]:
import numpy as np
from numpy import dot
import scipy.linalg as linalg

In [None]:
def _inv_sqrt_spd(mat):
    """Return the inverse square root of a symmetric positive-definite matrix."""
    # eigh is the solver for symmetric input: it returns real eigenvalues and
    # orthonormal eigenvectors, so V diag(1/sqrt(w)) V^T is exactly mat^(-1/2).
    evalue, evector = np.linalg.eigh(mat)
    tol = evalue.max() * mat.shape[0] * np.finfo(float).eps
    if evalue.min() <= tol:
        raise np.linalg.LinAlgError(
            "covariance matrix is singular; cannot compute its inverse square root"
        )
    # Dividing by a 1-D array scales the columns, i.e. evector @ diag(1/sqrt(w)).
    return (evector / np.sqrt(evalue)) @ evector.T


def _eigh_descending(sym):
    """Eigen-decompose a symmetric PSD matrix, largest eigenvalue first."""
    evalue, evector = np.linalg.eigh(sym)
    order = np.argsort(evalue)[::-1]
    # Round-off can push a zero eigenvalue slightly below zero; clamp it so
    # that callers can safely take the square root.
    return np.clip(evalue[order], 0.0, None), evector[:, order]


def cca(X, Y):
    """
    Canonical Correlation Analysis

    Input:
    X: observation matrix X, every column is one data point
       (rows are variables); a 1-D array is treated as a single variable
    Y: observation matrix Y, every column is one data point
       (rows are variables); must have as many columns as X

    Output:
    r1, r2: two (eigenvalues, eigenvectors) tuples.
        r1 is the eigen-decomposition of
            cov_xx^(-1/2) cov_xy cov_yy^(-1) cov_yx cov_xx^(-1/2)
        and r2 the one of
            cov_yy^(-1/2) cov_yx cov_xx^(-1) cov_xy cov_yy^(-1/2).
        The eigenvalues are the squared canonical correlations, sorted in
        descending order; the eigenvectors (columns) are expressed in the
        whitened coordinates of X and Y respectively.

    Raises:
    numpy.linalg.LinAlgError if cov_xx or cov_yy is singular.
    """
    X = np.atleast_2d(np.asarray(X, dtype=float))
    Y = np.atleast_2d(np.asarray(Y, dtype=float))
    n = X.shape[0]

    # One joint covariance matrix holds every block that is needed; this also
    # covers the single-variable case, where np.cov(X) alone would be a scalar.
    joint = np.atleast_2d(np.cov(X, Y))
    cov_xx = joint[:n, :n]
    cov_yy = joint[n:, n:]
    cov_xy = joint[:n, n:]

    cov_xx_isqrt = _inv_sqrt_spd(cov_xx)
    cov_yy_isqrt = _inv_sqrt_spd(cov_yy)

    # With M = cov_xx^(-1/2) cov_xy cov_yy^(-1/2), the two matrices of the
    # method are M M^T and M^T M (because cov_yy^(-1) = cov_yy^(-1/2) squared).
    # Building them this way keeps both exactly symmetric.
    M = cov_xx_isqrt @ cov_xy @ cov_yy_isqrt
    Xmat = M @ M.T
    Ymat = M.T @ M

    r1 = _eigh_descending(Xmat)
    r2 = _eigh_descending(Ymat)

    return r1, r2

In [None]:
# Raw data: nine records (a..i) of eleven numeric fields each.
# The following cell stacks them row-wise and reads the columns, in order, as:
# run100, long_jump, shot, high_jump, run400, hurdle, discus, pole_vault,
# javelin, run1500, score.
# NOTE(review): the running events are stored as negative numbers — presumably
# negated times so that "larger is better" holds for every field; confirm
# against the data source.
a=[-11.25,7.43, 15.48, 2.27, -48.90, -15.13, 49.28, 4.7, 61.32, -268.95, 8488]
b=[-10.87, 7.45, 14.97, 1.97, -47.71, -14.46, 44.36, 5.1, 61.76, -273.02, 8399]
c=[-11.18, 7.44, 14.20, 1.97, -48.29, -14.81, 43.66, 5.2, 64.16, -263.20, 8328]
d=[-10.62, 7.38, 15.02, 2.03, -49.06, -14.72, 44.80, 4.9, 64.04, -285.11, 8306]
e=[-11.02, 7.43, 12.92, 1.97, -47.44, -14.40, 41.20, 5.2, 57.46, -256.64, 8286]
f=[-10.83, 7.72, 13.58, 2.12, -48.34, -14.18, 43.06, 4.9, 52.18, -274.07, 8272]
g=[-11.18, 7.05, 14.12, 2.06, -49.34, -14.39, 41.68, 5.7, 61.60, -291.20, 8216]
h=[-11.05, 6.95, 15.34, 2.00, -48.21, -14.36, 41.32, 4.8, 63.00, -265.86, 8189]
i=[-11.15, 7.12, 14.52, 2.03, -49.15, -14.66, 42.36,4.9, 66.46, -269.62, 8180]

In [None]:
# Stack the records row-wise: one row per record, one column per field.
data = np.vstack([a, b, c, d, e, f, g, h, i])

# Iterating over the transposed table yields its columns in order, so a
# single unpacking names every field at once.
(
    run100,
    long_jump,
    shot,
    high_jump,
    run400,
    hurdle,
    discus,
    pole_vault,
    javelin,
    run1500,
    score,
) = data.T

In [None]:
# First variable set: one row per event, one column per record.
X = np.array([
    shot,
    discus,
    javelin,
    pole_vault,
])
# Second variable set, laid out the same way.
Y = np.array([
    run100,
    run400,
    run1500,
    hurdle,
    long_jump,
    high_jump,
])
# Canonical correlation analysis between the two sets.
r1, r2 = cca(X, Y)

In [None]:
# For each result show its first element and the element-wise square root of
# it, one item per line.
print(
    r1[0],
    np.sqrt(r1[0]),
    r2[0],
    np.sqrt(r2[0]),
    sep="\n",
)

In [None]:
def CCA_SVD(H1, H2, dim):
    """
    Canonical Correlation Analysis via whitening and SVD.

    Input:
    H1: (d1, N) matrix, every column is one sample
    H2: (d2, N) matrix, every column is one sample
    dim: desired dimensionality of the CCA space; at most min(d1, d2)
         components exist, a larger value is silently truncated

    Output:
    A: (d1, dim) projection for H1; columns are canonical directions
    B: (d2, dim) projection for H2; columns are canonical directions
    D: (dim,) canonical correlations in descending order

    With H1c, H2c the mean-centred inputs, the projected data A.T @ H1c and
    B.T @ H2c have identity covariance and cross-covariance diag(D).
    Both sample covariance matrices must be positive definite; a singular
    covariance leads to a division by zero while whitening.
    """
    H1 = np.asarray(H1, dtype=float)
    H2 = np.asarray(H2, dtype=float)
    N = H1.shape[1]

    # Remove the per-variable mean (variables are rows, so average over axis 1).
    H1 = H1 - H1.mean(axis=1, keepdims=True)
    H2 = H2 - H2.mean(axis=1, keepdims=True)

    S11 = (H1 @ H1.T) / (N - 1)
    S22 = (H2 @ H2.T) / (N - 1)
    # Cross-covariance of H2 with H1, shape (d2, d1).
    S21 = (H2 @ H1.T) / (N - 1)

    # The covariances are symmetric, so eigh gives real eigenvalues and
    # orthonormal eigenvectors; K = V diag(1/sqrt(w)) V^T is S^(-1/2).
    D1, V1 = np.linalg.eigh(S11)
    D2, V2 = np.linalg.eigh(S22)
    K11 = (V1 / np.sqrt(D1)) @ V1.T
    K22 = (V2 / np.sqrt(D2)) @ V2.T

    T = K22 @ S21 @ K11
    # numpy returns T = U @ diag(s) @ Vh: the left singular vectors are the
    # COLUMNS of U and the right singular vectors are the ROWS of Vh.
    U, s, Vh = np.linalg.svd(T, full_matrices=False)

    A = K11 @ Vh[:dim].T
    B = K22 @ U[:, :dim]
    # s is already the 1-D vector of singular values; keep the leading ones.
    D = s[:dim]
    return A, B, D

In [None]:
from sklearn.cross_decomposition import CCA