### 计算特征值和特征向量

In [1]:
import pandas as pd
import numpy as np
# Load the iris data and correlate its four measurement columns.
# The columns are selected by position (the same convention the later
# iris.iloc[:, 0:2] / iris.iloc[:, 2:4] grouping uses) so that any further,
# possibly non-numeric, column never reaches corr().
iris = pd.read_csv("http://image.cador.cn/data/iris.csv")
corr = iris.iloc[:, 0:4].corr()

# Partition corr into blocks: variables 1-2 form the first group, variables
# 3-4 the second group; X12 / X21 are the cross-correlation blocks.
X11 = corr.iloc[0:2, 0:2]
X12 = corr.iloc[0:2, 2:4]
X21 = corr.iloc[2:4, 0:2]
X22 = corr.iloc[2:4, 2:4]


def _cca_matrix(R_aa, R_ab, R_bb, R_ba):
    """Return inv(R_aa) @ R_ab @ inv(R_bb) @ R_ba as a labelled DataFrame.

    The product is computed on plain NumPy arrays so that it does not depend
    on how pandas aligns or re-wraps labels inside np.matmul. The result is a
    square matrix over the variables of R_aa, so it carries R_aa's row and
    column labels.
    """
    product = (
        np.linalg.inv(R_aa.to_numpy())
        @ R_ab.to_numpy()
        @ np.linalg.inv(R_bb.to_numpy())
        @ R_ba.to_numpy()
    )
    return pd.DataFrame(product, index=R_aa.index, columns=R_aa.columns)


def _sorted_eig(matrix):
    """Return (eigenvalues, eigenvectors) of matrix, largest eigenvalue first.

    np.linalg.eig does not promise any ordering; sorting both decompositions
    the same way keeps column i of A's eigenvectors paired with column i of
    B's eigenvectors.
    """
    values, vectors = np.linalg.eig(matrix.to_numpy())
    order = np.argsort(values)[::-1]
    return values[order], vectors[:, order]


# Build the matrices A and B from the formula.
A = _cca_matrix(X11, X12, X22, X21)
B = _cca_matrix(X22, X21, X11, X12)

# Canonical correlations are the square roots of the eigenvalues of A.
A_eig_values, A_eig_vectors = _sorted_eig(A)
B_eig_values, B_eig_vectors = _sorted_eig(B)
np.sqrt(A_eig_values)

array([0.940969  , 0.12393688])

### 比较 $A$ 与 $X\Lambda X^{-1}$ 是否相等

In [2]:
# Rebuild A from its eigendecomposition V @ diag(w) @ inv(V); the rounded
# difference to A is displayed and should be zero everywhere.
A_rebuilt = A_eig_vectors @ np.diag(A_eig_values) @ np.linalg.inv(A_eig_vectors)
(A - A_rebuilt).round(5)

Unnamed: 0,Petal.Length,Petal.Width
Sepal.Length,0.0,0.0
Sepal.Width,0.0,-0.0


### 比较 $B$ 与 $Y\Lambda Y^{-1}$ 是否相等

In [3]:
# Rebuild B from its eigendecomposition V @ diag(w) @ inv(V); the rounded
# difference to B is displayed and should be zero everywhere.
B_rebuilt = B_eig_vectors @ np.diag(B_eig_values) @ np.linalg.inv(B_eig_vectors)
(B - B_rebuilt).round(5)

Unnamed: 0,Sepal.Length,Sepal.Width
Petal.Length,0.0,0.0
Petal.Width,0.0,0.0


### 验证A对应的典型变量C1其标准差是否为1

In [4]:
def _standardize(frame):
    """Return frame with every column centred on 0 and scaled to std 1.

    The population standard deviation (ddof=0) is used, stated explicitly so
    the result does not depend on how a NumPy reducer is dispatched by pandas.
    """
    return (frame - frame.mean()) / frame.std(ddof=0)


def _project(group, vectors):
    """Return group @ vectors as a DataFrame that keeps the labels of group.

    The product is taken on the underlying array and re-wrapped explicitly,
    so the row/column labels do not depend on pandas' np.matmul handling.
    """
    return pd.DataFrame(
        group.to_numpy() @ vectors, index=group.index, columns=group.columns
    )


# Split the variables into two groups and standardize every column.
iris_g1 = _standardize(iris.iloc[:, 0:2])
iris_g2 = _standardize(iris.iloc[:, 2:4])

# Canonical variates C1 from the raw eigenvectors of A.
C1 = _project(iris_g1, A_eig_vectors)

# The means are already 0 but the standard deviations are not 1.
# Values recorded in the original run:
#   std:  Sepal.Length 1.041196, Sepal.Width 0.951045
#   mean: Sepal.Length -1.894781e-16, Sepal.Width -9.000208e-16
C1_raw_std = C1.std(ddof=0)
np.testing.assert_allclose(C1.mean(), 0, atol=1e-10)

# Rescale each eigenvector by the reciprocal of its variate's std so that the
# resulting canonical variates have unit standard deviation.
eA = A_eig_vectors @ np.diag(1 / C1_raw_std.to_numpy())

# Re-check mean and standard deviation after the rescaling.
C1 = _project(iris_g1, eA)
np.testing.assert_allclose(C1.mean(), 0, atol=1e-10)
np.testing.assert_allclose(C1.std(ddof=0), 1)

# The eigenvectors of A now meet the requirement; repeat the same for B.
# Values recorded in the original run before rescaling:
#   std:  Petal.Length 0.629124, Petal.Width 0.200353
#   mean: Petal.Length -1.421085e-16, Petal.Width -7.993606e-17
C2 = _project(iris_g2, B_eig_vectors)
C2_raw_std = C2.std(ddof=0)
np.testing.assert_allclose(C2.mean(), 0, atol=1e-10)

eB = B_eig_vectors @ np.diag(1 / C2_raw_std.to_numpy())
C2 = _project(iris_g2, eB)
np.testing.assert_allclose(C2.std(ddof=0), 1)

# Displayed result of the cell: the column means of C2 (numerically zero).
C2.mean()

Petal.Length   -2.842171e-16
Petal.Width    -4.144833e-16
dtype: float64

### 验证C1和C2相关性

In [5]:
# Put both sets of canonical variates side by side and display their
# correlation matrix rounded to five decimals.
canonical_variates = pd.concat([C1, C2], axis=1)
canonical_variates.corr().round(5)

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
Sepal.Length,1.0,-0.0,0.94097,-0.0
Sepal.Width,-0.0,1.0,0.0,0.12394
Petal.Length,0.94097,0.0,1.0,0.0
Petal.Width,-0.0,0.12394,0.0,1.0


### 使用 sklearn 的 CCA 求解典型相关系数（与上面的手工计算结果对照）

In [6]:
from sklearn.cross_decomposition import CCA

# Fit a two-component CCA on the two standardized variable groups.
cca = CCA(n_components=2)
cca.fit(iris_g1, iris_g2)

# X_c and Y_c are the canonical variates obtained by transforming each group.
X_c, Y_c = cca.transform(iris_g1, iris_g2)

# Label the variates with the column names of their group, then display the
# correlation matrix of all four variates rounded to five decimals.
x_variates = pd.DataFrame(X_c, columns=iris_g1.columns)
y_variates = pd.DataFrame(Y_c, columns=iris_g2.columns)
pd.concat([x_variates, y_variates], axis=1).corr().round(5)

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
Sepal.Length,1.0,0.0,0.94097,-0.0
Sepal.Width,0.0,1.0,-1e-05,0.12394
Petal.Length,0.94097,-1e-05,1.0,-0.0
Petal.Width,-0.0,0.12394,-0.0,1.0
