---
### 12.2.1 因子分析的数学理论
#### 1. 因子分析模型
将原始变量分解为若干个因子的线性组合
$$x_i=\mu_i+a_{i1}f_1+a_{i2}f_2+\cdots+a_{ip}f_p+\varepsilon_i$$
#### 2. 因子旋转
使因子载荷矩阵的结构简化
#### 3. 因子得分
由原始变量的数据计算每个因子的得分

In [1]:
import numpy as np

# Example 3x3 correlation matrix (symmetric, unit diagonal).
R = np.array([[1, 1/5, -1/5], [1/5, 1, -2/5], [-1/5, -2/5, 1]])
# R is symmetric, so use the symmetric eigen-solver: eigh guarantees real
# eigenvalues and orthonormal eigenvectors, which the general-purpose eig
# does not promise.
val, vec = np.linalg.eigh(R)
ind = np.argsort(-val)      # indices ordering the eigenvalues from largest to smallest
vec = vec[:, ind]           # reorder the eigenvector columns to match
val = val[ind]
# Initial loading matrix: every eigenvector column scaled by the square root
# of its eigenvalue. The sign of each column is arbitrary.
A0 = vec * np.sqrt(val)
np.cumsum(val / val.sum())  # cumulative proportion of total variance

array([0.51547005, 0.8       , 1.        ])

In [2]:
m = 2   # number of factors to retain
# Keep the first m columns of the initial loading matrix; slicing with m
# (rather than a literal 2) keeps the setting above in effect.
A = A0[:, :m]
Ac = np.sum(A**2, axis=0)   # contribution of each factor (column sums of squared loadings)
Ar = np.sum(A**2, axis=1)   # communality of each variable (row sums of squared loadings)
Ac, Ar

(array([1.54641016, 0.85358984]), array([1. , 0.7, 0.7]))

### 12.2.2 因子分析的应用

In [3]:
from factor_analyzer import FactorAnalyzer as FA
from scipy.stats import zscore

# Read the raw observations from the text file, standardize them, and build
# the correlation matrix of the standardized data.
data_file = '../../12第12章  主成分分析与因子分析/data12_5_1.txt'
data = np.loadtxt(data_file)
B = zscore(data, ddof=1)            # z-scores computed with the sample standard deviation (ddof=1)
R = np.corrcoef(np.transpose(B))    # corrcoef treats rows as variables, hence the transpose
R

array([[1.        , 0.76868484, 0.68193448, 0.71187315, 0.84264892,
        0.72092259],
       [0.76868484, 1.        , 0.71243838, 0.68790154, 0.66910603,
        0.6414101 ],
       [0.68193448, 0.71243838, 1.        , 0.5557287 , 0.61561746,
        0.74972839],
       [0.71187315, 0.68790154, 0.5557287 , 1.        , 0.51866661,
        0.58407218],
       [0.84264892, 0.66910603, 0.61561746, 0.51866661, 1.        ,
        0.59610918],
       [0.72092259, 0.6414101 , 0.74972839, 0.58407218, 0.59610918,
        1.        ]])

In [4]:
# R is a symmetric correlation matrix, so eigh is the appropriate solver:
# it returns real eigenvalues in ascending order.
val, vec = np.linalg.eigh(R)
# Flip to descending order, and flip the eigenvector columns the same way so
# that vec[:, k] still belongs to val[k]. val stays a NumPy array.
val = val[::-1]
vec = vec[:, ::-1]
r = val / val.sum()     # proportion of total variance per eigenvalue
np.cumsum(r)            # cumulative proportion

array([0.72711268, 0.81204139, 0.89526749, 0.94785951, 0.982305  ,
       1.        ])

In [5]:
# Fit an m-factor model with varimax rotation to the standardized data B.
m = 3   # number of common factors
fa = FA(rotation='varimax', n_factors=m)
fa.fit(B)
A = fa.loadings_                    # loading matrix taken from the fitted model
contri = (A ** 2).sum(axis=0)       # sum of squared loadings per column
contri  # contribution of each factor

array([1.99144025, 1.67999651, 1.37520757])

In [6]:
# One minus the row sums of squared loadings gives each variable's specific variance.
s2 = 1 - (A ** 2).sum(axis=1)
s2  # specific variances

array([0.02208709, 0.30829785, 0.00237122, 0.00410953, 0.25144177,
       0.36504823])

In [7]:
# Weighted least-squares factor scores with coefficient matrix
#   f_coef = Psi^{-1} A (A' Psi^{-1} A)^{-1},   Psi = diag(s2).
# The inverse of a diagonal matrix is just the reciprocal of its diagonal,
# so no general matrix inversion is needed for it.
ss = np.diag(1.0 / s2)
# Solve the small linear system instead of forming (A' ss A)^{-1} explicitly.
# A' ss A is symmetric, so solve(A' ss A, A' ss).T equals ss A (A' ss A)^{-1}.
f_coef = np.linalg.solve(A.T @ ss @ A, A.T @ ss).T    # factor score coefficients
f = B @ f_coef                                        # factor scores, one row per observation
# Composite score: factor scores weighted by each factor's share of the
# total contribution.
grade = f @ contri / contri.sum()
grade   # composite scores

array([ 0.78776279,  0.5124789 ,  0.57804384,  0.22459248,  0.58197416,
        0.52503398,  0.40057203,  0.24481888,  0.22316675, -0.13035823,
       -0.084063  , -0.34414324, -0.31843212, -0.5535271 , -0.51002544,
       -0.72442783, -1.41346685])

In [8]:
# Convert the composite scores into ranks (1 = highest score).
ind0 = np.argsort(-grade)   # positions of the observations from highest to lowest score
# Size the rank vector from the data rather than hard-coding the sample size.
ind = np.zeros(len(grade))
ind[ind0] = np.arange(1, len(grade) + 1)    # the k-th best observation receives rank k
ind

array([ 1.,  5.,  3.,  8.,  2.,  4.,  6.,  7.,  9., 11., 10., 13., 12.,
       15., 14., 16., 17.])