In [121]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

In [113]:
np.random.seed(2023)


In [110]:
print(U)
print(D)
print(Vt)

[[-0.70710678  0.70710678]
 [ 0.70710678  0.70710678]]
[[5.53957773e+00 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 8.88178420e-16 0.00000000e+00]]
[[-0.24941099 -0.95617472 -0.15337557]
 [-0.78457054  0.10667465  0.61079423]
 [ 0.56766472 -0.27267275  0.77679234]]


In [130]:
# Pre step 1: get and center data

X = np.random.random((100,50)) * 10
mean_subtr = np.mean(X, axis = 0)

# Important, center data
X = X - mean_subtr

Algorithm 1. General SPCA algorithm
1. let A start at V[,1:k], the loadings of the first k ordinary principal components
2. given A = [$\alpha_1, \cdots, \alpha_k$] solve


In [221]:
# Step 1: Get first k loadings of ordinary principal components
U, s, Vt = np.linalg.svd(X, full_matrices=True)
D = np.zeros((U.shape[0], Vt.shape[0]))
D[:s.size, :s.size] = np.diag(s)



In [225]:
print((U @ D)[:20, :20])

[[  3.98736251   3.70614863   1.84110787   0.95884644   0.56813618
   -4.88074095   0.80544925  -1.45708475  -5.75110992  -5.68230247
    5.17553438  -0.04255997  -0.25651487  -7.18308915  -3.13205374
    1.17402255   7.20843922  -0.80822661  -1.30016033  -3.63787939]
 [  2.01136308  -7.48053455  -1.87893094   2.31888582   0.69186919
   -0.32984607 -10.96563357  -0.38720442   2.21103451  -0.83041705
    2.17088125  -0.73653404   0.14132157   1.39258128   2.5818932
   -2.25259584  -5.66733535  -3.78690189  -2.44059336  -0.62098823]
 [ -7.66729556  -2.04151692   3.4870533   -0.58675031  -4.17412628
   -5.83435696  -3.21584894  -5.5912662    4.2399861    8.01800045
    5.53483877   0.84527577  -3.25397113  -1.82668219   2.69762724
    2.56378845   3.17049875   5.39780725   0.2874412   -1.42029407]
 [ -8.18841764  -0.9370256   -8.74270428   0.66691291  -3.29975551
   -1.85893367   1.52176558  -0.92662723   3.08158902  -2.2081214
   -3.24550504   0.90283651   3.82045149  -2.05786973  -0.092

In [197]:
# Step 2: solve elastic net problem for j in 1 ... k

def objective_elastic_net(beta, alpha, X, l, l2):
    assert len(beta) == len(alpha), "shape of principal comp. vector alpha does not coincide with beta"
    return (alpha - beta).T @ X.T @ X @ (alpha - beta) + l * np.linalg.norm(beta) ** 2 + l2 * np.linalg.norm(beta)



In [227]:
from scipy.linalg import sqrtm
k = 10
A = Vt.T[:,:k]
B = np.zeros((A[:,0].shape[0], k))

for iter in range(100):
    for i in range(k):
        B[:,i] = minimize(objective_elastic_net, np.zeros(A[:,0].shape[0]), args = (A[:,i], X, 1, 1)).x
    
    # Step 3:
    U, s, V_temp = np.linalg.svd(X.T @ X @ B)
    Vt = np.zeros((U.shape[0], U.shape[0]))
    Vt[:V_temp.shape[0], :V_temp.shape[0]] = V_temp

    A_old = A
    A = X.T @ X @ B @ np.linalg.inv(sqrtm(B.T @ X.T @ X @ X.T @ X @ B))
    if iter > 2:
        print(np.linalg.norm(A - A_old))



0.44897796836077214
0.3282464608358238
0.2402377826006819
0.18151984237309254
0.1431514320242255
0.11732044658659939
0.0990708883498555
0.08551665609292775
0.07499757757616592
0.06654897195006329
0.05959628884230117
0.05378206087643788
0.04886942884735598
0.04468975924937062
0.04111503642700163
0.03804362293640763


KeyboardInterrupt: 

In [192]:
A.shape

(50, 50)

In [220]:
Vt[:20, :10]

array([[ 0.2645956 ,  0.16380297, -0.41964405, -0.07131232, -0.49030115,
        -0.28905728, -0.41528587,  0.23142141,  0.3611361 , -0.20392238,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.59388305, -0.49690187, -0.44785562,  0.11759463, -0.0080129 ,
        -0.40996557,  0.06211579, -0.09172226, -0.06564013, -0.03566999,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.1518803 ,  0.072964  , -0.25497264, -0.4662443 ,  0.59158334,
         0.13657008,  0.03315592,  0.20817758,  0.52458485,  0.03106621,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.06363074,  0.08105702, -0.11658051,  0.66052161, -0.10807036,
         0.27269522,  0.266139  , -0.12630266,  

In [211]:
np.linalg.norm(Vt - A)

4.713041188430437