In [145]:
import numpy as np
import time
from sklearn.decomposition import PCA, SparsePCA
from scipy.linalg import svd

In [461]:
def piecewise(val,cond):
    """Mask ``val`` with ``cond``.

    The mask is applied by multiplication, so entries where ``cond`` is
    falsy become zero and the others keep the value of ``val``. ``val`` may
    be a scalar, in which case the result takes the shape of ``cond``.
    """
    masked = val * cond
    return masked

def scad(vec, lam, a=3.7):
    """SCAD thresholding operator applied elementwise to ``vec``.

    Piecewise rule (threshold ``lam``, shape parameter ``a`` > 2):

    * ``|v| <= lam``            -> 0
    * ``lam < |v| <= 2*lam``    -> soft-thresholding, ``v - lam*sign(v)``
    * ``2*lam < |v| <= a*lam``  -> ``((a-1)*v - sign(v)*a*lam) / (a-2)``
    * ``|v| > a*lam``           -> ``v`` (no shrinkage)

    The upper bounds are closed so that the breakpoints ``|v| == 2*lam`` and
    ``|v| == a*lam`` belong to exactly one piece; with strict inequalities on
    both sides those inputs fell through every mask and were mapped to 0.

    Parameters
    ----------
    vec : numpy array (or numpy scalar) of values to threshold.
    lam : non-negative threshold.
    a : SCAD shape parameter; must differ from 2 (division by ``a - 2``).

    Returns
    -------
    Array of the same shape as ``vec`` with the thresholded values.
    """
    mag = np.abs(vec)
    sgn = np.sign(vec)

    # Each piece is its formula multiplied by a 0/1 mask; the masks are
    # mutually exclusive, so the pieces can simply be added.
    soft = (vec - lam * sgn) * ((mag > lam) & (mag <= 2 * lam))
    middle = ((a - 1) * vec - sgn * lam * a) / (a - 2) * ((mag > 2 * lam) & (mag <= a * lam))
    untouched = vec * (mag > a * lam)
    return soft + untouched + middle

def l2_project(beta):
    """Rescale ``beta`` to unit Euclidean norm.

    A vector whose norm is not strictly positive (e.g. the zero vector) is
    returned unchanged, so no division by zero occurs.
    """
    norm = np.linalg.norm(beta)
    return beta / norm if norm > 0 else beta

def scad_obj(vec, lamb, a=3.7):
    """Total SCAD penalty of ``vec`` (sum over coordinates).

    Per coordinate ``x`` the penalty is
      * ``lamb*|x|``                                      if ``|x| <= lamb``
      * ``(2*a*lamb*|x| - x**2 - lamb**2) / (2*(a-1))``   if ``lamb < |x| <= a*lamb``
      * ``lamb**2 * (a+1) / 2``                           if ``|x| > a*lamb``
    """
    mag = np.abs(vec)
    linear = piecewise(lamb*mag, mag<=lamb)
    quadratic = piecewise(1/(2*(a-1))*(2*a*lamb*mag-vec**2-lamb**2),
                          np.logical_and(lamb<mag, mag<=a*lamb))
    flat = piecewise(lamb**2*(a+1)/2, a*lamb<mag)
    return np.sum(linear+quadratic+flat)

# Gradient of the PCA objective ||Sigma - beta beta^T||_F^2 with respect to beta
def PCA_grad(Sigma,beta):
    """Return ``4 * (-Sigma @ beta + ||beta||^2 * beta)``."""
    sq_norm = np.sum(np.square(beta))
    return 4*(-np.dot(Sigma,beta)+sq_norm*beta)

# SCAD = L1 - cvx
# This is the gradient of the -cvx (concave) part
def SCAD_ccv_grad(beta,lamb,a=3.7):
    """Gradient of the concave remainder of SCAD after removing ``lamb*|beta|``.

    Per coordinate, before the final negation:
      * 0                                    if ``|b| <= lamb``
      * ``(b - lamb*sign(b)) / (a - 1)``     if ``lamb < |b| <= a*lamb``
      * ``lamb*sign(b)``                     if ``|b| > a*lamb``
    """
    mag = np.abs(beta)
    sgn = np.sign(beta)
    flat = piecewise(0, mag<=lamb)
    ramp = piecewise(1/(a-1)*(beta-lamb*sgn), np.logical_and(lamb<mag, mag<=a*lamb))
    tail = piecewise(lamb*sgn, mag>a*lamb)
    return -(flat+ramp+tail)

# Proximal operator for L1 (soft-thresholding)
def prox_l1(v,c):
    """Shrink every entry of ``v`` towards zero by ``c``; entries with ``|v| <= c`` become 0."""
    shrunk_mag = np.maximum(np.abs(v)-c, 0)
    return np.sign(v) * shrunk_mag

def PCA_obj(Sigma,beta):
    """PCA loss ``-2 * beta^T Sigma beta + ||beta||^4``.

    This equals ``||Sigma - beta beta^T||_F^2`` up to the additive constant
    ``||Sigma||_F^2``, which does not depend on ``beta``.
    """
    quad_form = np.dot(beta,np.dot(Sigma,beta))
    return -2*quad_form+np.linalg.norm(beta)**4

def l1_obj(beta,lamb):
    """L1 penalty ``lamb * sum(|beta_i|)``."""
    abs_total = np.sum(np.abs(beta))
    return lamb*abs_total

# This is the objective function for SCAD
def full_scad_obj(Sigma,beta,lamb,a=3.7):
    """SCAD-penalized PCA objective: ``PCA_obj(Sigma, beta) + scad_obj(beta, lamb, a)``.

    ``scad_obj`` already evaluates the complete SCAD penalty (its first piece
    is ``lamb*|x|``), i.e. the L1 term plus the concave correction. Adding
    ``l1_obj`` on top of it counted the L1 term twice, so that the reported
    value was not the objective the SCAD solvers in this notebook minimize.

    Parameters
    ----------
    Sigma : covariance matrix passed to ``PCA_obj``.
    beta : candidate loading vector.
    lamb : penalty level.
    a : SCAD shape parameter.
    """
    return PCA_obj(Sigma,beta)+scad_obj(beta,lamb,a)

# TODO: unfinished stub for a backtracking line search on the proximal step.
# def prox_line_search(f,x,lamb,beta=.5):
#    while f()

In [428]:
# Euclidean error between beta1 and beta2, raw and after normalizing both
def sq_error(beta1,beta2):
    """Return ``(||beta1 - beta2||_2, ||u1 - u2||_2)`` with ``u = l2_project(beta)``.

    Despite the name, both values are plain (unsquared) L2 norms.
    NOTE: neither value is invariant to flipping the sign of one argument.
    """
    raw_gap = np.linalg.norm(beta1-beta2)
    direction_gap = np.linalg.norm(l2_project(beta1)-l2_project(beta2))
    return raw_gap, direction_gap

# Spiked covariance

In [409]:
# Generates n samples from the spiked covariance model of dimension d
def spiked_cov(n,d,u,gamma=1,return_cov=False):
    """Sample ``n`` zero-mean Gaussian rows with covariance ``I_d + gamma * u u^T``.

    ``u`` is rescaled to unit norm first, so ``gamma`` is the signal strength
    of the spike. Draws come from NumPy's global random state.

    Returns the ``(n, d)`` sample matrix, or ``(samples, covariance)`` when
    ``return_cov`` is true.
    """
    direction = u / np.linalg.norm(u)
    population_cov = np.eye(d) + gamma * np.outer(direction,direction)
    samples = np.random.multivariate_normal(np.zeros(d),population_cov,size=n)
    return (samples, population_cov) if return_cov else samples

# Spike direction: first s entries are 1, the rest 0
def equal_spike(n,d,s,gamma=1,return_cov=False):
    """Draw from ``spiked_cov`` with a spike supported equally on the first ``s`` coordinates."""
    direction = np.zeros(d)
    direction[:s] = 1
    return spiked_cov(n, d, direction, gamma=gamma, return_cov=return_cov)

# Spike direction: first entry is alpha, next s-1 entries are 1, the rest 0
def heavy_spike(n,d,s,gamma=1,alpha=10,return_cov=False):
    """Draw from ``spiked_cov`` with a spike dominated by its first coordinate."""
    direction = np.zeros(d)
    direction[0] = alpha
    direction[1:s] = 1
    return spiked_cov(n, d, direction, gamma=gamma, return_cov=return_cov)

In [414]:
# Problem size: n samples in d dimensions, spike supported on the first s coordinates.
# NOTE: no random seed is set before sampling, so X and everything derived
# from it changes from run to run.
n = 100
d = 100
s = 3
# Draw the data and keep the population covariance for comparison.
X, pop_cov = equal_spike(n,d,s,return_cov=True)
# Uncentered second-moment matrix X^T X / n (the samples are drawn with zero mean).
Sigma = 1/n*np.dot(X.T,X)

In [415]:
# Eigendecomposition of the sample matrix. eigh returns eigenvalues in
# ascending order, so index d-1 is the leading eigenpair.
sample_vals, sample_vecs = np.linalg.eigh(Sigma)
# Leading sample eigenvector scaled by its eigenvalue.
sample_vecs[:,d-1]*sample_vals[d-1]

array([ 0.49103353,  0.29244547, -0.26435044, -0.15085905, -0.27344925,
       -0.12109424,  0.03061356,  0.23979675, -0.46211643, -0.35414544,
        0.10609232, -0.39355174, -0.50341342,  0.28083273,  0.19105065,
       -0.27204018,  0.87212716,  0.43091088, -1.05716247, -0.12479894,
       -0.85729299, -0.09514107,  0.03233783,  0.28643319,  0.06686729,
        0.1382467 ,  0.50146514, -0.34881104,  0.15927123,  0.15474773,
       -0.62193887, -0.00481112,  0.89110927, -0.6387605 , -0.03228966,
        0.25348733, -0.00242237, -0.07988267,  0.13281305,  0.30014211,
       -0.50343714,  0.0071035 ,  0.13959575, -0.77137577,  0.40728385,
       -0.12284313,  0.62295034, -0.25545596,  0.02335565,  0.55835883,
        0.10199057, -0.11032168,  1.27460622, -0.05667704, -0.26534071,
        0.24923834,  0.47479569,  0.08377471, -0.2873278 , -0.35100261,
        0.15552442, -0.15743252, -0.21745082,  0.55090305,  0.83135824,
       -0.28514954, -0.11717398, -0.13867947, -0.15315109, -0.24

In [416]:
# Eigendecomposition of the population covariance (ascending eigenvalues,
# so index d-1 is the leading eigenpair).
pop_vals, pop_vecs = np.linalg.eigh(pop_cov)
# Leading population eigenvector scaled by its eigenvalue.
pop_vecs[:,d-1]*pop_vals[d-1]

array([-1.15470054, -1.15470054, -1.15470054,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  

In [413]:
# Leading population eigenvector.
# NOTE(review): the recorded output (0.990, 0.099, 0.099, 0, ...) looks like a
# heavy_spike run rather than the equal_spike setup above — the cells were
# presumably executed out of order; confirm after Restart & Run All.
pop_vecs[:,d-1]

array([0.99014754, 0.09901475, 0.09901475, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [429]:
# Reference vectors for the error metrics: the leading population eigenvector
# scaled by its eigenvalue, and the eigenvector itself.
# NOTE(review): setting PCA_grad to zero gives ||beta||^2 = eigenvalue, i.e. the
# unpenalized minimizer is sqrt(eigenvalue) * eigenvector — confirm that
# scaling by the eigenvalue (not its square root) is intended here.
# NOTE(review): the sign of the eigenvector returned by eigh is arbitrary and
# sq_error is not sign-invariant.
beta_star = pop_vecs[:,d-1]*pop_vals[d-1]
beta_star_normalized = pop_vecs[:,d-1]

In [417]:
# Baseline: scikit-learn SparsePCA with coordinate descent, timed with wall-clock time.
# NOTE(review): `normalize_components` was deprecated and later removed from
# SparsePCA; on recent scikit-learn versions this call presumably raises a
# TypeError — confirm against the installed version.
start = time.time()
spca = SparsePCA(n_components=1, method='cd',normalize_components=True)
SPCAResult = spca.fit(X)
spca_time_result = time.time() - start
# Fitted sparse component (one row).
SPCAResult.components_

array([[ 0.15382809,  0.        , -0.        , -0.        , -0.02777704,
        -0.        ,  0.        ,  0.        , -0.08088788, -0.07362778,
         0.        , -0.        , -0.16957108,  0.        ,  0.03434963,
        -0.02472015,  0.27023191,  0.07351617, -0.38875987, -0.        ,
        -0.27801209, -0.        ,  0.        ,  0.04009413,  0.        ,
         0.        ,  0.08549262, -0.04285539,  0.        ,  0.        ,
        -0.16516448, -0.        ,  0.1966918 , -0.16582011, -0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.01931189,
        -0.17336274,  0.        ,  0.        , -0.223052  ,  0.08631443,
        -0.        ,  0.11180472, -0.        , -0.        ,  0.09858329,
         0.        , -0.        ,  0.44339876,  0.        , -0.00314691,
         0.        ,  0.05221759,  0.        , -0.        , -0.02323712,
         0.        , -0.        , -0.        ,  0.16124078,  0.12804103,
        -0.        ,  0.        , -0.        , -0. 

# Useful functions

In [317]:
# Gradient of the PCA objective ||Sigma - beta beta^T||_F^2 with respect to beta
# NOTE: duplicate of the PCA_grad defined in the first helper cell; this
# definition shadows it when the notebook runs top to bottom.
def PCA_grad(Sigma,beta):
    """Return ``4 * (-Sigma @ beta + ||beta||^2 * beta)``."""
    sq_norm = np.sum(np.square(beta))
    return 4*(-np.dot(Sigma,beta)+sq_norm*beta)

# SCAD = L1 - cvx
# This is the gradient of the -cvx (concave) part
# NOTE: duplicate of the SCAD_ccv_grad defined in the first helper cell; this
# definition shadows it when the notebook runs top to bottom.
def SCAD_ccv_grad(beta,lamb,a=3.7):
    """Gradient of the concave remainder of SCAD after removing ``lamb*|beta|``.

    Per coordinate, before the final negation:
      * 0                                    if ``|b| <= lamb``
      * ``(b - lamb*sign(b)) / (a - 1)``     if ``lamb < |b| <= a*lamb``
      * ``lamb*sign(b)``                     if ``|b| > a*lamb``
    """
    mag = np.abs(beta)
    sgn = np.sign(beta)
    flat = piecewise(0, mag<=lamb)
    ramp = piecewise(1/(a-1)*(beta-lamb*sgn), np.logical_and(lamb<mag, mag<=a*lamb))
    tail = piecewise(lamb*sgn, mag>a*lamb)
    return -(flat+ramp+tail)

# Proximal operator for L1 (soft-thresholding)
# NOTE: duplicate of the prox_l1 defined in the first helper cell; this
# definition shadows it when the notebook runs top to bottom.
def prox_l1(v,c):
    """Shrink every entry of ``v`` towards zero by ``c``; entries with ``|v| <= c`` become 0."""
    shrunk_mag = np.maximum(np.abs(v)-c, 0)
    return np.sign(v) * shrunk_mag

# SCAD subgradient descent

In [462]:
# Subgradient descent on the SCAD-penalized PCA objective. The update direction
# is PCA gradient + gradient of the concave SCAD part + lamb*sign(beta)
# (a subgradient of the L1 term).
lr = 0.005
lamb = 1
# NOTE: this rebinds the notebook-level `s` (sparsity level) to the singular values of X.
(U, s, Vh) = svd(X)
# Spectral start (first row of Vh) ...
beta = Vh[0]
# ... which is immediately replaced by a seeded Gaussian start, so the SVD
# result is currently unused.
np.random.seed(70)
beta=np.random.normal(size=d)
for i in range(10000):
    # Diminishing step size lr / sqrt(i+1).
    beta = beta - lr/np.sqrt(i+1) * (PCA_grad(Sigma,beta)+SCAD_ccv_grad(beta,lamb)+lamb*np.sign(beta))
    # beta = scad(beta,lr * lamb)
    #beta = beta + 0.001 * np.dot(Sigma, beta)
    #beta=beta/np.linalg.norm(beta)
print(beta)
# (raw L2 error, L2 error after normalizing both vectors)
print(sq_error(beta,beta_star))
print(full_scad_obj(Sigma,beta,lamb))

[-7.44393726e-05 -3.72416438e-05 -9.61344601e-05 -1.70807216e-05
  9.24718979e-06  1.51962927e-06 -4.02313543e-05  7.59676775e-06
  3.99238289e-05  1.22758089e-05  1.02532751e-05 -2.23461435e-05
  3.91517565e-05 -3.47333810e-05 -4.23482325e-05  4.96041931e-05
  4.25177026e-05  1.56816894e-05  2.23820295e-05 -3.04108380e-05
  4.14031091e-05 -4.28779939e-05 -3.63079635e-05 -1.89604372e-01
 -2.03922150e-05 -3.03995444e-05  4.64583440e-05 -8.71695468e-07
 -4.02382179e-05  7.18524928e-06  5.28107157e-05 -7.41936036e-06
 -9.02848520e-05 -3.46960633e-05  1.69810074e-05  9.11796262e-06
  3.29341836e-05 -1.46443791e-05 -1.91407311e-05 -8.15435693e-06
  2.91783953e-06  3.71023669e-05  2.50790424e-01 -5.67586899e-05
  1.16871305e-05  2.96898601e-05 -3.87417513e-05 -3.12648471e-05
 -3.53440030e-05  8.16237661e-06  4.06993420e-05 -3.70182089e-07
  3.43755413e-05 -6.69383310e-06 -2.66981912e-05 -3.15271483e-05
 -3.28440043e-05  3.00457884e-05 -1.25340012e-05 -3.52124426e-05
  3.99490688e-05 -7.63313

# Naive prox with scad

In [464]:
# "Naive" proximal scheme: gradient step on the PCA loss, then the SCAD
# thresholding operator with threshold lr*lamb.
lr = 0.001
lamb = 1
# NOTE: this rebinds the notebook-level `s` (sparsity level) to the singular values of X.
(U, s, Vh) = svd(X)
# Spectral start, immediately replaced by the seeded Gaussian start below.
beta = Vh[0]
np.random.seed(70)
beta=np.random.normal(size=d)
for i in range(10000):
    beta = beta - lr * PCA_grad(Sigma,beta)
    beta = scad(beta,lr * lamb)
    #beta = beta + 0.001 * np.dot(Sigma, beta)
    #beta=beta/np.linalg.norm(beta)
print(beta)
# (raw L2 error, L2 error after normalizing both vectors)
print(sq_error(beta,beta_star))
print(full_scad_obj(Sigma,beta,lamb))

[ 0.1774826   0.12993044 -0.23967228 -0.08165004  0.         -0.06197503
  0.          0.12380861 -0.26884604 -0.16445795  0.10224173 -0.14153886
 -0.29615802  0.1697793   0.         -0.17683545  0.4565205   0.18957072
 -0.58173457 -0.10202168 -0.37274699 -0.11050367  0.04049804  0.13996664
 -0.05687112 -0.04356633  0.23288091 -0.16915059  0.          0.
 -0.34343587  0.12419591  0.42529974 -0.33964352  0.          0.
  0.          0.          0.09698431  0.23635116 -0.2581341   0.
  0.12521893 -0.37521095  0.2216513   0.          0.28432867 -0.19138299
 -0.00597474  0.26014487  0.09431562  0.          0.61835122  0.
 -0.15087829  0.11448623  0.24611873  0.03502403 -0.2248191  -0.2242475
  0.          0.         -0.17133837  0.31675971  0.29796274  0.
 -0.0945043  -0.1105142  -0.09866021  0.          0.41872746  0.13756427
  0.14022041 -0.11309197 -0.27919444  0.          0.06080437 -0.2548634
  0.         -0.31626801  0.07675778 -0.15665367 -0.03311831  0.
  0.13622498  0.07898932  0.

# Lasso via prox

In [469]:
# L1-penalized PCA via proximal gradient: gradient step on the PCA loss
# followed by soft-thresholding with threshold lr*lamb.
lr = 0.001
lamb = 1
# NOTE: this rebinds the notebook-level `s` (sparsity level) to the singular values of X.
(U, s, Vh) = svd(X)
# Spectral start, immediately replaced by the seeded Gaussian start below.
beta = Vh[0]
np.random.seed(70)
beta = np.random.normal(size=d)
for i in range(10000):
    # print(SCAD_ccv_grad(beta,lamb))
    beta = beta - lr * (PCA_grad(Sigma,beta))
    beta = prox_l1(beta,lr * lamb)
    #beta = beta + 0.001 * np.dot(Sigma, beta)
    #beta=beta/np.linalg.norm(beta)
print(beta)
# (raw L2 error, L2 error after normalizing both vectors)
print(sq_error(beta,beta_star))
# NOTE: the value printed is the SCAD objective helper, although this cell
# optimizes with an L1 penalty only.
print(full_scad_obj(Sigma,beta,lamb))

[-0.33329601 -0.         -0.99692768 -0.20910204  0.          0.
  0.          0.         -0.         -0.          0.         -0.
  0.          0.15687137 -0.27536408 -0.          0.         -0.
 -0.         -0.          0.         -0.         -0.         -0.
 -0.14374506 -0.          0.01808777 -0.         -0.         -0.03909722
 -0.          0.49437944  0.         -0.          0.00694845  0.
 -0.08762613 -0.          0.03954046  0.14013731  0.10473867  0.
  0.08114674 -0.         -0.         -0.          0.         -0.06873214
  0.          0.          0.          0.          0.          0.
 -0.19359634 -0.          0.14941613 -0.         -0.32933604 -0.08725352
 -0.          0.         -0.12064495  0.03177555 -0.         -0.
 -0.22188636 -0.          0.         -0.         -0.0048389   0.3348858
  0.08501829  0.         -0.          0.         -0.         -0.15517755
  0.          0.          0.05922836  0.         -0.          0.
  0.          0.          0.03985812 -0.          0

# Prox by splitting scad

In [468]:
# SCAD via splitting: the concave SCAD part is folded into the gradient step
# and the remaining L1 part is handled by soft-thresholding (threshold lr*lamb).
lr = 0.001
lamb = 1
# NOTE: this rebinds the notebook-level `s` (sparsity level) to the singular values of X.
(U, s, Vh) = svd(X)
# Spectral start, immediately replaced by the seeded Gaussian start below.
beta = Vh[0]
np.random.seed(70)
beta = np.random.normal(size=d)
for i in range(10000):
    # print(SCAD_ccv_grad(beta,lamb))
    beta = beta - lr * (PCA_grad(Sigma,beta)+SCAD_ccv_grad(beta,lamb,a=3.7))
    beta = prox_l1(beta,lr * lamb)
    #beta = beta + 0.001 * np.dot(Sigma, beta)
    #beta=beta/np.linalg.norm(beta)
print(beta)
# (raw L2 error, L2 error after normalizing both vectors)
print(sq_error(beta,beta_star))
print(full_scad_obj(Sigma,beta,lamb))

[-0.33346489 -0.         -0.99701169 -0.20901014  0.          0.
  0.          0.         -0.         -0.          0.         -0.
  0.          0.15682869 -0.27532991 -0.          0.         -0.
 -0.         -0.          0.         -0.         -0.         -0.
 -0.14378173 -0.          0.01816192 -0.         -0.         -0.03922497
 -0.          0.49424789  0.         -0.          0.0069703   0.
 -0.08757461 -0.          0.03965312  0.14023662  0.10470074  0.
  0.08119612 -0.         -0.         -0.          0.         -0.06880874
  0.          0.          0.          0.          0.          0.
 -0.19362532 -0.          0.14943068 -0.         -0.32908911 -0.08728581
 -0.          0.         -0.12073808  0.03186506 -0.         -0.
 -0.22181599 -0.          0.         -0.         -0.00480704  0.33481593
  0.08496825  0.         -0.          0.         -0.         -0.15517858
  0.          0.          0.05921364  0.         -0.          0.
  0.          0.          0.03992101 -0.          

In [335]:
# Unit-norm version of the current iterate.
# NOTE(review): the recorded output has 10 entries, so it presumably comes from
# an earlier run with a different d.
l2_project(beta)

array([ 0.99995717,  0.00850883,  0.        , -0.        ,  0.        ,
        0.        , -0.        , -0.        , -0.        ,  0.00364157])

In [362]:
# Stationarity check for the SCAD problem: PCA gradient + gradient of the
# concave SCAD part + lamb*sign(beta). Entries at non-zero coordinates of beta
# should be close to zero at a stationary point.
PCA_grad(Sigma,beta)+SCAD_ccv_grad(beta,lamb,a=3.7)+lamb*np.sign(beta)

array([-1.55431223e-15, -5.55111512e-17, -1.77692011e-01,  7.24407440e-02,
       -2.29567260e-01, -2.89558597e-01,  1.46486986e-01,  4.79587529e-02,
        5.68151406e-02,  0.00000000e+00])

In [366]:
# Same stationarity check without the concave SCAD part, i.e. for the
# L1-penalized problem.
PCA_grad(Sigma,beta)+lamb*np.sign(beta)

array([-1.09912079e-14, -5.55111512e-17, -1.74526818e-01,  7.09576265e-02,
       -2.25288305e-01, -2.84212705e-01,  1.43867883e-01,  4.68470857e-02,
        5.61566606e-02,  0.00000000e+00])

# Prox-MCP

In [435]:
def MCP_ccv_grad(beta,lamb,a=3.7):
    """Gradient of the concave remainder of MCP after removing ``lamb*|beta|``.

    Per coordinate, before the final negation:
      * ``b / a``            if ``|b| <= a*lamb``
      * ``lamb*sign(b)``     if ``|b| > a*lamb``
    """
    mag = np.abs(beta)
    inner = piecewise(beta/a, mag<=a*lamb)
    outer = piecewise(lamb*np.sign(beta), a*lamb<mag)
    return -(inner+outer)

In [467]:
# MCP via splitting: the concave MCP part is folded into the gradient step and
# the remaining L1 part is handled by soft-thresholding (threshold lr*lamb).
lr = 0.001
lamb = 1
# NOTE: this rebinds the notebook-level `s` (sparsity level) to the singular values of X.
(U, s, Vh) = svd(X)
# Spectral start, immediately replaced by the seeded Gaussian start below.
beta = Vh[0]
np.random.seed(70)
beta = np.random.normal(size=d)
for i in range(1000):
    beta = beta - lr * (PCA_grad(Sigma,beta)+MCP_ccv_grad(beta,lamb))
    # `prox` is not defined anywhere in this notebook and fails with a
    # NameError on a fresh kernel; the L1 proximal operator is `prox_l1`.
    beta = prox_l1(beta,lr * lamb)
print(beta)
# (raw L2 error, L2 error after normalizing both vectors)
print(sq_error(beta,beta_star))
# NOTE(review): this reports the SCAD objective helper, not an MCP objective.
print(full_scad_obj(Sigma,beta,lamb))

[-0.00535729 -0.         -0.61348257 -0.15478699  0.         -0.
  0.          0.         -0.10489682 -0.          0.         -0.
 -0.          0.110186   -0.28179378 -0.          0.07998022  0.
 -0.03674899 -0.         -0.         -0.         -0.         -0.13830803
 -0.00389978 -0.          0.         -0.1771918  -0.         -0.
 -0.08442501  0.4784156   0.         -0.          0.          0.
 -0.         -0.          0.0153236   0.03333093  0.          0.
  0.23405114 -0.          0.         -0.          0.         -0.06245266
  0.          0.20226264  0.          0.          0.40387902 -0.
 -0.04598196 -0.          0.03784555 -0.         -0.4877225  -0.02093607
  0.          0.         -0.17424607  0.01187938  0.         -0.
 -0.03523897 -0.         -0.          0.          0.          0.25485659
  0.07739468  0.         -0.          0.          0.         -0.40503046
 -0.         -0.          0.         -0.         -0.          0.
  0.08065139 -0.          0.         -0.04719326  

In [327]:
# Unit-norm version of the current iterate.
# NOTE(review): the recorded output has 10 entries, so it presumably comes from
# an earlier run with a different d.
l2_project(beta)

array([ 9.99983578e-01,  5.68058341e-03,  0.00000000e+00, -0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -0.00000000e+00, -0.00000000e+00,
       -0.00000000e+00,  7.58097185e-04])

# Misc: Sparse linear regression with scad

In [367]:
# Synthetic sparse regression: 500 samples of 6 independent standard-normal
# features, true coefficients (1, 2, 3, 0, 0, 0), Gaussian noise with std 0.2.
# NOTE: this rebinds X and beta_star, which the sparse-PCA cells above also
# use; no random seed is set here.
X = np.random.multivariate_normal(np.zeros(6),np.eye(6),size=500)
beta_star = np.array([1,2,3,0,0,0])
Y = np.dot(X,beta_star) + np.random.normal(scale=.2,size=500)

In [368]:
# LASSO
lr = 0.01
lamb = 0.25
samp_cov = np.dot(X.T,X)
xty = np.dot(X.T,Y)
beta = np.array([0.1,-0.1,0.2,0.1,0.3,-0.4])
for i in range(10000):
    beta = beta - lr * 1/500 *(np.dot(samp_cov,beta)-xty)
    beta = prox_l1(beta,lr * lamb)

In [369]:
# Display the LASSO estimate.
beta

array([0.74472985, 1.7636387 , 2.7332998 , 0.        , 0.        ,
       0.        ])

In [381]:
# Splitting SCAD
lr = 0.01
lamb = .4
samp_cov = np.dot(X.T,X)
xty = np.dot(X.T,Y)
beta = np.array([0.1,-0.1,0.2,0.1,0.3,-0.4])
for i in range(10000):
    beta = beta - lr * 1/500 *(np.dot(samp_cov,beta)-xty)-lr*SCAD_ccv_grad(beta,lamb)
    beta = prox_l1(beta,lr * lamb)
beta

array([0.72265217, 1.99978766, 2.98029579, 0.        , 0.        ,
       0.        ])

In [244]:
# Stationarity check: least-squares gradient + lamb*sign(beta) + gradient of the
# concave SCAD part. Entries at non-zero coordinates of beta should be close to zero.
1/500 *(np.dot(samp_cov,beta)-xty)+lamb*np.sign(beta)+SCAD_ccv_grad(beta,lamb)

array([ 3.32622818e-13, -1.10778053e-12, -2.22005747e-12, -1.08399197e-02,
        1.99455401e-03,  1.91817508e-02])

In [245]:
# Keep a handle on the current estimate (binds the same array object, not a copy).
beta_scad1 = beta

In [377]:
# Prox gradient with scad, this actually works for debiasing (but check the scad penalty?)
lr = 0.01
lamb = 3
samp_cov = np.dot(X.T,X)
xty = np.dot(X.T,Y)
beta = np.array([0.1,-0.1,0.2,0.1,0.3,-0.4])
for i in range(10000):
    beta = beta - lr * 1/500 *(np.dot(samp_cov,beta)-xty)
    beta = scad(beta,lr * lamb)
beta

array([0.98852003, 0.        , 3.08700832, 0.        , 0.        ,
       0.        ])

In [247]:
# Stationarity check for the current estimate: least-squares gradient +
# lamb*sign(beta) + gradient of the concave SCAD part.
1/500 *(np.dot(samp_cov,beta)-xty)+lamb*np.sign(beta)+SCAD_ccv_grad(beta,lamb)

array([ 3.15775310e-01, -2.20379270e-14, -3.63598041e-14, -7.60001987e-03,
        5.00000000e-01, -7.88898215e-03])