In [145]:
import numpy as np
import time
from sklearn.decomposition import PCA, SparsePCA
from scipy.linalg import svd

In [146]:
def scad(vec, lam, a=3.7):
    return np.multiply(np.multiply(vec - lam * np.sign(vec), np.abs(vec) > lam), np.abs(vec) < (2*lam)) + \
            np.multiply(vec, np.abs(vec) > a * lam) + \
            np.multiply(np.multiply(((a - 1) * vec - np.sign(vec) * lam * a )/ (a - 2), \
                                    np.abs(vec) > (2*lam)), np.abs(vec) < (a*lam))

# Spiked covariance

In [147]:
# Generates n samples from spiked covariance model of dimension d, spike at u with signal strength gamma
def spiked_cov(n,d,u,gamma=1,return_cov=False):
    u = u / np.linalg.norm(u)
    cov = np.eye(d) + gamma * np.outer(u,u)
    X = np.random.multivariate_normal(mu,cov,size=n)
    if return_cov:
        return X, cov
    else:
        return X

# First s entries are 1
def equal_spike(n,d,s,gamma=1,return_cov=False):
    u = np.zeros(d)
    u[:s] = 1
    return spiked_cov(n,d,u,gamma,return_cov)

# First entry is alpha, next s-1 entries are 1
def heavy_spike(n,d,s,gamma=1,alpha=10,return_cov=False):
    u = np.zeros(d)
    u[0] = alpha
    u[1:s] = 1
    return spiked_cov(n,d,u,gamma,return_cov)

In [148]:
n = 200
d = 10
s = 3
X, pop_cov = heavy_spike(n,d,s,return_cov=True)
Sigma = 1/n*np.dot(X.T,X)

In [149]:
sample_vals, sample_vecs = np.linalg.eigh(Sigma)
sample_vecs[:,d-1]*sample_vals[d-1]

array([-1.57947298, -0.39661853, -0.34366175, -0.16656548, -0.19186559,
       -0.33501456, -0.15234172, -0.15672689, -0.19132956,  0.10243337])

In [150]:
pop_vals, pop_vecs = np.linalg.eigh(pop_cov)
pop_vecs[:,d-1]*pop_vals[d-1]

array([1.98029509, 0.19802951, 0.19802951, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ])

# Useful functions

In [151]:
def piecewise(val,cond):
    return val * cond

# Gradient of pca objective ||Sigma-beta beta^T||_F^2
def PCA_grad(Sigma,beta):
    return 4*(-np.dot(Sigma,beta)+np.sum(np.square(beta))*beta)

# SCAD = L1 - cvx
# This is the gradient of the cvx part
def SCAD_ccv_grad(beta,lamb,a=3.7):
    piece1 = piecewise(0,np.abs(beta)<=lamb)
    piece2 = piecewise(1/(a-1)*(beta-lamb*np.sign(beta)),np.logical_and(lamb<np.abs(beta),np.abs(beta)<=a*lamb))
    piece3 = piecewise(lamb*np.sign(beta),np.abs(beta)>a*lamb)
    return piece1+piece2+piece3

# Proximal operator for L1
def prox_l1(v,c):
    return np.sign(v) * np.maximum(np.abs(v)-c,0)

# Naive prox with scad

In [152]:
lr = 0.001
lamb = 2
(U, s, Vh) = svd(X)
beta = Vh[0]
for i in range(10000):
    beta = beta - lr * PCA_grad(Sigma,beta)
    beta = scad(beta,lr * lamb)
    #beta = beta + 0.001 * np.dot(Sigma, beta)
    #beta=beta/np.linalg.norm(beta)
beta

array([-1.19599482, -0.30032404, -0.26022457, -0.12612527, -0.1452828 ,
       -0.25367682, -0.11535487, -0.11867537, -0.1448769 ,  0.0775637 ])

# Lasso via prox

In [154]:
lr = 0.01
lamb = 0.5
(U, s, Vh) = svd(X)
beta = Vh[0]
for i in range(1000):
    # print(SCAD_ccv_grad(beta,lamb))
    beta = beta - lr * (PCA_grad(Sigma,beta))
    beta = prox_l1(beta,lr * lamb)
    #beta = beta + 0.001 * np.dot(Sigma, beta)
    #beta=beta/np.linalg.norm(beta)
beta

array([-1.23346216, -0.07115705, -0.11594263, -0.        , -0.        ,
       -0.00434701, -0.        , -0.        , -0.        ,  0.        ])

# Prox by splitting scad

In [156]:
lr = 0.01
lamb = 0.5
(U, s, Vh) = svd(X)
beta = Vh[0]
for i in range(10000):
    # print(SCAD_ccv_grad(beta,lamb))
    beta = beta - lr * (PCA_grad(Sigma,beta)+SCAD_ccv_grad(beta,lamb))
    beta = prox_l1(beta,lr * lamb)
    #beta = beta + 0.001 * np.dot(Sigma, beta)
    #beta=beta/np.linalg.norm(beta)
beta

array([-1.20994616, -0.0731463 , -0.12498379, -0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.        ,  0.        ])

# Prox-MCP

In [157]:
def MCP_ccv_grad(beta,lamb,a=3.7):
    piece1 = piecewise(beta/a,np.abs(beta)<=a*lamb)
    piece2 = piecewise(lamb*np.sign(beta),a*lamb<np.abs(beta))
    return piece1+piece2

In [159]:
lr = 0.01
lamb = 0.05
(U, s, Vh) = svd(X)
beta = Vh[0]
for i in range(1000):
    # print(SCAD_ccv_grad(beta,lamb))
    beta = beta - lr * (PCA_grad(Sigma,beta)+MCP_ccv_grad(beta,lamb))
    beta = prox(beta,lr * lamb)
    #beta = beta + 0.001 * np.dot(Sigma, beta)
    #beta=beta/np.linalg.norm(beta)
beta

array([-1.21119667, -0.27002091, -0.23841976, -0.10065838, -0.12502745,
       -0.21785346, -0.09882983, -0.09878744, -0.11976042,  0.05303728])

# Misc: Sparse linear regression with scad

In [168]:
X = np.random.multivariate_normal(np.zeros(6),np.eye(6),size=500)
beta_star = np.array([1,2,3,0,0,0])
Y = np.dot(X,beta_star) + np.random.normal(scale=.2,size=500)

In [169]:
# LASSO
lr = 0.01
lamb = 0.5
samp_cov = np.dot(X.T,X)
xty = np.dot(X.T,Y)
beta = np.array([0.1,-0.1,0.2,0.1,0.3,-0.4])
for i in range(500):
    beta = beta - lr * 1/500 *(np.dot(samp_cov,beta)-xty)
    beta = prox_l1(beta,lr * lamb)

In [170]:
beta

array([0.50670734, 1.49870626, 2.44394081, 0.        , 0.        ,
       0.        ])

In [171]:
# Splitting SCAD
lr = 0.01
lamb = 0.5
samp_cov = np.dot(X.T,X)
xty = np.dot(X.T,Y)
beta = np.array([0.1,-0.1,0.2,0.1,0.3,-0.4])
for i in range(500):
    beta = beta - lr * 1/500 *(np.dot(samp_cov,beta)-xty+SCAD_ccv_grad(beta,lamb))
    beta = prox_l1(beta,lr * lamb)
beta

array([0.50671657, 1.49799937, 2.44290671, 0.        , 0.        ,
       0.        ])

In [173]:
# Prox gradient with scad, this actually works for debiasing
lr = 0.01
lamb = 0.5
samp_cov = np.dot(X.T,X)
xty = np.dot(X.T,Y)
beta = np.array([0.1,-0.1,0.2,0.1,0.3,-0.4])
for i in range(500):
    beta = beta - lr * 1/500 *(np.dot(samp_cov,beta)-xty)
    beta = scad(beta,lr * lamb)
beta

array([0.99076671, 2.00055681, 2.9571877 , 0.        , 0.        ,
       0.        ])