In [1]:
import numpy as np

## Data

Synthetic multi-target regression data: only the third feature (row 2 of `W`) is relevant.

In [2]:
# Synthetic multi-target regression problem used by the cells below.
np.random.seed(0)  # fixed seed so the random draws are reproducible
n_samples, n_features, n_targets = 10, 5, 3

# Design matrix with standard-normal entries.
X = np.random.randn(n_samples, n_features)

# Ground-truth weights: only row 2 is non-zero (weight 1 for every target).
W = np.zeros((n_features, n_targets))
W[2, :] = 1

# Targets are the linear response plus Gaussian noise scaled by 0.1.
noise = np.random.randn(n_samples, n_targets)
Y = X @ W + noise * 0.1

## Efficient and robust feature selection via joint $\ell_{2,1}$-norms minimization

In [3]:
class RFS:
    """Robust feature selection via joint l2,1-norm minimization.

    Iteratively re-weighted solver. With ``A = [X, gamma * I]`` it alternates

    * ``U = D^-1 A^T (A D^-1 A^T)^-1 Y``
    * ``D^-1 = diag(2 * ||u_i||_2)`` where ``u_i`` is the i-th row of ``U``

    starting from ``D^-1 = I``. The first ``m`` rows of ``U`` (one per column
    of ``X``) are exposed as the feature weights.

    Parameters
    ----------
    gamma : float, default=1.0
        Scale of the identity block appended to ``X`` when building ``A``.
    max_iter : int, default=100
        Number of re-weighting iterations; must be at least 1.

    Attributes
    ----------
    coef_ : ndarray of shape (m, c), or (m,) when ``Y`` was 1-D
        Feature weights, available after :meth:`fit`.
    """

    def __init__(self, gamma=1.0, max_iter=100):
        self.gamma = gamma
        self.max_iter = max_iter

    def fit(self, X, Y):
        """Estimate the feature weights from ``X`` and ``Y``.

        Parameters
        ----------
        X : array-like of shape (n, m)
            Design matrix (n samples, m features).
        Y : array-like of shape (n, c) or (n,)
            Targets. A 1-D ``Y`` is treated as a single target column.

        Returns
        -------
        self : RFS
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``X`` and ``Y`` disagree on the number of
            samples, or if ``max_iter`` is smaller than 1.
        numpy.linalg.LinAlgError
            If ``A D^-1 A^T`` becomes singular during the iterations.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        single_target = Y.ndim == 1
        if single_target:
            Y = Y[:, np.newaxis]
        if Y.ndim != 2 or Y.shape[0] != X.shape[0]:
            raise ValueError("X and Y must have the same number of samples")
        if self.max_iter < 1:
            # Without at least one iteration there is no solution to report.
            raise ValueError("max_iter must be at least 1")

        n, m = X.shape
        A = np.hstack((X, self.gamma * np.identity(n)))
        # Only the diagonal of D^-1 is stored; scaling the columns of A by it
        # is the same product as A @ diag(d_inv) without the dense matrix.
        d_inv = np.ones(m + n)
        for _ in range(self.max_iter):
            AD = A * d_inv
            # Solve (A D^-1 A^T) Z = Y instead of forming the explicit inverse.
            U = AD.T @ np.linalg.solve(AD @ A.T, Y)
            d_inv = 2.0 * np.linalg.norm(U, axis=1)

        coef = U[:m]
        self.coef_ = coef.ravel() if single_target else coef
        return self

In [4]:
%%time
clf = RFS(gamma=1.0, max_iter=100)
clf.fit(X, Y)

CPU times: user 10.2 ms, sys: 1.64 ms, total: 11.8 ms
Wall time: 9.69 ms


In [5]:
# Ground-truth weights first, then the weights estimated by RFS.
print(W, clf.coef_, sep="\n")

[[0. 0. 0.]
 [0. 0. 0.]
 [1. 1. 1.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[-1.67510159e-02  4.83887709e-02  4.62902058e-03]
 [ 5.54272334e-04  7.82781474e-03 -6.27622144e-03]
 [ 1.00539722e+00  1.02888328e+00  9.30122776e-01]
 [ 1.42832702e-02 -4.53268943e-03 -2.30547889e-02]
 [-5.22114126e-02 -3.63653342e-02  3.39052754e-02]]


## SPAMS
`list_groups` がよく分からない(下の公式ドキュメントの例のコメントによると、各グループの先頭の信号のインデックスを並べた配列らしい)

In [6]:
import spams

In [7]:
%%time
list_groups = np.asfortranarray(np.array([0], dtype=np.int32))
alpha0 = np.asfortranarray(np.zeros((X.shape[1], Y.shape[1])))
W_spams = spams.l1L2BCD(
    X=np.asfortranarray(Y), D=np.asfortranarray(X), alpha0=alpha0, list_groups=list_groups, lambda1=100, itermax=100, mode=2
)

CPU times: user 163 µs, sys: 81 µs, total: 244 µs
Wall time: 208 µs


In [8]:
# Ground-truth weights first, then the weights returned by SPAMS.
print(W, W_spams, sep="\n")

[[0. 0. 0.]
 [0. 0. 0.]
 [1. 1. 1.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


SPAMS 公式のドキュメント  
http://spams-devel.gforge.inria.fr/doc-python/html/doc_spams005.html#sec20

In [9]:
import spams
import numpy as np
import six
import time
# np.float was only an alias of the builtin float and was removed in NumPy 1.24;
# np.float64 is the same dtype and works on every NumPy version.
myfloat = np.float64

np.random.seed(0)
# 100 random signals of dimension 64, stored column-major as SPAMS expects.
X = np.asfortranarray(np.random.normal(size=(64, 100)), dtype=myfloat)
# Random dictionary with 200 atoms, each column scaled to unit l2 norm.
D = np.asfortranarray(np.random.normal(size=(64, 200)))
D = np.asfortranarray(D / np.sqrt((D * D).sum(axis=0)), dtype=myfloat)
# indices of the first signals in each group (groups of 10 consecutive signals)
ind_groups = np.arange(0, X.shape[1], 10, dtype=np.int32)
# parameters of the optimization procedure are chosen
itermax = 100
tol = 1e-3
mode = spams.PENALTY
# NOTE(review): the original comment here read "squared norm of the residual
# should be less than 0.1", which matches neither the value 0.15 nor PENALTY
# mode -- confirm the meaning of lambda1 against the SPAMS documentation.
lambda1 = 0.15
numThreads = -1 # number of processors/cores to use, the default choice is -1
                # and uses all the cores of the machine
alpha0 = np.zeros((D.shape[1], X.shape[1]), dtype=myfloat, order="F")
# perf_counter is monotonic and has a higher resolution than time.time for short timings.
tic = time.perf_counter()
alpha = spams.l1L2BCD(X, D, alpha0, ind_groups, lambda1=lambda1, mode=mode, itermax=itermax, tol=tol, numThreads=numThreads)
tac = time.perf_counter()
t = tac - tic
print("%f signals processed per second" %(X.shape[1] / t))

914.489044 signals processed per second
