In [367]:
import numpy as np

from sklearn.covariance import GraphLasso, GraphLassoCV

# Graph inference

## Graph Lasso (fixed point in time, $t$)

In [368]:
import numpy as np
def sparseinverse_cov(d):
    """Generate a covariance matrix with Sparse Inverse method.
    Sparse graph where each element depends only on a subset of others."""
    # W is a weight symmetric matrix.
    # It has -1 if two vertices are connected, 0 otherwise.
    # The diagonal is the sum of rows (or columns).
    W = np.zeros((d, d))
    for i in range(d):
        W[i, i+1:] = np.random.randint(-1, 1, d-i-1)
    W += W.T
    w = [abs(np.sum(vec)) for vec in W]
    laplacian = W + np.diag(w)
    return np.linalg.inv(laplacian + np.random.randn() ** 2 * np.eye(d))

In [377]:
theta_t = sparseinverse_cov(3)

In [378]:
theta_t

array([[ 0.35534036,  0.11340588,  0.        ],
       [ 0.11340588,  0.35534036,  0.        ],
       [ 0.        ,  0.        ,  0.46874623]])

In [365]:
from sklearn.covariance import GraphLasso, GraphLassoCV

In [444]:
def random_covariance(n):
    Sinv = np.eye(n)
    idx = np.random.randint(2, size=n*n);
    Sinv.flat[idx] = 1
    Sinv = Sinv + Sinv.T
    if np.min(np.linalg.eigh(Sinv)[0]) < 0:
        Sinv = Sinv + 1.1*np.abs(np.min(np.linalg.eigh(Sinv)[0]))*np.eye(n);

    S = np.linalg.inv(Sinv);
    return S, Sinv

In [452]:
n_dim = 10
n_samples = 20
alpha=2*np.sqrt(np.log(n_dim) / n_samples)
print "alpha:", alpha

# true_covariance = sparseinverse_cov(n_dim)
# true_covariance /= np.diag(true_covariance)[0]
true_covariance, Sinv = random_covariance(n_dim)
X = np.random.multivariate_normal(np.zeros(n_dim), true_covariance, n_samples)

alpha: 0.678614042442


In [456]:
gl = GraphLasso(mode='cd', alpha=alpha, verbose=1, max_iter=200)
gl = GraphLassoCV(verbose=1, max_iter=200)
gl.fit(X)

from rgi import admm_covariance; reload(admm_covariance)
from rgi.admm_covariance import covsel

Cov = covsel(X, .01,1,1)

# print gl.error_norm(true_covariance)
# print np.linalg.norm(gl.covariance_ - true_covariance)

[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.3s finished
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s finished


[GraphLassoCV] Done refinement  1 out of 4:   0s
[GraphLassoCV] Done refinement  2 out of 4:   0s
[GraphLassoCV] Done refinement  3 out of 4:   0s
[GraphLassoCV] Done refinement  4 out of 4:   0s


In [461]:
gl.covariance_

array([[  6.13959457e-01,  -2.02034202e-01,   1.60224015e-02,
         -1.06832808e-08,   9.50221619e-02,   0.00000000e+00,
          1.42144938e-10,   4.29065483e-02,  -7.60040622e-02,
         -1.02499018e-01],
       [ -2.02034202e-01,   9.07437714e-01,  -1.66893041e-02,
          8.11251578e-09,  -9.89775130e-02,   0.00000000e+00,
         -1.07939974e-10,  -1.92714994e-01,   3.24677043e-02,
          4.71921097e-02],
       [  1.60224015e-02,  -1.66893041e-02,   2.53363837e-01,
         -1.67636502e-09,   1.05914459e-01,   0.00000000e+00,
          2.23046466e-11,   3.54435251e-03,  -1.33754240e-02,
         -2.85767487e-03],
       [ -1.06832808e-08,   8.11251578e-09,  -1.67636502e-09,
          4.22815912e-01,  -2.34816674e-09,   0.00000000e+00,
         -5.62571956e-03,  -1.72287685e-09,   1.39903284e-09,
          1.85714150e-09],
       [  9.50221619e-02,  -9.89775130e-02,   1.05914459e-01,
         -2.34816674e-09,   6.28135318e-01,   0.00000000e+00,
          3.12432129e-11

In [460]:
Cov

array([[ 0.12128279,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.14374562,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.00791029,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.1363366 ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.13796079,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.09562622,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.12545118,  0.        

In [455]:
true_covariance

array([[ 0.66666667, -0.33333333,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.33333333,  0.66666667,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.5       ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.5       ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.5       ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.5       ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.5       ,  0.        

In [28]:
import pandas as pd
import numpy as np
from sklearn.covariance import GraphLasso, GraphLassoCV, empirical_covariance

df_x = pd.read_csv("/home/fede/projects_local/kdvs/", delimiter='\t', comment='#')
df_y = pd.read_csv("/home/fede/projects_local/petretto/data/fmf_labels.txt", delimiter='\t')

X = df_x.values
y = df_y.values.ravel()

In [33]:
df_x.loc[:, df_x.columns.str.startswith("FMF_C")].values

array([[ 25.69016,  25.48889,  25.51938, ...,  26.62533,  25.77399,
         25.82825],
       [ 20.96733,  21.09571,  16.70307, ...,  24.32992,  23.06976,
         22.20353],
       [ 25.31748,  25.55324,  25.10913, ...,  24.79872,  25.27302,
         25.17329],
       ..., 
       [ 20.97036,  19.91314,  20.00703, ...,  17.62372,  20.86982,
         20.09159],
       [ 21.64689,  21.6591 ,  22.15527, ...,  21.91561,  22.12682,
         22.4864 ],
       [ 22.84217,  23.94699,  23.8261 , ...,  23.58451,  23.27881,
         22.5633 ]])

In [18]:
emp_cov = empirical_covariance(X[y=='A'], assume_centered=False)

In [37]:
gl = GraphLasso(verbose=1, max_iter=200)
gl.fit(df_x.loc[:, df_x.columns.str.startswith("FMF_C")].values.T)

[graph_lasso] Iteration   0, cost  6.99e+03, dual gap -6.170e+03
[graph_lasso] Iteration   1, cost  nan, dual gap nan


FloatingPointError: Non SPD result: the system is too ill-conditioned for this solver. The system is too ill-conditioned for this solver

In [361]:
n, d = 400, 100

X = np.random.randn(n, d)
beta = np.ones(d); beta[:d//2] = 0

y = X.dot(beta) + np.random.randn(n) * 0.1

In [362]:
beta

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [159]:
from sklearn.linear_model import Lasso

beta_lasso = Lasso(alpha=0.1).fit(X, y).coef_

In [97]:
beta_lasso

array([-0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.84188072,  0.84929439,  0.95823875,  0.88274812,  0.82727549])

In [164]:
from rgi import admm_lasso as lasso; reload(lasso)

z = lasso.lasso(X, y, lamda=0.1, rho=1, alpha=1)

In [165]:
z

array([ 0.02412681,  0.        , -0.02342118,  0.03319861, -0.0155281 ,
        0.99963769,  0.95408747,  1.01082704,  0.97659403,  0.98922421])

In [156]:
from rgi import admm_group_lasso as grouplasso; reload(grouplasso)

z_group = grouplasso.group_lasso(X, y, lamda=0.1, p=[[i] for i in range(10)],
                                 rho=1, alpha=1)

In [157]:
z_group

array([ 0.02291929, -0.        , -0.01804153,  0.03290291, -0.01004133,
        0.99917517,  0.95454922,  1.01078757,  0.97663331,  0.98924856])

In [158]:
print "Error skle: %.3f" % np.linalg.norm(beta_lasso - beta)
print "Error admm: %.3f" % np.linalg.norm(z - beta)
print "Error admm_group: %.3f" % np.linalg.norm(z_group - beta)

Error skle: 0.305
Error admm: 0.073
Error admm_group: 0.070


In [363]:
from rgi import admm_group_lasso_overlap as grouplassooverlap; reload(grouplassooverlap)

z_group = grouplassooverlap.group_lasso_overlap(
        X, y, lamda=2,rho=2, groups=[[0,1,2,3,4], [3,4,5], [5,6,7,8,9]], verbose=True, max_iter=1000, tol=1e-6)

obj: 1.7869, rnorm: 7.0658, snorm: 14.1317,eps_pri: 0.0707, eps_dual: 0.0000
obj: 1.7869, rnorm: 7.0658, snorm: 0.0005,eps_pri: 0.0707, eps_dual: 0.0009
obj: 1.7869, rnorm: 7.0658, snorm: 0.0005,eps_pri: 0.0707, eps_dual: 0.0017
obj: 1.7870, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0025
obj: 1.7870, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0033
obj: 1.7871, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0042
obj: 1.7871, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0050
obj: 1.7872, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0058
obj: 1.7873, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0066
obj: 1.7874, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0073
obj: 1.7875, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0081
obj: 1.7876, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0089
obj: 1.7877, rnorm: 7.0658, snorm: 0.0004,eps_pri: 0.0707, eps_dual: 0.0097
obj: 1.7878

obj: 1.8492, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0606
obj: 1.8343, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0605
obj: 1.8258, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0606
obj: 1.8322, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8470, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8554, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8492, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0606
obj: 1.8347, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0605
obj: 1.8262, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0606
obj: 1.8323, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8466, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8551, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8492, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0606
obj: 1.8350,

obj: 1.8337, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8342, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8412, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8478, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8474, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8404, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8339, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8343, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8411, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8476, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8473, rnorm: 7.0658, snorm: 0.0001,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8405, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8341, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8344,

obj: 1.8448, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8422, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8382, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8368, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8394, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8434, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8447, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8422, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8383, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8369, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8394, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8433, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0608
obj: 1.8447, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8422,

obj: 1.8414, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8428, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8422, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8402, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8388, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8394, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8413, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8427, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8422, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8403, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8389, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8394, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8413, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8427,

obj: 1.8409, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8400, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8398, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8407, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8416, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8417, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8409, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8400, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8399, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8407, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8416, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8417, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8409, rnorm: 7.0658, snorm: 0.0000,eps_pri: 0.0707, eps_dual: 0.0607
obj: 1.8400,

In [364]:
z_group

(array([ -1.99138114e-03,   1.20620857e-04,  -2.20907031e-03,
          9.61386058e-04,  -4.74016426e-04,  -2.27889848e-03,
          5.69542498e-03,   5.31156198e-05,   6.03339712e-03,
         -1.88663875e-03,  -2.88993605e-03,   1.83534437e-03,
         -1.25419177e-03,   1.10099644e-02,  -9.61258071e-03,
          5.50669212e-03,  -1.47086859e-03,  -2.49523872e-03,
          8.30950018e-03,   4.56670860e-04,  -1.85372389e-03,
          7.16902692e-03,   4.56240852e-03,   3.76636192e-03,
         -6.31259921e-03,  -1.01628901e-03,   2.52639282e-03,
          3.76011253e-03,  -1.09843852e-02,  -9.05048112e-03,
          9.15938239e-03,   6.68414597e-03,  -3.67748262e-04,
         -8.45160832e-04,  -7.04272561e-04,  -1.19063770e-02,
         -1.18318644e-03,   2.50184912e-04,   3.93907971e-03,
          5.51534846e-03,  -1.36510785e-03,  -4.85967819e-03,
         -7.81958267e-03,  -1.45899166e-03,   6.16639948e-03,
          4.23480840e-03,   7.36442930e-03,   7.51153654e-03,
        