In [1]:
%load_ext autoreload
%autoreload 3

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="whitegrid")
import networkx as nx
import scipy
import sklearn
import time
import pickle
import warnings

# methods
from gaccord import GraphicalAccord, GraphicalConcord
from inverse_covariance import QuicGraphicalLasso
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri

# utils
import sys
sys.path.append('../utils')
from utils import standardize, partial_corr, partial_corr_to_precision, compute_average_norm, pseudo_BIC, gauss_BIC, proj_precision_mat
from generate_graphs import generate_erdos_renyi, generate_data

In [2]:
# Graph structure to simulate. Supported values: 'hub_network', 'erdos_renyi'.
graph_structure = 'hub_network'

# Validate here so that a typo fails immediately instead of in a later cell:
# the cells below pick their settings by comparing against these two names.
if graph_structure not in ('hub_network', 'erdos_renyi'):
    raise ValueError(
        f"Unknown graph_structure {graph_structure!r}; expected 'hub_network' or 'erdos_renyi'"
    )

### Generate graph and data

In [3]:
# Settings shared by both graph structures.
n, p = 500, 1000                       # n samples, p variables
n_prop_to_p = [n / p]                  # sample-size-to-dimension ratio passed to generate_data
random_state = 2023                    # single seed used for every random step in this cell
lower_weight, upper_weight = 0.5, 1.0  # range of absolute edge weights
spread_diag = [1, 3]                   # range of the diagonal rescaling factors

if graph_structure == 'hub_network':
    # we use pre-made hub-network graph structure, which was constructed by the following procedure:
    # (1) create a Barabasi-Albert scale-free graph
    # (2) randomly choose 5% of the nodes to be hub nodes
    # (3) for each hub node, construct a complete sub-graph (clique)
    Skel = np.genfromtxt('../data/hub_network_structure.txt', delimiter=',')
    if Skel.shape != (p, p):
        raise ValueError(f'hub network skeleton has shape {Skel.shape}, expected {(p, p)}')

    # projection method: draw signed weights on the skeleton, symmetrize using the
    # lower triangle, then project repeatedly until the matrix is well conditioned
    np.random.seed(random_state)
    edge_weights = np.random.uniform(low=lower_weight, high=upper_weight, size=(p, p))
    edge_signs = np.random.choice([-1, 1], size=(p, p))
    Theta = np.multiply(edge_weights, edge_signs)
    Theta = np.multiply(Skel, Theta)
    Theta = np.tril(Theta) + np.tril(Theta).T
    nz_indx = np.nonzero(Theta)
    for attempt in range(100):
        Theta = proj_precision_mat(Theta, nz_indx)
        if np.linalg.cond(Theta) < 20:
            break
    else:
        # the loop ran out of iterations without reaching the target; keep going
        # with the last iterate, but make the situation visible
        warnings.warn('projection did not reach condition number < 20 within 100 iterations')

    Theta = np.real(Theta)
    # spread diagonal of precision matrix
    d = np.random.uniform(spread_diag[0], spread_diag[1], p)
    Theta = np.diag(d) @ Theta @ np.diag(d)
    Rho = partial_corr(Theta)
    Sigma = np.linalg.inv(Theta)

elif graph_structure == 'erdos_renyi':
    Theta, Sigma = generate_erdos_renyi(p, type='proj', edge_prob=0.01, lower_weight=lower_weight, upper_weight=upper_weight, spread_diag=spread_diag, random_state=random_state)
    Rho = partial_corr(Theta)

else:
    raise ValueError(f"Unknown graph_structure {graph_structure!r}; expected 'hub_network' or 'erdos_renyi'")

# Draw one data set from the covariance Sigma (same call for both structures).
Xs = generate_data(p, n_prop_to_p, Sigma, N=1, standardize=False, random_state=random_state)
X = Xs[0]

### Run ACCORD

In [4]:
def _fit_accord(lam1):
    """Fit ACCORD on X with l1 penalty `lam1` and return the fitted estimator."""
    estimator = GraphicalAccord(Omega_star=np.eye(p), lam1=lam1, stepsize_multiplier=1.0, backtracking=True, epstol=1e-7, maxitr=100)
    estimator.fit(X)
    return estimator


def _accord_precision(Omega_hat):
    """Turn an ACCORD estimate `Omega_hat` into the symmetric matrix Theta_hat.

    Sets w_ij = w_ji = 0 (in place) if at least one of them is 0, then returns
    0.5 * (D @ Omega_hat + Omega_hat.T @ D) with D = diag(Omega_hat).
    """
    either_zero = (Omega_hat == 0) | (Omega_hat.T == 0)
    Omega_hat[either_zero] = 0
    # D @ Omega_hat only rescales row i by Omega_hat[i, i]; broadcasting does that in
    # O(p^2) instead of a dense O(p^3) product, and Omega_hat.T @ D is its transpose.
    row_scaled = np.diag(Omega_hat)[:, None] * Omega_hat
    return 0.5 * (row_scaled + row_scaled.T)


accord_pbics = []

# Penalty grid: 30 log-spaced values in [0.1 * lam_max, lam_max], where lam_max is the
# largest absolute off-diagonal entry of X'X / n.
S = np.matmul(X.T, X)/n
np.fill_diagonal(S, 0)
lam_max = np.max(np.abs(S))
lam_min = 0.1 * lam_max
lams = np.logspace(np.log10(lam_min), np.log10(lam_max), 30)
if graph_structure == 'hub_network':
    lams_accord = lams[::-1][15:27]
elif graph_structure == 'erdos_renyi':
    lams_accord = lams[::-1][8:20]

random_state = 2023
np.random.seed(random_state)
for lam in lams_accord:
    model = _fit_accord(lam)
    Omega_hat = model.omega_.toarray()
    Theta_hat = _accord_precision(Omega_hat)

    accord_pbics.append(pseudo_BIC(X, Theta_hat, modified=False))

# use optimal lambda based on p-bic
best_lam_accord = lams_accord[np.argmin(accord_pbics)]
model = _fit_accord(best_lam_accord)
Omega_hat = model.omega_.toarray()
Theta_hat = _accord_precision(Omega_hat)
Rho_hat = partial_corr(Theta_hat)

(total_accord_prec, TP_accord_prec, FP_accord_prec, FN_accord_prec, diag_accord_prec,
 count_TP_accord_prec, count_FP_accord_prec, count_FN_accord_prec) = compute_average_norm(Theta, Theta_hat)
(total_accord_corr, TP_accord_corr, FP_accord_corr, FN_accord_corr, diag_accord_corr,
 count_TP_accord_corr, count_FP_accord_corr, count_FN_accord_corr) = compute_average_norm(Rho, Rho_hat)

### Run CONCORD

In [5]:
def _fit_concord(lam1):
    """Fit CONCORD on X with l1 penalty `lam1` and return the fitted estimator."""
    estimator = GraphicalConcord(Omega_star=np.eye(p), lam1=lam1, backtracking=True, epstol=1e-7, maxitr=100)
    estimator.fit(X)
    return estimator


# Penalty grid: 30 log-spaced values in [0.1 * lam_max, lam_max], where lam_max is the
# largest absolute off-diagonal entry of X'X / n.
S = (X.T @ X) / n
np.fill_diagonal(S, 0)
lam_max = np.abs(S).max()
lam_min = 0.1 * lam_max
lams = np.logspace(np.log10(lam_min), np.log10(lam_max), 30)

# Walk the grid from the largest penalty downwards and keep a 12-value window.
lams_desc = lams[::-1]
if graph_structure == 'hub_network':
    lams_concord = lams_desc[15:27]
elif graph_structure == 'erdos_renyi':
    lams_concord = lams_desc[8:20]

random_state = 2023
np.random.seed(random_state)

# Score every candidate penalty with the pseudo-BIC.
concord_pbics = []
for lam in lams_concord:
    model = _fit_concord(lam)
    Theta_hat = model.omega_.toarray()
    concord_pbics.append(pseudo_BIC(X, Theta_hat, modified=False))

# Refit at the penalty with the smallest pseudo-BIC.
best_lam_concord = lams_concord[np.argmin(concord_pbics)]
model = _fit_concord(best_lam_concord)
Theta_hat = model.omega_.toarray()
Rho_hat = partial_corr(Theta_hat)

# Error summaries on the precision scale and on the partial-correlation scale.
(total_concord_prec, TP_concord_prec, FP_concord_prec, FN_concord_prec, diag_concord_prec,
 count_TP_concord_prec, count_FP_concord_prec, count_FN_concord_prec) = compute_average_norm(Theta, Theta_hat)
(total_concord_corr, TP_concord_corr, FP_concord_corr, FN_concord_corr, diag_concord_corr,
 count_TP_concord_corr, count_FP_concord_corr, count_FN_concord_corr) = compute_average_norm(Rho, Rho_hat)

### Run Glasso

In [6]:
%%capture

glasso_gbics = []

S = np.matmul(X.T, X)/n
S.flat[::S.shape[0] + 1] = 0
lam_max = np.max(np.abs(S))
lam_min = 0.1 * lam_max
lams = np.logspace(np.log10(lam_min), np.log10(lam_max), 30)
if graph_structure == 'hub_network':
    lams_glasso = lams[::-1][15:27]
elif graph_structure == 'erdos_renyi':
    lams_glasso = lams[::-1][12:24]

random_state = 2023
np.random.seed(random_state)
for lam in lams_glasso:
    quic = QuicGraphicalLasso(lam=lam, max_iter=100, init_method='cov', auto_scale=False).fit(X)
    Theta_hat = quic.precision_

    glasso_gbics.append(gauss_BIC(X, Theta_hat))

# use optimal lambda based on g-bic
best_lam_glasso = lams_glasso[np.argmin(glasso_gbics)]
quic = QuicGraphicalLasso(lam=best_lam_glasso, max_iter=100, init_method='cov', auto_scale=False).fit(X)
Theta_hat = quic.precision_
Rho_hat = partial_corr(Theta_hat)

total_glasso_prec, TP_glasso_prec, FP_glasso_prec, FN_glasso_prec, diag_glasso_prec, count_TP_glasso_prec, count_FP_glasso_prec, count_FN_glasso_prec = compute_average_norm(Theta, Theta_hat)
total_glasso_corr, TP_glasso_corr, FP_glasso_corr, FN_glasso_corr, diag_glasso_corr, count_TP_glasso_corr, count_FP_glasso_corr, count_FN_glasso_corr = compute_average_norm(Rho, Rho_hat)

### Run SPACE

In [7]:
%%capture

# import SPACE
rpy2.robjects.numpy2ri.activate()
space = importr('space')

space_pbics = []

if graph_structure == 'hub_network':
    lams_space = np.logspace(np.log10(40), np.log10(200), 12)
elif graph_structure == 'erdos_renyi':
    lams_space = np.logspace(np.log10(40), np.log10(200), 12)

random_state = 2023
np.random.seed(random_state)
for lam in lams_space:
    prec = space.space_joint(X, np.array([lam]))
    Theta_hat = np.array(prec[0])

    space_pbics.append(pseudo_BIC(X, Theta_hat, modified=False))

# use optimal lambda based on p-bic
best_lam_space = lams_space[np.argmin(space_pbics)]
prec = space.space_joint(X, np.array([best_lam_space]))
Theta_hat = partial_corr_to_precision(prec[0], prec[1])
Rho_hat = np.array(prec[0])

total_space_prec, TP_space_prec, FP_space_prec, FN_space_prec, diag_space_prec, count_TP_space_prec, count_FP_space_prec, count_FN_space_prec = compute_average_norm(Theta, Theta_hat)
total_space_corr, TP_space_corr, FP_space_corr, FN_space_corr, diag_space_corr, count_TP_space_corr, count_FP_space_corr, count_FN_space_corr = compute_average_norm(Rho, Rho_hat)

In [27]:
def _report_corr(label, total, tp, n_tp, fp, n_fp, fn, n_fn, last=False):
    """Print the partial-correlation error summary of one method.

    Every method except the last one is followed by a blank separator line.
    """
    print(f'- {label}')
    print(f'Total error: {total:.2f}')
    print(f'TP error (# of TP): {tp:.3f} ({n_tp})')
    print(f'FP error (# of FP): {fp:.3f} ({n_fp})')
    fn_line = f'FN error (# of FN): {fn:.3f} ({n_fn})'
    if last:
        print(fn_line)
    else:
        print(fn_line, '\n')


print('< Results based on partial correlation matrix >', '\n')

_report_corr('ACCORD', total_accord_corr,
             TP_accord_corr, count_TP_accord_corr,
             FP_accord_corr, count_FP_accord_corr,
             FN_accord_corr, count_FN_accord_corr)
_report_corr('CONCORD', total_concord_corr,
             TP_concord_corr, count_TP_concord_corr,
             FP_concord_corr, count_FP_concord_corr,
             FN_concord_corr, count_FN_concord_corr)
_report_corr('Glasso', total_glasso_corr,
             TP_glasso_corr, count_TP_glasso_corr,
             FP_glasso_corr, count_FP_glasso_corr,
             FN_glasso_corr, count_FN_glasso_corr)
_report_corr('SPACE', total_space_corr,
             TP_space_corr, count_TP_space_corr,
             FP_space_corr, count_FP_space_corr,
             FN_space_corr, count_FN_space_corr,
             last=True)

< Results based on partial correlation matrix > 

- ACCORD
Total error: 11.01
TP error (# of TP): 0.195 (1964)
FP error (# of FP): 0.027 (2536)
FN error (# of FN): 0.153 (1906) 

- CONCORD
Total error: 12.09
TP error (# of TP): 0.221 (2063)
FP error (# of FP): 0.017 (3598)
FN error (# of FN): 0.156 (1807) 

- Glasso
Total error: 15.50
TP error (# of TP): 0.325 (285)
FP error (# of FP): 0.016 (64)
FN error (# of FN): 0.242 (3585) 

- SPACE
Total error: 15.54
TP error (# of TP): 0.334 (218)
FP error (# of FP): 0.015 (10)
FN error (# of FN): 0.244 (3652)


In [25]:
def _report_prec(label, total, tp, n_tp, fp, n_fp, fn, n_fn, diag, last=False):
    """Print the precision-matrix error summary of one method.

    Every method except the last one is followed by a blank separator line.
    """
    print(f'- {label}')
    print(f'Total error: {total:.2f}')
    print(f'TP error (# of TP): {tp:.3f} ({n_tp})')
    print(f'FP error (# of FP): {fp:.3f} ({n_fp})')
    print(f'FN error (# of FN): {fn:.3f} ({n_fn})')
    diag_line = f'Diag error: {diag:.3f}'
    if last:
        print(diag_line)
    else:
        print(diag_line, '\n')


print('< Results based on precision matrix >', '\n')

_report_prec('ACCORD', total_accord_prec,
             TP_accord_prec, count_TP_accord_prec,
             FP_accord_prec, count_FP_accord_prec,
             FN_accord_prec, count_FN_accord_prec,
             diag_accord_prec)
_report_prec('CONCORD', total_concord_prec,
             TP_concord_prec, count_TP_concord_prec,
             FP_concord_prec, count_FP_concord_prec,
             FN_concord_prec, count_FN_concord_prec,
             diag_concord_prec)
_report_prec('Glasso', total_glasso_prec,
             TP_glasso_prec, count_TP_glasso_prec,
             FP_glasso_prec, count_FP_glasso_prec,
             FN_glasso_prec, count_FN_glasso_prec,
             diag_glasso_prec)
_report_prec('SPACE', total_space_prec,
             TP_space_prec, count_TP_space_prec,
             FP_space_prec, count_FP_space_prec,
             FN_space_prec, count_FN_space_prec,
             diag_space_prec,
             last=True)

< Results based on precision matrix > 

- ACCORD
Total error: 73.52
TP error (# of TP): 1.054 (1964)
FP error (# of FP): 0.040 (2536)
FN error (# of FN): 0.750 (1906)
Diag error: 1.466 

- CONCORD
Total error: 122.38
TP error (# of TP): 1.202 (2063)
FP error (# of FP): 0.020 (3598)
FN error (# of FN): 0.797 (1807)
Diag error: 3.293 

- Glasso
Total error: 87.05
TP error (# of TP): 0.904 (285)
FP error (# of FP): 0.012 (64)
FN error (# of FN): 1.134 (3585)
Diag error: 1.654 

- SPACE
Total error: 86.84
TP error (# of TP): 0.954 (218)
FP error (# of FP): 0.017 (10)
FN error (# of FN): 1.131 (3652)
Diag error: 1.635
