In [159]:
import numpy as np
from sklearn import preprocessing
from sklearn.covariance import GraphicalLassoCV , GraphicalLasso
from sklearn.metrics import precision_recall_fscore_support
from synthetic import generate_dataset
from adjacency_score import performance_metrics


In [160]:
# Settings for the synthetic dataset.
n_obs = 1000
n_features = 20
nb_edges = 3  # average, per node
n_classes = 3

# Draw the synthetic dataset with a fixed seed so the run is repeatable.
# NOTE(review): the return values are presumably (observations, labels, generating
# graph) and "ER" presumably selects an Erdos-Renyi graph model -- confirm against
# synthetic.generate_dataset.
X_s, y_s, graph_s = generate_dataset(
    nb_classes=n_classes,
    nb_obs=n_obs,
    nb_features=n_features,
    nb_edges=nb_edges,
    nb_characteristic_features=3,
    signal=0.8,
    diffusion_coefficient=0.5,
    noise=0.0,
    model="ER",
    random_seed=0,
)

In [161]:
# Adjacency matrix of the generated graph as a NumPy array.
directed_adj_s = np.array(graph_s.get_adjacency().data)
# Symmetrize: an entry is 1 when the sum of the matrix and its transpose is
# positive at that position (in case the graph was directed).
true_adj_s = (np.add(directed_adj_s, directed_adj_s.T) > 0).astype(int)

In [162]:
# Standardize the features before estimating the sparse inverse covariance, as suggested in
# https://scikit-learn.org/stable/modules/covariance.html#sparse-inverse-covariance
# The keyword values below are the documented defaults of `preprocessing.scale`,
# spelled out for readability.
X_scaled = preprocessing.scale(X_s, axis=0, with_mean=True, with_std=True)

In [166]:
# GraphicalLassoCV tends to select an alpha which results in too many false
# positives (FP), so a single hand-picked alpha value is used here instead.
# TODO: it might be worth implementing our own cross-validation method.
#
# Cross-validated alternative, kept for reference:
# cov = GraphicalLassoCV().fit(X_scaled)
glasso = GraphicalLasso(alpha=0.15)
cov = glasso.fit(X_scaled)  # fit() returns the fitted estimator itself

In [167]:
# Precision (inverse covariance) matrix of the fitted model.
precision_matrix = cov.get_precision()
# Every non-zero entry of the precision matrix is read as an edge (1), zeros as no edge (0).
est_adj_s = precision_matrix.astype(bool).astype(int)
# The adjacency matrix has no self-edge, so zero the diagonal in place.
# fill_diagonal is used instead of subtracting an identity matrix: it does not
# rely on every diagonal entry being exactly 1 (a zero entry would become -1),
# nor on the global n_features matching the matrix size.
np.fill_diagonal(est_adj_s, 0)

In [168]:
# Compare the estimated adjacency matrix with the ground truth.
# NOTE(review): with 2-D 0/1 arrays scikit-learn presumably treats the input as a
# multilabel indicator, so average='micro' would pool counts over all matrix
# entries -- confirm against the scikit-learn documentation.
ps, rs, fs, ss = precision_recall_fscore_support(
    y_true=true_adj_s,
    y_pred=est_adj_s,
    average='micro',
)
for label, value in (("Precision: ", ps), ("Recall: ", rs)):
    print(label + str(value))

Precision: 0.835820895522388
Recall: 0.9333333333333333
