In [36]:
import sys
sys.path.append('../../src/')
import torch
import pandas as pd
import numpy as np
import pickle
import argparse
import networkx as nx
from torch_geometric.utils import dense_to_sparse, degree
import matplotlib.pyplot as plt
from utils.classificationnet import GCNSynthetic
from utils.utils import normalize_adj, get_neighbourhood

In [37]:
import warnings
# NOTE(review): this silences every warning category for the rest of the session,
# deprecation notices from the libraries used below included; consider narrowing it.
warnings.filterwarnings("ignore")

In [38]:
# Print tensors with eight digits of precision.
torch.set_printoptions(precision=8)

# Column names of the result tables built further down. "node_idx" is filled from
# the key of each saved example; the remaining names are matched, in this order,
# to the entries stored under that key.
header = [
    "node_idx",
    "new_idx",
    "cf_adj",
    "sub_adj",
    "cf",
    "num_nodes",
    "node_dict",
    "y_pred",
    "y_pred_cf",
    "label",
    "loss_total",
    "loss_pred",
    "loss_graph_dist",
    "found",
]

In [39]:
# For original model
dataset = "syn1"
hidden = 20
seed = 37       # not used in the cells shown here
dropout = 0.0

# Load original dataset and model

# NOTE: pickle.load can execute arbitrary code; only open files from a trusted source.
with open("../../data/gnn_explainer/{}.pickle".format(dataset), "rb") as f:
    data = pickle.load(f)

adj = torch.Tensor(data["adj"]).squeeze()       # Does not include self loops
features = torch.Tensor(data["feat"]).squeeze()
labels = torch.tensor(data["labels"]).squeeze()
idx_train = torch.tensor(data["train_idx"])
idx_test = torch.tensor(data["test_idx"])
# dense_to_sparse returns an (edge_index, edge_attr) pair, so `edge_index` holds the
# whole pair; it is passed unchanged to get_neighbourhood in the accuracy cells.
edge_index = dense_to_sparse(adj)

norm_adj = normalize_adj(adj)

model = GCNSynthetic(nfeat=features.shape[1], nhid=hidden, nout=hidden,
                     nclass=len(labels.unique()), dropout=dropout)
# All tensors in this cell live on the CPU, so map the checkpoint there as well;
# without map_location a checkpoint saved from a GPU fails to load on a CPU-only machine.
model.load_state_dict(torch.load("../../models/gcn_3layer_{}.pt".format(dataset), map_location="cpu"))
model.eval()
# Pure inference: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    output = model(features, norm_adj)
y_pred_orig = torch.argmax(output, dim=1)

# Class histograms of the ground truth and of the model's predictions per split.
print("train set y_true counts: {}".format(np.unique(labels[idx_train].numpy(), return_counts=True)))
print("train set y_pred_orig counts: {}".format(np.unique(y_pred_orig[idx_train].numpy(), return_counts=True)))
print("test set y_true counts: {}".format(np.unique(labels[idx_test].numpy(), return_counts=True)))
print("test set y_pred_orig counts: {}".format(np.unique(y_pred_orig[idx_test].numpy(), return_counts=True)))
print("Whole graph counts: {}".format(np.unique(labels.numpy(), return_counts=True)))

train set y_true counts: (array([0, 1, 2, 3]), array([238, 131, 128,  63]))
train set y_pred_orig counts: (array([0, 1, 2, 3]), array([238, 136, 114,  72]))
test set y_true counts: (array([0, 1, 2, 3]), array([62, 29, 32, 17]))
test set y_pred_orig counts: (array([0, 1, 2, 3]), array([62, 30, 30, 18]))
Whole graph counts: (array([0, 1, 2, 3]), array([300, 160, 160,  80]))


### LOADING RESULTS TO DATAFRAME

save_prefix list:
1. inductive_non0_correct_only
2. inductive_non0_correct_del_only
3. inductive_non0_correct_adaptive
4. ablation_feats
5. ablation_feats_degree
6. ablation_feats_entropy
7. ablation_feats_onehot
8. ablation_feats_deg_entropy
9. ablation_feats_deg_onehot
10. ablation_feats_entropy_onehot

In [40]:
# Selects which saved result files are analysed: the value is substituted into the
# result file names opened below. Pick one of the save_prefix values listed above.
save_prefix = "ablation_feats_degree" #'inductive_non0_correct_only'

In [41]:
# When True, the train-checkpoint results are loaded too and every train-side metric
# below is reported; when False, only the saved test results are analysed.
train=False #keep true if training and then testing, else keep False when directly testing using saved models

In [42]:
# NOTE: pickle.load can execute arbitrary code; only load result files from a trusted source.
# The `with` blocks close the files on exit, so no explicit close() is needed.
if(train):
    # Load cf examples saved at the train checkpoint
    with open("../../results/{}/{}_train_checkpoint_{}.pkl".format(dataset, dataset, save_prefix), "rb") as ftrain:
        cf_examples_train = pickle.load(ftrain)

# Load cf examples for test set
with open("../../results/{}/{}_test_{}.pkl".format(dataset, dataset, save_prefix), "rb") as ftest:
    cf_examples_test = pickle.load(ftest)

In [43]:
if(train):
    # Every header field except "node_idx" is read, in order, from the saved example;
    # "node_idx" itself is the dictionary key.
    example_fields = [head for head in header if head != "node_idx"]

    # Rows are collected in plain lists and each frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame per call.
    rows_found_train = []
    rows_not_found_train = []
    for key, example in cf_examples_train.items():
        val = {"node_idx": key}
        for i, head in enumerate(example_fields):
            item = example[i]
            # Tensors are stored as NumPy arrays; any other value is kept unchanged.
            val[head] = item.detach().cpu().numpy() if torch.is_tensor(item) else item
        if(val['found'] == 'not found'):
            rows_not_found_train.append(val)
        else:
            rows_found_train.append(val)

    # columns=header keeps the column set and order even when a list is empty.
    df_train = pd.DataFrame(rows_found_train, columns=header)
    df_train_not_found = pd.DataFrame(rows_not_found_train, columns=header)

    print(len(df_train), len(df_train_not_found))

In [44]:
# Every header field except "node_idx" is read, in order, from the saved example;
# "node_idx" itself is the dictionary key.
example_fields = [head for head in header if head != "node_idx"]

# Rows are collected in plain lists and each frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame per call.
rows_found_test = []
rows_not_found_test = []
for key, example in cf_examples_test.items():
    val = {"node_idx": key}
    for i, head in enumerate(example_fields):
        item = example[i]
        # Tensors are stored as NumPy arrays; any other value is kept unchanged.
        val[head] = item.detach().cpu().numpy() if torch.is_tensor(item) else item
    if(val['found'] == 'not found'):
        rows_not_found_test.append(val)
    else:
        rows_found_test.append(val)

# columns=header keeps the column set and order even when a list is empty.
df_test = pd.DataFrame(rows_found_test, columns=header)
df_test_not_found = pd.DataFrame(rows_not_found_test, columns=header)

len(df_test), len(df_test_not_found)

(69, 7)

#### QUANTITATIVE ANALYSIS

In [45]:
# Found counterfactuals out of all attempted nodes: train first (when enabled), then test.
if train:
    n_found_train = len(df_train)
    print("Num cf examples found during train: {}/{}".format(n_found_train, n_found_train + len(df_train_not_found)))
n_found_test = len(df_test)
print("Num cf examples found during test: {}/{}".format(n_found_test, n_found_test + len(df_test_not_found)))

# Mean of the stored "loss_graph_dist" column over the found counterfactuals.
if train:
    print("Average graph distance train: {}".format(np.mean(df_train["loss_graph_dist"])))
print("Average graph distance test: {}".format(np.mean(df_test["loss_graph_dist"])))

Num cf examples found during test: 69/76
Average graph distance test: 4.028985507246377


#### AVG EXPLANATION SIZE

In [46]:
if train:
    # Length of the "cf" entry of every found train counterfactual, overall and per label.
    cf_size_list_train = []
    cf_size_dict_train = {}
    for row in range(len(df_train)):
        size = len(df_train.loc[row, "cf"])
        cf_size_list_train.append(size)
        node_label = df_train.loc[row, "label"].item()
        cf_size_dict_train.setdefault(node_label, []).append(size)
    avg_cf_len = sum(cf_size_list_train)

    # The mean is printed twice on purpose: once via NumPy, once from the running total.
    print("Average counterfactual length train: ", np.mean(cf_size_list_train), avg_cf_len/len(df_train))
    print("Std Dev counterfactual length train: ", np.std(cf_size_list_train))

In [47]:
if train:
    # Mean and standard deviation of the explanation size for each label seen in train.
    for lbl, sizes in cf_size_dict_train.items():
        print("Label: {}, Avg CF Size: {:.2f} +- {:.2f}".format(lbl, np.mean(sizes), np.std(sizes)))

In [48]:
# Length of the "cf" entry of every found test counterfactual.
cf_size_list_test = [len(df_test.loc[row, "cf"]) for row in range(len(df_test))]
avg_cf_len = sum(cf_size_list_test)

# The same sizes grouped by the node's label.
cf_size_dict_test = {}
for row, size in enumerate(cf_size_list_test):
    node_label = df_test.loc[row, "label"].item()
    cf_size_dict_test.setdefault(node_label, []).append(size)


# The mean is printed twice on purpose: once via NumPy, once from the running total.
print("Average counterfactual length test: ", np.mean(cf_size_list_test), avg_cf_len/len(df_test))
print("Std Dev counterfactual length test: ", np.std(cf_size_list_test))

Average counterfactual length test:  4.028985507246377 4.028985507246377
Std Dev counterfactual length test:  4.276565784929941


In [49]:
# Mean and standard deviation of the explanation size for each label seen in test.
for lbl, sizes in cf_size_dict_test.items():
    print("Label: {}, Avg CF Size: {:.2f} +- {:.2f}".format(lbl, np.mean(sizes), np.std(sizes)))

Label: 2, Avg CF Size: 1.17 +- 0.45
Label: 1, Avg CF Size: 9.65 +- 2.61
Label: 3, Avg CF Size: 1.31 +- 0.68


#### FIDELITY

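AVERAGE FIDELITY (computed below as 1 − found / total, i.e. the share of nodes for which no counterfactual was found)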

In [50]:
# Fidelity here is 1 - found / (found + not found), so a lower value means more
# nodes received a counterfactual.
if train:
    total_train = len(df_train) + len(df_train_not_found)
    print("Avg fidelity train: {:.3f}".format(1 - len(df_train) / total_train))
total_test = len(df_test) + len(df_test_not_found)
print("Avg fidelity test: {:.3f}".format(1 - len(df_test) / total_test))

Avg fidelity test: 0.092


LABEL-WISE FIDELITY

In [51]:
if train:
    # Found / not-found counts for labels 0 to 3, indexed by label.
    label_found_train = [len(df_train[df_train['label'] == lbl]) for lbl in range(4)]
    print(label_found_train)
    label_not_found_train = [len(df_train_not_found[df_train_not_found['label'] == lbl]) for lbl in range(4)]
    print(label_not_found_train)
    for lbl, (n_found, n_missed) in enumerate(zip(label_found_train, label_not_found_train)):
        n_total = n_found + n_missed
        # A label with no nodes at all has no defined fidelity; leave it out.
        if n_total == 0:
            continue
        fidelity = 1 - (n_found / n_total)
        print('Fidelity of label {} (train) : {:.3f}'.format(lbl, fidelity))

In [52]:
# Found / not-found counts for labels 0 to 3, indexed by label.
label_found_test = [len(df_test[df_test['label'] == lbl]) for lbl in range(4)]
print(label_found_test)
label_not_found_test = [len(df_test_not_found[df_test_not_found['label'] == lbl]) for lbl in range(4)]
print(label_not_found_test)
## label wise fidelity
for lbl in range(len(label_found_test)):
    n_total = label_found_test[lbl] + label_not_found_test[lbl]
    # A label with no nodes at all has no defined fidelity; leave it out.
    if n_total != 0:
        fidelity = 1 - (label_found_test[lbl] / n_total)
        print('Fidelity of label {} (test) : {:.3f}'.format(lbl, fidelity))

[0, 23, 30, 16]
[0, 6, 0, 1]
Fidelity of label 1 (test) : 0.207
Fidelity of label 2 (test) : 0.000
Fidelity of label 3 (test) : 0.059


#### ACCURACY

In [53]:
if train:
    accuracy = []
    # Pair the sorted train+test node ids with the original model's predictions.
    all_node_ids = sorted(np.concatenate((idx_train.numpy(), idx_test.numpy())))
    dict_ypred_orig = dict(zip(all_node_ids, y_pred_orig.numpy()))

    for row in range(len(df_train)):
        node_idx = df_train.loc[row, "node_idx"]
        new_idx = df_train.loc[row, "new_idx"]

        _, _, _, node_dict = get_neighbourhood(int(node_idx), edge_index, 4, features, labels)

        # Only score rows whose stored index agrees with the recomputed mapping.
        if node_dict[node_idx] == new_idx:
            cf = df_train.loc[row, "cf"]
            nodes_in_motif = []
            correct = 0
            for perb in cf:
                u, v = perb[0], perb[1]
                if perb[2] == 'add':
                    # Every addition is counted as correct.
                    correct += 1
                elif dict_ypred_orig[v] != 0 and dict_ypred_orig[u] != 0:
                    # A deletion counts only when both endpoints were predicted non-zero.
                    correct += 1
                    nodes_in_motif.append(v)
            correct = correct / len(cf)

            accuracy.append([node_idx, new_idx, cf, nodes_in_motif, correct])

    accuracy_columns = ["node_idx", "new_idx", "cf", "nodes_in_motif", "prop_correct"]
    df_accuracy = pd.DataFrame(accuracy, columns=accuracy_columns)
    print("model: {}, Accuracy: {:.4f}".format(save_prefix, np.mean(df_accuracy["prop_correct"])))

In [54]:
accuracy = []
# Pair the sorted train+test node ids with the original model's predictions.
all_node_ids = sorted(np.concatenate((idx_train.numpy(), idx_test.numpy())))
dict_ypred_orig = dict(zip(all_node_ids, y_pred_orig.numpy()))
for row in range(len(df_test)):
    node_idx = df_test.loc[row, "node_idx"]
    new_idx = df_test.loc[row, "new_idx"]

    _, _, _, node_dict = get_neighbourhood(int(node_idx), edge_index, 4, features, labels)

    # Only score rows whose stored index agrees with the recomputed mapping.
    if node_dict[node_idx] == new_idx:
        cf = df_test.loc[row, "cf"]
        nodes_in_motif = []
        correct = 0
        for perb in cf:
            u, v = perb[0], perb[1]
            if perb[2] == 'add':
                # Every addition is counted as correct.
                correct += 1
            elif dict_ypred_orig[v] != 0 and dict_ypred_orig[u] != 0:
                # A deletion counts only when both endpoints were predicted non-zero.
                correct += 1
                nodes_in_motif.append(v)
        correct = correct / len(cf)

        accuracy.append([node_idx, new_idx, cf, nodes_in_motif, correct])

accuracy_columns = ["node_idx", "new_idx", "cf", "nodes_in_motif", "prop_correct"]
df_accuracy_test = pd.DataFrame(accuracy, columns=accuracy_columns)
print("model: {}, Accuracy: {:.4f}".format(save_prefix, np.mean(df_accuracy_test["prop_correct"])))

model: ablation_feats_degree, Accuracy: 0.9889


#