## 1. Load Data from URL.

In [None]:
import requests
import json
import time
def getUrl(url):
    """Send an HTTP GET request to ``url`` and return the ``requests`` response object."""
    return requests.get(url)


In [None]:
# Cora_GCN: fetch a single graph_info payload and report how large it is.
# NOTE(review): assumes the API server is reachable on localhost:7777 — confirm before running.
url_list = ["http://localhost:7777/api/graph_info?dataset_id=4&model_id=4&explain_id=4&graph_id=1"]
r = getUrl(url_list[0])
# Parse the response body as JSON; later cells read `receive_obj`.
receive_obj = json.loads(r.text)
# Length of the raw response text, in characters.
print(len(r.text))

In [None]:
# Fetch the rule_mining payload for dataset 4 and report its size.
# This overwrites `url_list`, `r` and `receive_obj` from the previous cell.
url_list = ["http://localhost:7777/api/rule_mining?dataset_id=4"]
r = getUrl(url_list[0])
receive_obj = json.loads(r.text)
# Length of the raw response text, in characters.
print(len(r.text))

In [None]:
# Time one rule_mining request per dataset id and print "<dataset_id> <seconds>".
for dataset_id in [4,5,6,7,9,13]:
    start_time = time.time()
    url_list = ["http://localhost:7777/api/rule_mining?dataset_id={}".format(dataset_id)]
    r = getUrl(url_list[0])
    # Elapsed wall-clock time covers the HTTP round trip only; the body is not parsed here.
    print(dataset_id, time.time() - start_time)

In [None]:
# Time one graph_bundle_info request per dataset id and print "<dataset_id> <seconds>".
for dataset_id in [4,5,6,7,9,13]:
    start_time = time.time()
    url_list = ["http://localhost:7777/api/graph_bundle_info?dataset_id={}".format(dataset_id)]
    r = getUrl(url_list[0])
    # Elapsed wall-clock time covers the HTTP round trip only; the body is not parsed here.
    print(dataset_id, time.time() - start_time)

## 2. Test Compress and Decompress.

In [None]:
import json
import gzip

def compress_data(data):
    """Serialize ``data`` to indented JSON, encode it as UTF-8 and gzip the bytes.

    Returns:
        The gzip-compressed ``bytes``.
    """
    return gzip.compress(json.dumps(data, indent=2).encode('utf-8'))
def decompress_data(compressed):
    """Inverse of ``compress_data``: gunzip, decode as UTF-8 and parse the JSON text.

    Returns:
        The Python object described by the JSON document.
    """
    return json.loads(gzip.decompress(compressed).decode('utf-8'))
import time
# Time one compress + decompress round trip of the most recently loaded payload.
# `receive_obj` must already exist (it is assigned by the cells in section 1).
start_time = time.time()
compressed = compress_data(receive_obj)
data = decompress_data(compressed)
# Elapsed seconds for the whole round trip.
print(time.time() - start_time)

## 3. Cache JSON.

In [1]:
import requests
import time
import json
import gzip
def getUrl(url):
    """Send an HTTP GET request to ``url`` and return the ``requests`` response object."""
    return requests.get(url)
# Fetch the graph bundle for each dataset and cache the parsed JSON on disk as
# "<CACHE_DIR>cache_bundle_<dataset_id>_<VERSION>.json".
for dataset_id in [4,5,6,7,9,13]:
    start_time = time.time()
    url_list = ["http://localhost:7777/api/graph_bundle_info?dataset_id={}".format(dataset_id)]
    # Loop-invariant settings; they stay defined after the loop and later cells read them.
    CACHE_DIR = "../Cache/"
    VERSION = "V1_2"
    r = getUrl(url_list[0])
    receive_obj = json.loads(r.text)


    # open() does not create directories, so CACHE_DIR has to exist already.
    with open(CACHE_DIR+"cache_bundle_{}_{}.json".format(dataset_id, VERSION), "w") as f:
        json.dump(receive_obj, f)
    end_time = time.time()
    # Elapsed time covers the request, the JSON parse and the file write.
    print(dataset_id, end_time - start_time)

4 0.8634727001190186
5 1.1725950241088867
6 9.1912522315979
7 1.549485206604004
9 9.383479356765747
13 1.6043713092803955


In [2]:
# Read one cached bundle back as a sanity check.
# Relies on `dataset_id`, `CACHE_DIR` and `VERSION` left over from the caching loop in the
# previous cell, so it opens the file written for the last dataset id of that loop.
with open(CACHE_DIR+"cache_bundle_{}_{}.json".format(dataset_id, VERSION), "r") as f:
    receive_obj = json.load(f)
# Top-level keys of the cached payload.
print(receive_obj.keys())

dict_keys(['graph_obj', 'success'])


In [3]:
# Fetch the dataset list and cache it as "<CACHE_DIR>datasetlist_<VERSION>.json".
# `CACHE_DIR` and `VERSION` come from the bundle-caching cell above.
url_list = ["http://localhost:7777/api/datasets"]
r = getUrl(url_list[0])
receive_obj = json.loads(r.text)
with open(CACHE_DIR+"datasetlist_{}.json".format(VERSION), "w") as f:
    json.dump(receive_obj, f)

An alternative way of caching the JSON: write it gzip-compressed.

In [None]:
import requests
import time
import json
import gzip
def getUrl(url):
    """Send an HTTP GET request to ``url`` and return the ``requests`` response object."""
    return requests.get(url)
# Fetch the bundle for one dataset and cache it as gzip-compressed JSON:
# "<CACHE_DIR>cache_bundle_<dataset_id>_<VERSION>.json.gz".
dataset_id = 4
url_list = ["http://localhost:7777/api/graph_bundle_info?dataset_id={}".format(dataset_id)]
CACHE_DIR = "../Cache/"
VERSION = "V1_1"
r = getUrl(url_list[0])
receive_obj = json.loads(r.text)
# Re-serialize the parsed payload as indented JSON text
json_data = json.dumps(receive_obj, indent=2)
# Encode the text as UTF-8 bytes for the binary gzip stream
encoded = json_data.encode('utf-8')

# gzip.open in "wb" mode compresses the bytes as they are written.
with gzip.open(CACHE_DIR+"cache_bundle_{}_{}.json.gz".format(dataset_id, VERSION), "wb") as f:
    f.write(encoded)


In [None]:
# Read the gzip cache written by the previous cell back into a Python object.
# Uses `CACHE_DIR`, `dataset_id` and `VERSION` as they were set in that cell.
with gzip.open(CACHE_DIR+"cache_bundle_{}_{}.json.gz".format(dataset_id, VERSION), "rb") as f:
    data = f.read()
# Decode the decompressed bytes to text
json_data = data.decode('utf-8')
# Parse the JSON text
receive_obj = json.loads(json_data)
# Top-level keys of the cached payload.
print(receive_obj.keys())

In [None]:
# Show the first 50 bytes of `compressed`. That name is not assigned anywhere in this
# section; it comes from the compress/decompress test cell in section 2, which must have run.
print(compressed[0:50])

## 4. Speed test of servers.

In [None]:
import requests
import time
import json

def getUrl(url):
    """Send an HTTP GET request to ``url`` and return the ``requests`` response object."""
    return requests.get(url)
# Time the graph_bundle_info endpoint. Each dataset id is requested twice in a row —
# presumably to compare an uncached first request with a cached second one (TODO confirm).
for dataset_id in [4,4,5,5,6,6,7,7,9,9]:
    start_time = time.time()
    url_list = ["http://localhost:7777/api/graph_bundle_info?dataset_id={}".format(dataset_id)]
    r = getUrl(url_list[0])
    # Only the HTTP round trip is timed; JSON parsing happens after the clock stops.
    duration_time = time.time() - start_time
    receive_obj = json.loads(r.text)

    if not receive_obj["success"] == True:
        print("Not success dataset_id:{} time:{}".format(dataset_id, duration_time))
        continue
    graph_obj = receive_obj["graph_obj"]
    print(graph_obj["common"]["name"], duration_time)
        

In [None]:
# Cora
#No cache: 6s
#Cache: 1s

# Photo
#No Cache: 24s
#Cache: 


#citeseer 43.84026622772217  (No Cache, No Cache SPD, KFS)
#citeseer 1.2933223247528076   (Cache)

#pubmed 598.2629690170288
#pubmed 6.6324803829193115

#cora_ml 63.03324055671692
#cora_ml 1.2183799743652344

#polblogs 21.464422702789307
#polblogs 0.7255387306213379

## 5. Analyze Data Package.

In [None]:
import json
# Re-parse the most recent response. `r` is whatever response object an earlier cell left
# behind, so the payload inspected here depends on which request ran last.
receive_obj = json.loads(r.text)
# Only list the graph_obj keys when the server reported success.
if receive_obj["success"] == True:
    print(receive_obj["graph_obj"].keys())

In [None]:
# Per-node correctness flags: True where the model output equals the target label.
graph_obj = receive_obj["graph_obj"]
output_class = graph_obj["graph_out"]["node_features"]
ground_truth_class = graph_obj["graph_target"]["node_features"]
TF_class = [output_class[i]==ground_truth_class[i] for i in range(len(output_class))]


In [None]:
# Pull the per-node output vectors out of the package and print them in full.
# NOTE(review): this prints the whole structure; consider showing only a slice for large graphs.
embedding = graph_obj["graph_out"]["output_vector"]
print(embedding)

### 5.1 Construct confusion dict / matrix.

In [None]:
# Nested counts: confusion_dict[ground_truth][output] = number of nodes with that pair.
confusion_dict = {}
for i in range(len(output_class)):
    ground_truth = ground_truth_class[i]
    output = output_class[i]
    counts_for_truth = confusion_dict.setdefault(ground_truth, {})
    counts_for_truth[output] = counts_for_truth.get(output, 0) + 1
print(confusion_dict)

In [None]:
def construct_confusion_matrix(confusion_dict, class_num):
    """Convert nested confusion counts into a dense ``class_num`` x ``class_num`` matrix.

    Args:
        confusion_dict: mapping ``ground_truth -> {predicted -> count}``.
        class_num: number of classes; labels ``0 .. class_num - 1`` are looked up.

    Returns:
        A list of rows where ``matrix[g][p]`` is the count for ground truth ``g``
        predicted as ``p``. Pairs that are absent from ``confusion_dict`` are 0,
        including whole rows for classes that never occur as ground truth.
    """
    confusion_matrix = []
    for truth_label in range(class_num):
        # A class with no ground-truth occurrences has no entry; use an empty row
        # instead of raising KeyError.
        predicted_counts = confusion_dict.get(truth_label, {})
        confusion_matrix.append(
            [predicted_counts.get(predicted_label, 0) for predicted_label in range(class_num)]
        )
    return confusion_matrix
# Build the dense matrix for 7 classes.
# NOTE(review): 7 is hard-coded — presumably the class count of the dataset loaded above; adjust for other datasets.
confusion_matrix = construct_confusion_matrix(confusion_dict, 7)
print(confusion_matrix)

In [None]:
# Draw the confusion matrix as an annotated heatmap and save it next to the notebook.
import numpy as np; np.random.seed(0)
import seaborn as sns; sns.set()
#uniform_data = np.random.rand(10, 12)
# fmt="d" renders each cell annotation as an integer count.
ax = sns.heatmap(confusion_matrix,annot=True,fmt="d",cmap="YlGnBu",linewidths=.5)
fig = ax.get_figure()
fig.savefig("confusion_matrix.png")

## 6. Load Multiple Data Package.

In [None]:
import requests
import json

def getUrl(url):
    """Send an HTTP GET request to ``url`` and return the ``requests`` response object."""
    return requests.get(url)
def getClass(graph_obj):
    """Extract target labels, output labels and per-node correctness from a graph package.

    Returns:
        ``[ground_truth_class, output_class, TF_class]`` where ``TF_class[i]`` is True
        when the output label of node ``i`` equals its target label.
    """
    predicted = graph_obj["graph_out"]["node_features"]
    truth = graph_obj["graph_target"]["node_features"]
    is_correct = [predicted[idx] == truth[idx] for idx in range(len(predicted))]
    return [truth, predicted, is_correct]
def getScoreOnGroundTruth(graph_obj):
    """For every node, pick the entry of its output vector at the target-label index.

    Returns:
        A list with one score per row of ``graph_obj["graph_out"]["output_vector"]``.
    """
    output_vector = graph_obj["graph_out"]["output_vector"]
    truth = graph_obj["graph_target"]["node_features"]
    return [node_scores[truth[idx]] for idx, node_scores in enumerate(output_vector)]
def getScoreOnPrediction(graph_obj):
    """For every node, pick the entry of its output vector at the output-label index.

    Returns:
        A list with one score per row of ``graph_obj["graph_out"]["output_vector"]``.
    """
    output_vector = graph_obj["graph_out"]["output_vector"]
    predicted = graph_obj["graph_out"]["node_features"]
    return [node_scores[predicted[idx]] for idx, node_scores in enumerate(output_vector)]
def overall_for_one_dataset(url):
    """Fetch one graph package from ``url`` and classify its nodes.

    ``getClass`` indexes its argument by key, so the response body is parsed as JSON
    and the ``"graph_obj"`` entry is passed on rather than the HTTP response itself.

    Returns:
        ``[ground_truth_class, output_class, TF_class]`` as produced by ``getClass``.
    """
    r = getUrl(url)
    receive_obj = json.loads(r.text)
    return getClass(receive_obj["graph_obj"])
# Model selection: exactly one `url_list` assignment below is active; the alternatives are
# kept commented out. The order of the URLs defines the model order used by later cells.

# Cora_ML_GCN / Cora_ML_MLP / Cora_ML_GCN_identity_features
#url_list = ["http://localhost:7777/api/graph_info?dataset_id=7&model_id=16&explain_id=4&graph_id=1",
#            "http://localhost:7777/api/graph_info?dataset_id=7&model_id=17&explain_id=4&graph_id=1",
#            "http://localhost:7777/api/graph_info?dataset_id=7&model_id=18&explain_id=4&graph_id=1"
#           ]

# Cora_GCN / MLP / Cora_GCN_identity_features (active selection)
url_list = ["http://localhost:7777/api/graph_info?dataset_id=4&model_id=4&explain_id=4&graph_id=1",
            "http://localhost:7777/api/graph_info?dataset_id=4&model_id=7&explain_id=4&graph_id=1",
            "http://localhost:7777/api/graph_info?dataset_id=4&model_id=8&explain_id=4&graph_id=1"
           ]
# Cora GCN SymNorm tideA / GCN SymNorm A
# url_list = ["http://localhost:7777/api/graph_info?dataset_id=4&model_id=4&explain_id=4&graph_id=1",
#             "http://localhost:7777/api/graph_info?dataset_id=4&model_id=12&explain_id=4&graph_id=1"
#            ]

# Citeseer_GCN / MLP / Citeseer_GCN_identity_features
#url_list = ["http://localhost:7777/api/graph_info?dataset_id=5&model_id=9&explain_id=4&graph_id=1"
#           ,"http://localhost:7777/api/graph_info?dataset_id=5&model_id=10&explain_id=4&graph_id=1"
#           ,"http://localhost:7777/api/graph_info?dataset_id=5&model_id=11&explain_id=4&graph_id=1"
#           ]
# Download every selected model's package and collect parallel per-model lists:
#   results_list[i]    -> [ground_truth_class, output_class, TF_class] from getClass
#   obj_list[i]        -> the raw graph_obj
#   score_list[i]      -> per-node score at the ground-truth label
#   pred_score_list[i] -> per-node score at the predicted label
results_list = []
obj_list = []
score_list = []
pred_score_list = []
for i in range(len(url_list)):
    r = getUrl(url_list[i])
    receive_obj = json.loads(r.text)
    graph_obj = receive_obj["graph_obj"]
    obj_list.append(graph_obj)
    results = getClass(graph_obj)
    results_list.append(results)
    score_list.append(getScoreOnGroundTruth(graph_obj))
    pred_score_list.append(getScoreOnPrediction(graph_obj))
# Number of models loaded.
print(len(results_list))

## 7. Check Consistency.

In [None]:
### Check consistent
# Every model should report the same ground-truth label for every node. Compare each
# model's ground-truth list (results_list[i][0]) with the first model's, node by node.
reference_truth = results_list[0][0]
# all() stops at the first mismatch, and only then. Breaking out while the flag was still
# True would end the scan after the very first node.
flag = all(
    results_list[i][0][j] == reference_truth[j]
    for j in range(len(reference_truth))
    for i in range(1, len(url_list))
)
if flag:
    print("consistent")
else:
    print("not consistent")

## 8. Multiple TF Stats.

In [None]:
# Module-level tally: fingerprint string -> number of nodes with that fingerprint.
Stats = {}
def addCount(TF_list):
    """Count one occurrence of this combination of flags in the module-level ``Stats``.

    The fingerprint is the space-separated ``str()`` of every item in ``TF_list``.
    """
    fingerprint = " ".join(str(flag) for flag in TF_list)
    Stats[fingerprint] = Stats.get(fingerprint, 0) + 1
    
# For each node, gather its correctness flag from every model and tally the combination.
for j in range(len(results_list[0][2])):
    addCount([results_list[i][2][j] for i in range(len(url_list))])


## 9. Construct Neighbor Set.

In [None]:
def constructNeighborSet(graph_in):
    """Build adjacency lists from the parallel ``senders`` / ``receivers`` edge arrays.

    Returns:
        A dict mapping each node that occurs in ``senders`` to the list of its
        receivers, in edge order. Nodes that never occur as a sender get no key.
    """
    senders = graph_in["senders"]
    receivers = graph_in["receivers"]
    adjacency = {}
    for edge_index, send_node in enumerate(senders):
        adjacency.setdefault(send_node, []).append(receivers[edge_index])
    return adjacency

# Build the adjacency lists from the first loaded model's input graph; the result is used
# as the graph structure by the analysis cells below.
graph_in = obj_list[0]["graph_in"]
cora_gcn_neighbor_set = constructNeighborSet(graph_in)


In [None]:
def getStats(nodelist, results_list):
    """Count how often each combination of per-model correctness flags occurs.

    Args:
        nodelist: iterable of node ids to include.
        results_list: one entry per model; element ``[2]`` of each entry is indexed
            by node id to obtain that model's flag.

    Returns:
        Dict mapping the space-separated fingerprint (``str()`` of each model's flag)
        to the number of nodes in ``nodelist`` that have it.
    """
    fingerprint_counts = {}
    for node_id in nodelist:
        fingerprint = " ".join(str(model_result[2][node_id]) for model_result in results_list)
        fingerprint_counts[fingerprint] = fingerprint_counts.get(fingerprint, 0) + 1
    return fingerprint_counts

## 10. Extract Special Patterns: cCorrect_nWrong / cWrong_nCorrect.

In [None]:
def extractSpecialPatternList(TF_class, neighbor_set, center_class, neighbor_class):
    """List the nodes whose own flag and all of whose neighbours' flags match a pattern.

    Args:
        TF_class: per-node flags, indexed by node id.
        neighbor_set: mapping node id -> list of neighbour ids; it is looked up only
            for nodes whose own flag equals ``center_class``.
        center_class: flag the node itself must have.
        neighbor_class: flag every one of its neighbours must have.

    Returns:
        Matching node ids in ascending order.
    """
    return [
        node_id
        for node_id, node_flag in enumerate(TF_class)
        if node_flag == center_class
        and all(TF_class[neighbor] == neighbor_class for neighbor in neighbor_set[node_id])
    ]
# Patterns for the first model (results_list[0]):
#   cCorrect_nWrong: the node is correct while every neighbour is wrong;
#   cWrong_nCorrect: the node is wrong while every neighbour is correct.
Special_pattern_cCorrect_nWrong = extractSpecialPatternList(results_list[0][2], cora_gcn_neighbor_set, True, False)
Special_pattern_cWrong_nCorrect = extractSpecialPatternList(results_list[0][2], cora_gcn_neighbor_set, False, True)


#print(len(Special_pattern_cCorrect_nWrong), len(Special_pattern_cWrong_nCorrect))


In [None]:
def printStats(Stats):
    """Print one ``"<fingerprint> <count>"`` line per entry of ``Stats``."""
    for fingerprint, count in Stats.items():
        print(fingerprint+" "+str(count))

## 11. Degree Stats.

In [None]:
import os
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np

In [None]:
def getDegreeStatsGraph(Stats, title, key_num=0, enable_legend=False):
    """Plot one line per top-level key of ``Stats`` and save the figure under ``figs/``.

    Args:
        Stats: mapping ``series_name -> {x_value -> y_value}``. Each inner dict is
            drawn as one line, in the inner dict's own key order.
        title: figure title; also used to build the output path
            ``figs/Analysis_<title>``.
        key_num: accepted for backward compatibility; it is not used.
        enable_legend: when true, draw a legend labelled with the series names.
    """
    series_names = list(Stats.keys())
    x_axis_list = [list(Stats[name].keys()) for name in series_names]
    y_axis_list = [
        [Stats[name][x_value] for x_value in x_values]
        for name, x_values in zip(series_names, x_axis_list)
    ]

    color = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]
    marker = ["o","v","s","p","*","h","<",">"]
    fig = plt.figure(figsize=(6,4))
    ax1 = fig.add_subplot(111)
    for i, name in enumerate(series_names):
        # Cycle through the palettes so that more series than styles cannot raise IndexError.
        ax1.plot(
            x_axis_list[i],
            y_axis_list[i],
            label=name,
            color=color[i % len(color)],
            alpha=0.9,
            marker=marker[i % len(marker)],
        )
    ax1.set_xlabel("Degree")
    ax1.set_ylabel("Accuracy")
    ax1.set_title(title+" ")
    if enable_legend:
        ax1.legend(loc='best')
    ax1.grid()
    # savefig does not create missing directories.
    os.makedirs("figs", exist_ok=True)
    # The JPEG-only `quality` option is no longer passed: it never affected the default
    # PNG output, and newer Matplotlib releases reject the keyword.
    fig.savefig("figs/Analysis_{}".format(title), dpi=320)

In [None]:
def printDegreeStats(Stats):
    """Print every fingerprint of ``Stats`` followed by its ``degree count`` lines.

    Args:
        Stats: mapping ``fingerprint -> {degree -> count}``. Degree keys may mix
            integers with string buckets such as ``">=16"``.

    Degrees are printed in ascending order with string buckets after the integers.
    """
    for key in Stats:
        print(key)
        stats_key = Stats[key]
        # Sorting ints and strings together raises TypeError, so order on
        # (is_string, value): integers first, then string buckets.
        for degree in sorted(stats_key, key=lambda d: (isinstance(d, str), d)):
            print(degree, stats_key[degree])

In [None]:
def getNeighborDegreeStats(nodelist, results_list, neighbor_set):
    """Histogram node degrees, grouped by the per-model correctness fingerprint.

    Args:
        nodelist: iterable of node ids to include.
        results_list: one entry per model; element ``[2]`` of each entry is indexed
            by node id. When empty, all nodes fall under the fingerprint ``"All"``.
        neighbor_set: mapping node id -> list of neighbours; its length is the degree.

    Returns:
        ``{fingerprint: {degree: count}}`` where degrees of 16 or more share the
        string bucket ``">=16"``.
    """
    degree_counts = {}
    for node_id in nodelist:
        flags = [model_result[2][node_id] for model_result in results_list]
        fingerprint = " ".join(str(flag) for flag in flags) if flags else "All"
        degree = len(neighbor_set[node_id])
        bucket = ">=16" if degree >= 16 else degree
        per_fingerprint = degree_counts.setdefault(fingerprint, {})
        per_fingerprint[bucket] = per_fingerprint.get(bucket, 0) + 1
    return degree_counts

In [None]:
def getNeighborStats(nodelist, results_list):
    """Group node ids by their per-model correctness fingerprint.

    Args:
        nodelist: iterable of node ids to include.
        results_list: one entry per model; element ``[2]`` of each entry is indexed
            by node id to obtain that model's flag.

    Returns:
        Dict mapping the space-separated fingerprint to the list of node ids that
        have it, in ``nodelist`` order.
    """
    nodes_by_fingerprint = {}
    for node_id in nodelist:
        fingerprint = " ".join(str(model_result[2][node_id]) for model_result in results_list)
        nodes_by_fingerprint.setdefault(fingerprint, []).append(node_id)
    return nodes_by_fingerprint

In [None]:
def getNeighborsAccuracy(nodeIdList, neighbor_set, TF_class):
    """For each listed node, return the fraction of its neighbours whose flag is truthy.

    Args:
        nodeIdList: sequence of node ids.
        neighbor_set: mapping node id -> list of neighbour ids (must be non-empty,
            since the fraction divides by its length).
        TF_class: per-node flags, indexed by node id.

    Returns:
        List of fractions, one per entry of ``nodeIdList``, in the same order.
    """
    neighbor_accuracy = []
    for position in range(len(nodeIdList)):
        neighbors = neighbor_set[nodeIdList[position]]
        correct = sum(1 for neighbor_id in neighbors if TF_class[neighbor_id])
        neighbor_accuracy.append(correct / len(neighbors))
    return neighbor_accuracy

def getNeighborStr(accuracy):
    """Return ``str()`` of the arithmetic mean of ``accuracy`` (a non-empty sequence)."""
    running_total = 0
    for value in accuracy:
        running_total = running_total + value
    return str(running_total / len(accuracy))


def printNeighborStats(Stats, neighbor_set, results_list):
    """Print each fingerprint of ``Stats`` followed by the node ids recorded for it.

    ``neighbor_set`` and ``results_list`` are accepted but not read by this body.
    """
    for fingerprint, node_ids in Stats.items():
        print(fingerprint+" "+str(node_ids))

In [None]:
node_num = len(cora_gcn_neighbor_set.keys())

# NOTE(review): these names are matched to results_list by position, so they are only
# right when the two-URL "SymNorm" url_list was the one loaded — confirm before trusting titles.
model_name = ["GCN SymNorm tideA", "GCN SymNorm A"]
# Each entry is a subset of model indices; [] means "all nodes, no correctness split".
selected_model_permutation = [
    [], [1]
]
for selected_model in selected_model_permutation:
    selected_results_list = [results_list[model_index] for model_index in selected_model]
    finger = "".join(model_name[model_index] + " " for model_index in selected_model)
    stats = getNeighborDegreeStats(range(node_num), selected_results_list, cora_gcn_neighbor_set)
    print(stats)

    # One figure per model subset. getDegreeStatsGraph ignores key_num and draws every
    # key of `stats` in a single figure, so calling it once per key only re-rendered and
    # re-saved the identical plot.
    if stats:
        getDegreeStatsGraph(stats,finger)

In [None]:
node_num = len(cora_gcn_neighbor_set.keys())

# Names are matched to results_list by position (the order of the loaded url_list).
model_name = ["GCN", "MLP", "GCN_identity_feature"]
# Every subset of the three models; [] means "all nodes, no correctness split".
selected_model_permutation = [
    [], [0], [1], [2], [0,1], [0,2], [1,2], [0,1,2]
]
for selected_model in selected_model_permutation:
    selected_results_list = [results_list[model_index] for model_index in selected_model]
    finger = "".join(model_name[model_index] + " " for model_index in selected_model)
    stats = getNeighborDegreeStats(range(node_num), selected_results_list, cora_gcn_neighbor_set)
    print(stats)

    # One figure per model subset. getDegreeStatsGraph ignores key_num and draws every
    # key of `stats` in a single figure, so calling it once per key only re-rendered and
    # re-saved the identical plot.
    if stats:
        getDegreeStatsGraph(stats,finger)

In [None]:
def getDegreeAcuracyStats(stats):
    """Turn per-degree correct / wrong counts into a per-degree accuracy.

    Args:
        stats: mapping with optional ``"True"`` and ``"False"`` entries, each a
            ``{degree -> count}`` dict. A missing entry is treated as "no nodes".

    Returns:
        ``{degree -> correct / (correct + wrong)}``. Integer degrees come first in
        ascending order, followed by string buckets such as ``">=16"``. Degrees whose
        total count is zero are left out.
    """
    # A model that gets every node right (or wrong) yields only one of the two keys.
    true_stats = stats.get("True", {})
    false_stats = stats.get("False", {})
    degrees = set(true_stats) | set(false_stats)
    accuracy_stats = {}
    # Order on (is_string, value) so that mixed int / str keys sort without TypeError
    # and downstream plots get a monotone x axis.
    for degree in sorted(degrees, key=lambda d: (isinstance(d, str), d)):
        true_num = true_stats.get(degree) or 0
        false_num = false_stats.get(degree) or 0
        total = true_num + false_num
        if total == 0:
            continue
        accuracy_stats[degree] = true_num / total
    return accuracy_stats

In [None]:
node_num = len(cora_gcn_neighbor_set.keys())

# Names are matched to results_list by position (the order of the loaded url_list).
model_name = ["GCN", "MLP", "GCN_identity_feature"]
# One single-model selection per model.
selected_model_permutation = [
    [0], [1], [2]
]
# model name -> {degree -> accuracy}
selected_stats = {}
for selected_model in selected_model_permutation:
    selected_results_list = [results_list[model_index] for model_index in selected_model]
    model_name_local = model_name[selected_model[0]]
    stats = getNeighborDegreeStats(range(node_num), selected_results_list, cora_gcn_neighbor_set)
    accuracy_stats = getDegreeAcuracyStats(stats)
    selected_stats[model_name_local] = accuracy_stats
# All models in one figure. key_num is left at its default: it is ignored by the plotting
# helper, and the value passed before was just a leftover loop index.
getDegreeStatsGraph(selected_stats,"",enable_legend=True)

In [None]:
# Group the "centre correct, neighbours wrong" nodes by their per-model fingerprint and print them.
neighbor_stats_special_pattern_cCorrect_nWrong = getNeighborStats(Special_pattern_cCorrect_nWrong, results_list)
printNeighborStats(neighbor_stats_special_pattern_cCorrect_nWrong, cora_gcn_neighbor_set, results_list)

In [None]:
# Group the "centre wrong, neighbours correct" nodes by their per-model fingerprint and print them.
neighbor_stats_special_pattern_cWrong_nCorrect = getNeighborStats(Special_pattern_cWrong_nCorrect, results_list)
printNeighborStats(neighbor_stats_special_pattern_cWrong_nCorrect, cora_gcn_neighbor_set, results_list)

In [None]:
def getDegreeFromList(nodelist, neighbor_set):
    """Print ``"<node_id> <degree>"`` for every node id in ``nodelist``.

    The degree is the length of the node's entry in ``neighbor_set``.
    """
    for node_id in nodelist:
        degree = len(neighbor_set[node_id])
        print(node_id, degree)

In [None]:
# Print the degree of a hand-picked set of node ids.
# NOTE(review): where these ids come from is not recorded in the notebook — confirm.
nodelist =[394, 1945, 2045, 2180, 2434, 2503, 2532]
getDegreeFromList(nodelist, cora_gcn_neighbor_set)

In [None]:

# Inputs for the shortest-path analysis, taken from the first loaded model.
graph_obj = obj_list[0]
# NOTE(review): train_mask is later passed as the anchor list to the shortest-path functions,
# which build set(anchor_list) and test node ids for membership. That presumes it holds
# node ids rather than a per-node boolean mask — confirm against the server payload.
train_mask = graph_obj["mask"]["train"]
# Node count = number of keys in the adjacency dict (nodes that occur as a sender).
node_num = len(cora_gcn_neighbor_set.keys())


## 12. Shortest Path Distance.

In [None]:
import collections 
def getShortestPathDistance(node_num, neighbor_set, anchor_list):
    """Breadth-first distance from every node to its nearest anchor node.

    Args:
        node_num: number of nodes; node ids ``0 .. node_num - 1`` are searched from.
        neighbor_set: dict mapping node id -> list of neighbour ids. A node without
            an entry is treated as having no outgoing edges.
        anchor_list: iterable of anchor node ids.

    Returns:
        A list with one entry per node: the hop count to the closest anchor (0 when
        the node is itself an anchor), or the string ``"inf"`` when none is reachable.
    """
    anchor_set = set(anchor_list)
    shortest_path_list = []
    for start in range(node_num):
        shortest_path_distance = "inf"
        # Mark nodes when they are queued, not when they are popped, so that each node
        # enters the queue at most once.
        visited = [False] * node_num
        visited[start] = True
        queue = collections.deque([(start, 0)])
        while queue:
            node, distance = queue.popleft()
            if node in anchor_set:
                # BFS pops nodes in non-decreasing distance, so the first anchor is nearest.
                shortest_path_distance = distance
                break
            for neighbor in neighbor_set.get(node, ()):
                if not visited[neighbor]:
                    visited[neighbor] = True
                    queue.append((neighbor, distance + 1))
        shortest_path_list.append(shortest_path_distance)
    return shortest_path_list

shortest_path_list = getShortestPathDistance(node_num, cora_gcn_neighbor_set, train_mask)


In [None]:
import collections 
def getShortestPathDistanceSet(node_num, neighbor_set, anchor_list):
    """Breadth-first search from every node for its nearest anchors.

    Args:
        node_num: number of nodes; node ids ``0 .. node_num - 1`` are searched from.
        neighbor_set: dict mapping node id -> list of neighbour ids. A node without
            an entry is treated as having no outgoing edges.
        anchor_list: iterable of anchor node ids.

    Returns:
        A list with one dict per node:
        ``"shortest_path_distance"`` is the hop count to the closest anchor, or the
        string ``"inf"`` when none is reachable;
        ``"shortest_path_set"`` is the sorted list of all anchors at that distance.
    """
    anchor_set = set(anchor_list)
    shortest_path_list = []
    for start in range(node_num):
        shortest_path_distance = "inf"
        nearest_anchors = []
        # Mark nodes when they are queued so that each node enters the queue at most once.
        visited = [False] * node_num
        visited[start] = True
        queue = collections.deque([(start, 0)])
        while queue:
            node, distance = queue.popleft()
            # Distances are popped in non-decreasing order: once past the nearest-anchor
            # distance nothing further can join the set, so stop searching.
            if shortest_path_distance != "inf" and distance > shortest_path_distance:
                break
            if node in anchor_set:
                shortest_path_distance = distance
                nearest_anchors.append(node)
                # Anchors are end points; the search does not continue through them.
                continue
            for neighbor in neighbor_set.get(node, ()):
                if not visited[neighbor]:
                    visited[neighbor] = True
                    queue.append((neighbor, distance + 1))
        shortest_path_list.append({
            "shortest_path_distance": shortest_path_distance,
            "shortest_path_set": sorted(nearest_anchors),
        })
    return shortest_path_list

shortest_path_list = getShortestPathDistanceSet(node_num, cora_gcn_neighbor_set, train_mask)


In [None]:
def printPathStats(current_node, stats,labels):
    """Print a small report for one node and for its nearest anchor ("train") nodes.

    Args:
        current_node: node id to report on.
        stats: indexable by node id; each entry has ``"shortest_path_distance"`` and
            ``"shortest_path_set"``.
        labels: three parallel per-node sequences, printed under the column header
            "Ground Truth", "Predict" and "T/F" respectively.
    """
    def print_row(node_id):
        # One table row: id, then the three label columns, separated by tabs.
        print(node_id,"\t",labels[0][node_id], "\t\t", labels[1][node_id], "\t", labels[2][node_id])

    print("Current Node", current_node)
    entry = stats[current_node]
    print("Shortest path distance", entry["shortest_path_distance"])
    anchor_ids = entry["shortest_path_set"]
    print("Node_Id  Ground Truth  Predict  T/F")
    print("Current Node")
    print_row(current_node)
    print("Train Node")
    for anchor_id in anchor_ids:
        print_row(anchor_id)
    

In [None]:
# Report on one hand-picked node (id 1378) using the first model's labels.
# `shortest_path_list` must be the list of dicts produced by getShortestPathDistanceSet.
printPathStats(1378, shortest_path_list,results_list[0])

In [None]:
def getShortestPathDict(shortest_path_list, TF_class):
    """Count correct / wrong nodes for every shortest-path distance.

    Args:
        shortest_path_list: one entry per node. An entry is either a plain distance
            (a hop count or ``"inf"``), or a dict carrying the distance under
            ``"shortest_path_distance"``.
        TF_class: per-node flags, indexed like ``shortest_path_list``; ``str()`` of a
            flag must be ``"True"`` or ``"False"``.

    Returns:
        ``{distance: {"True": n_correct, "False": n_wrong}}``.
    """
    shortest_path_dict = {}
    for i, entry in enumerate(shortest_path_list):
        # Dict entries are unhashable and cannot be used as keys; take their distance.
        sp = entry["shortest_path_distance"] if isinstance(entry, dict) else entry
        counts = shortest_path_dict.setdefault(sp, {"True": 0, "False": 0})
        counts[str(TF_class[i])] += 1
    return shortest_path_dict
        
# Correct / wrong counts per shortest-path distance for the first model.
# `shortest_path_list` is whatever the most recently run shortest-path cell assigned to it.
shortest_path_dict = getShortestPathDict(shortest_path_list, results_list[0][2])


In [None]:
# One line per distance: "<distance> <number correct> <number wrong>".
for i in shortest_path_dict:
    print(i, shortest_path_dict[i]["True"], shortest_path_dict[i]["False"])

In [None]:
def constructNeighborSet(graph_in):
    """Build adjacency lists from the parallel ``senders`` / ``receivers`` edge arrays.

    Returns:
        A dict mapping each node that occurs in ``senders`` to the list of its
        receivers, in edge order. Nodes that never occur as a sender get no key.
    """
    senders = graph_in["senders"]
    receivers = graph_in["receivers"]
    adjacency = {}
    for edge_index, send_node in enumerate(senders):
        adjacency.setdefault(send_node, []).append(receivers[edge_index])
    return adjacency

# Rebuild the adjacency lists from the first loaded model's input graph.
graph_in = obj_list[0]["graph_in"]
cora_gcn_neighbor_set = constructNeighborSet(graph_in)


In [None]:
# Mean predicted-label score, split by whether the prediction was correct, for one model.
model_selection = 0
TF_class = results_list[model_selection][2]
node_num = len(cora_gcn_neighbor_set.keys())

# Running sums keyed by "True" / "False"; turned into means after the loop.
pred_score_dict = {}
pred_score_dict[str(True)]=0
pred_score_dict[str(False)]=0
TrueNum = 0
FalseNum = 0
pd_score_list =pred_score_list[model_selection]
# Number of wrong predictions whose score is below 0.5.
count = 0 
for i in range(node_num):
    pred_score = pd_score_list[i]
    pred_score_dict[str(TF_class[i])] = pred_score_dict[str(TF_class[i])] + pred_score
    if TF_class[i]:
        TrueNum = TrueNum + 1
    else:
        # Wrong prediction: list it when the score on the predicted label is under 0.5.
        if pred_score < 0.5:
            print(i, pred_score)
            count = count + 1
        FalseNum = FalseNum + 1
        #if pred_score>0.5:
        #    print(i, pred_score)
# NOTE(review): these divisions raise ZeroDivisionError when the model has no correct
# (or no wrong) predictions among the first node_num nodes.
pred_score_dict[str(True)] = pred_score_dict[str(True)]  / TrueNum
pred_score_dict[str(False)] = pred_score_dict[str(False)]  / FalseNum
print(pred_score_dict)
print(TrueNum, FalseNum, count)

In [None]:
## GCN for Cora-ML
# Pred Score List
# {'True': 0.5891725559627756, 'False': 0.3584966339494871}
# Ground Truth List
# {'True': 0.5891725559627756, 'False': 0.18541575310265887}


# MLP
# Pred Score List
# {'True': 0.581954779419416, 'False': 0.36661832975616526}


# GCN_Identity_features
# Pred Score List
# {'True': 0.3290714873580271, 'False': 0.22680015858059818}


# GCN for Cora
# Pred Score List
# {'True': 0.7961086297035217, 'False': 0.5451021153391815}
# True False True>0.9 False>0.9
# 2200 508 896  21


In [None]:
# Per-node difference between the ground-truth-label scores of the first and the second
# loaded model (score_list[0] minus score_list[1]).
node_num = len(cora_gcn_neighbor_set.keys())
score_diff = [score_list[0][i] - score_list[1][i] for i in range(node_num)]

In [None]:
# Histogram (no KDE curve) of the per-node score differences.
import seaborn as sns
sns.set(style="whitegrid")
# NOTE(review): `distplot` is deprecated in recent seaborn releases in favour of `histplot`;
# switch once the installed seaborn version is confirmed to provide it.
ax = sns.distplot(score_diff,kde=False)
# Handle on the figure; it is not saved or used further in this cell.
fig = ax.get_figure()