In [1]:
import torch
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from deeprobust.graph.defense import GCN
from deeprobust.graph.targeted_attack import FGA
from deeprobust.graph.utils import *
from deeprobust.graph.data import Dataset
from tqdm import tqdm
import argparse
from experiments import split_dataset
from DistributedDefense import TwoPartyCNGCN
import networkx as nx
from scipy.sparse import csr_matrix
import Mahsa_backdoor_V0 as backdoor

In [2]:
####################### Data loading and preprocessing #######################
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load data
dataset = "polblogs"
#data = Dataset(root='/tmp/', name=dataset)  : this is for unix-based systems

# Use the current directory for windows
data = Dataset(root='.', name=dataset)
#data = Dataset(root='/tmp/', name=dataset)

adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test


# Split graph into two graphs 
proportion_of_common_links = 0.5
adj1, adj2 = split_dataset(adj, proportion_of_common_links) 


Loading polblogs dataset...
Selecting 1 largest connected components


In [3]:
################################ Mahsa attack ###############################
# Perform the attack
# 
modified_adj1 =  adj1.copy()

# Create a NetworkX graph from the adjacency matrix
graph = nx.from_scipy_sparse_array(modified_adj1)

# Add labels to the graph
for node_id, label in enumerate(labels):
    graph.nodes[node_id]['label'] = label
# print(f"lenght of the labels : {len(labels)}") 
print(f"graph edges : {graph.number_of_edges()}")


target_node, target_label, budget = backdoor.target(graph)   
non_neighbor_opposit= backdoor.find_non_neighbor_opposit_label(graph, target_node, target_label)
max_same_min_opposit_label_neighbors= backdoor.find_max_same_min_opposit_label_neighbors(graph, non_neighbor_opposit)
nodes_for_attack = backdoor.nodes_for_attack(graph, target_node, max_same_min_opposit_label_neighbors, budget)
attacked_graph = backdoor.insert_edge(graph, target_node, nodes_for_attack)
backdoor.evaluate_graph(attacked_graph, nodes_for_attack, target_node, budget)
modified_adj1 =backdoor.convert(attacked_graph)

print(attacked_graph)



graph edges : 16714
Edge insertion is successful and 5 edges has been inserted between 1 and : [(1179, 0.5217391304347826, 11), (837, 0.826797385620915, 10), (618, 0.5944055944055944, 8), (1198, 0.8768115942028986, 7), (1005, 0.8834080717488789, 6)]
Graph with 1222 nodes and 16719 edges


In [5]:
############################ Crypto'Graph defense ###########################
# Perform Crypto'Graph distributed defense

threshold = 2               # threshold for dropping dissimilar edges
metric = "neighbors"        # metric for dropping dissimilar edges (neighbors, jaccard, cosine)
object = "links"            # object for defense (links, features)

model = TwoPartyCNGCN(dataset=dataset, nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1,
                          device=device)
model.fit(modified_adj1.copy(), adj2.copy(), features, features, labels, idx_train, threshold, metric=metric, object=object,
            train_iters=200, initialize=True, verbose=False, idx_val=idx_val)
model.eval()
accuracies = model.test(idx_test)

Dropping dissimilar edges using metric :  neighbors  on links
removed 1667 edges in polblogs 1
removed 1610 edges in polblogs 2
*** polblogs 1 ***
Test set results: loss= 0.3397 accuracy= 0.8027
*** polblogs 2 ***
Test set results: loss= 0.3430 accuracy= 0.8027


In [12]:
#### Mahsa Test after CryptoGraph ####
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

# Get the model's predictions
# outputs = model(features) 

# Assume that 'new_features' is your new node features matrix
outputs1 = model.gcn1.forward(adj1, features)
outputs2 = model.gcn2.forward(features)

outputs = model.forward(features)

_ , predicted_classes = outputs.max(dim=1) # get the predicted classes for each node
_, predicted = outputs.max(1)

# Create a color map based on the predicted labels
color_map = predicted.numpy()  # convert to numpy array for indexing
# Draw the graph
nx.draw(graph, node_color=color_map, with_labels=True)
plt.show()


#evaluate accuracy of trained GCN model
# Get the true labels and predicted labels for the test nodes
true_labels = labels[idx_test].cpu().numpy()
predicted_labels = predicted[idx_test].cpu().numpy()

# Calculate the accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f'Accuracy: {accuracy}')



AttributeError: 'numpy.ndarray' object has no attribute 'is_sparse'

In [6]:
################################# Evaluation ###############################
print(f"Test accuracy: {accuracies[0]:.2f}")
print(f"Test accuracy after attack: {accuracies[1]:.2f}")
#print(f"Test accuracy after defense: {accuracies[2]:.2f}")
#print(f"Test accuracy after attack and defense: {accuracies[3]:.2f}")



Test accuracy: 0.80
Test accuracy after attack: 0.80
