In [1]:
# imports
import os
import sys
import time
# add to the path the source files
sys.path.append(os.path.dirname(os.getcwd()))

import wandb
import numpy as np
import pandas as pd
from copy import deepcopy
from dgl.data.utils import load_graphs
import dgl
import torch 
import torch.nn as nn
import torch_geometric 
from torch_geometric.loader import DataLoader
import networkx as nx
from src.dataset.counting_algorithm import subgraph_counting
#from src.baseline.model_gcn import GCN, GIN
from src.baseline.dataset_gcn import GraphDataset
from src.metrics.L1_based import L1LossCount, L1LossStd
from matplotlib import pyplot as plt
from src.baseline.model_gcn import GCN, GIN
from src.I2GNN.I2GNN import I2GNN
from src.ppgn.ppgn import PPGN, PPGNexpl
import json


def generate_gnn_input(graph: nx.Graph, device)->torch_geometric.data.Data:
    """Convert a networkx graph into the ``Data`` object consumed by a PyTorch Geometric model.

    Every node receives the same constant one-channel feature, and every
    undirected edge ``(u, v)`` is stored in both directions.

    Args:
        graph: Undirected graph. Node labels are written directly into
            ``edge_index``, so they are assumed to be integers in
            ``0..num_nodes-1`` -- TODO confirm against the dataset loader.
        device: Device the resulting ``Data`` object is moved to.

    Returns:
        A ``Data`` instance with ``x`` of shape ``(num_nodes, 1)`` and a
        ``torch.long`` ``edge_index`` of shape ``(2, 2 * num_edges)``.
        Columns ``[0, num_edges)`` hold the edges in the order given by
        ``graph.edges()``; columns ``[num_edges, 2 * num_edges)`` hold the
        same edges reversed.
    """
    x = torch.ones(graph.number_of_nodes(), 1) # no improvement by using more channels in the first layer
    num_edges = graph.number_of_edges()
    # Build the edge list in one shot instead of writing tensor elements one by
    # one. The explicit (num_edges, 2) shape keeps the empty-graph case valid
    # (a -1 dimension is ambiguous for a tensor with zero elements).
    forward = torch.tensor(list(graph.edges()), dtype=torch.long).reshape(num_edges, 2).t()
    # flip(0) swaps the source/target rows, which yields the reversed edges.
    edge_index = torch.cat([forward, forward.flip(0)], dim=1)
    return torch_geometric.data.Data(x=x, edge_index=edge_index).to(device)

# Mean absolute error; passed to the attacks below as their `loss_fn` argument.
loss_fn = nn.L1Loss()

In [2]:
# robustness greedy
# Loads the first of the five checkpoints for `model`/`task`, picks one test
# graph, then runs two beam attacks on it and prints each attack's error
# history together with its wall-clock time.
from src.adversarial.beam_attack_I2GNN import I2GNNPreserveBeamAttack, I2GNNSubstructurePreserveBeamAttack, I2GNNBeamAttack
from src.adversarial.beam_attack import PreserveBeamAttack, SubstructurePreserveBeamAttack, BeamAttack
from src.adversarial.greedy_attack import PreserveGreedyAttack
from src.dataset.counting_algorithm import subgraph_counting, subgraph_listing

from joblib import Parallel, delayed
import shutil

task = 'Triangle'
device = 'cpu'
dataset = 'sbm_30'
model = 'I2GNN'
dataset_path = f"/nfs/students/campi/dataset/training/test_5000_{dataset}.bin"
models_path = [f"//nfs/students/campi/best_models/{model}_{dataset}/{model}_{task}_{i}.pth" for i in range(5)]
dict_path = [f"//nfs/students/campi/best_models/{model}_{dataset}/{model}_{task}_{i}.json" for i in range(5)]
i = 0  # index of the test graph to attack

# Architecture name -> (model class, first attack class, second attack class).
# A single lookup replaces the paired `if model == ...` statements, which left
# `gnn` / `greedy_attack` unbound (NameError further down) for any other name.
supported_models = {
    'PPGN': (PPGN, SubstructurePreserveBeamAttack, PreserveBeamAttack),
    'I2GNN': (I2GNN, I2GNNSubstructurePreserveBeamAttack, I2GNNPreserveBeamAttack),
}
if model not in supported_models:
    raise ValueError(f"Unsupported model '{model}'; expected one of {sorted(supported_models)}")
gnn_cls, first_attack_cls, second_attack_cls = supported_models[model]

# Hyper-parameters are stored next to the weights as JSON.
with open(dict_path[0], 'r') as f:
    h_params = json.load(f)
gnn = gnn_cls(**h_params)
gnn.load_state_dict(torch.load(models_path[0], map_location=torch.device(device)))
gnn.eval()

graphs, counts = dgl.load_graphs(dataset_path)
graph = nx.Graph(dgl.to_networkx(graphs[i]))
count = counts[task][i]

start = time.time()
greedy_attack = first_attack_cls(n_samples = 1, edge_addition=True, edge_deletion=True, device=device)
adversarial_graph, adversarial_error_history, adversarial_count = greedy_attack.find_adversarial_example(budget=3, gnn=gnn, loss_fn=loss_fn, graph=graph,subgraph_type=task,count=count)
print(f'{adversarial_error_history}, time: {time.time() - start}')


# Same graph and budget with the second attack variant; note that the
# `adversarial_*` results of the first attack are overwritten here.
start = time.time()
greedy_attack2 = second_attack_cls(n_samples = 1, edge_addition=True, edge_deletion=True, device=device)
adversarial_graph, adversarial_error_history, adversarial_count = greedy_attack2.find_adversarial_example(budget=3, gnn=gnn, loss_fn=loss_fn, graph=graph,subgraph_type=task,count=count)
print(f'{adversarial_error_history}, time: {time.time() - start}')

Processing...
Done!


/nfs/homedirs/campi/robustness-of-subgraphs-count/tests/processed
229


Processing...
Done!


/nfs/homedirs/campi/robustness-of-subgraphs-count/tests/processed
224


Processing...
Done!


/nfs/homedirs/campi/robustness-of-subgraphs-count/tests/processed
218


Processing...
Done!


/nfs/homedirs/campi/robustness-of-subgraphs-count/tests/processed
[0.0067195892333984375, 0.009820938110351562, 0.011531829833984375, 0.01264190673828125], time: 31.46625304222107
/nfs/homedirs/campi/robustness-of-subgraphs-count/tests/processed


Processing...
Done!


229


Processing...
Done!


/nfs/homedirs/campi/robustness-of-subgraphs-count/tests/processed
224


Processing...
Done!


/nfs/homedirs/campi/robustness-of-subgraphs-count/tests/processed
218


Processing...
Done!


/nfs/homedirs/campi/robustness-of-subgraphs-count/tests/processed
[0.0067195892333984375, 0.009820938110351562, 0.011531829833984375, 0.01264190673828125], time: 31.375270128250122


FileNotFoundError: [Errno 2] No such file or directory: 'root'

In [None]:
# robustness greedy
# Perturbs one test graph with MaximizeSimilar and compares the PPGN
# prediction on the original graph with the prediction on the perturbed one.
from src.adversarial.max_similar import MaximizeSimilar
from src.dataset.counting_algorithm import subgraph_counting, subgraph_listing

from joblib import Parallel, delayed

task = 'Chordal cycle'
similar_task = 'Tailed triangle'
device = 'cpu'
dataset = 'er_10'
dataset_path = f"/nfs/students/campi/dataset/training/test_5000_{dataset}.bin"
models_path = [f"//nfs/students/campi/best_models/PPGN_{dataset}/PPGN_{task}_{i}.pth" for i in range(5)]
dict_path = [f"//nfs/students/campi/best_models/PPGN_{dataset}/PPGN_{task}_{i}.json" for i in range(5)]
i = 30  # index of the test graph to perturb
with open(dict_path[0], 'r') as f:
    h_params = json.load(f)
gnn = PPGN(**h_params)
gnn.load_state_dict(torch.load(models_path[0], map_location=torch.device(device)))
gnn.eval()

graphs, counts = dgl.load_graphs(dataset_path)
graph = nx.Graph(dgl.to_networkx(graphs[i]))
count = counts[task][i]
similar_count = counts[similar_task][i]

#maximize the count of tailed triangles
budget = 60
increase = True
preserve = False
attack = MaximizeSimilar(preserve=preserve)
adv_example, similar_adv_count = attack.find_adversarial_example(budget, graph, task, count, similar_task, increase)
print(f'Initial: {similar_count}, final {similar_adv_count}')

# Pure inference: disable autograd so no graph is built and the printed
# tensors do not carry a grad_fn.
with torch.no_grad():
    test_pred = gnn(generate_gnn_input(graph, device))
    adv_pred = gnn(generate_gnn_input(adv_example, device))

# NOTE(review): the `gt` printed for the adversarial example is the count of
# the ORIGINAL graph. With preserve=False the perturbed graph's true count may
# differ -- confirm whether it should be recomputed for `adv_example`.
print(f'Test prediction: {test_pred}, gt: {count}, num edges: {graph.number_of_edges()}')
print(f'Adv prediction: {adv_pred}, gt: {count}, num_edges: {adv_example.number_of_edges()}')


saliency map with gradients

In [None]:
# Gradient-based saliency: back-propagates the model prediction to `gnn.adj`
# and prints the resulting per-edge values next to how many listed
# substructures each edge takes part in.
from src.dataset.counting_algorithm import subgraph_counting, subgraph_listing
from matplotlib import pyplot as plt

task ='Chordal cycle'
similar_task = '4-Clique'
device = 'cpu'
dataset = 'er_10'
dataset_path = f"/nfs/students/campi/dataset/training/test_5000_{dataset}.bin"
models_path = [f"//nfs/students/campi/best_models/PPGN_{dataset}/PPGN_{task}_{i}.pth" for i in range(5)]
dict_path = [f"//nfs/students/campi/best_models/PPGN_{dataset}/PPGN_{task}_{i}.json" for i in range(5)]
i = 1  # index of the test graph to explain
with open(dict_path[0], 'r') as f:
    h_params = json.load(f)
gnn = PPGNexpl(**h_params)
gnn.load_state_dict(torch.load(models_path[0], map_location=torch.device(device)))
# Evaluation mode: keeps any train-only layers (dropout, batch-norm updates)
# from perturbing the prediction the gradients are taken from. Gradients are
# still computed; eval() does not disable autograd.
gnn.eval()

graphs, counts = dgl.load_graphs(dataset_path)
graph = nx.Graph(dgl.to_networkx(graphs[i]))
nx.draw(graph,with_labels=True)
plt.show()
count = counts[task][i]
subgraphs = subgraph_listing(graph, task)
subgraphs_similar = subgraph_listing(graph, similar_task)

# For every edge, count how many listed substructures contain it (stored
# symmetrically). Edges come from the subgraph induced by each listed node set.
num_nodes = graph.number_of_nodes()
edge_count = torch.zeros((num_nodes, num_nodes))
similar_edge_count = torch.zeros((num_nodes, num_nodes))
sub_edges = []
for sub in subgraphs:
    for u, v in graph.subgraph(sub).edges():
        edge_count[u, v] += 1
        edge_count[v, u] += 1
        sub_edges.append((u, v))
for sub in subgraphs_similar:
    for u, v in graph.subgraph(sub).edges():
        similar_edge_count[u, v] += 1
        similar_edge_count[v, u] += 1
print(edge_count)
print(similar_edge_count)

data = generate_gnn_input(graph, device)
pred = gnn(data)
pred.backward()
# `gnn.adj` is populated by PPGNexpl during the forward pass; presumably the
# dense adjacency matrix -- confirm in src/ppgn/ppgn.py.
grad = torch.squeeze(gnn.adj.grad)
# Add both orientations of each edge and mask the result with `gnn.adj`.
grad = ((grad + grad.T)*gnn.adj)
print(grad)
grad_f = grad.flatten()
print(grad_f[torch.nonzero(grad_f).flatten()])
# Dedicated loop names so the sample index `i` above is not clobbered.
for u in range(num_nodes):
    for v in range(u+1, num_nodes):
        print(f'edge({u},{v}): {grad[u,v]}')

In [None]:
# Perturbation-based saliency: for every node pair, flip the edge (remove it
# if present, add it if absent) and record how much the prediction changes.
from src.dataset.counting_algorithm import subgraph_counting, subgraph_listing
from matplotlib import pyplot as plt
import copy

task ='Chordal cycle'
similar_task = '4-Clique'
device = 'cpu'
dataset = 'er_10'
dataset_path = f"/nfs/students/campi/dataset/training/test_5000_{dataset}.bin"
models_path = [f"//nfs/students/campi/best_models/PPGN_{dataset}/PPGN_{task}_{i}.pth" for i in range(5)]
dict_path = [f"//nfs/students/campi/best_models/PPGN_{dataset}/PPGN_{task}_{i}.json" for i in range(5)]
i = 1  # index of the test graph to explain
with open(dict_path[0], 'r') as f:
    h_params = json.load(f)
gnn = PPGNexpl(**h_params)
gnn.load_state_dict(torch.load(models_path[0], map_location=torch.device(device)))
# Evaluation mode: prediction differences must come from the edge flip only,
# not from train-only layers such as dropout.
gnn.eval()

graphs, counts = dgl.load_graphs(dataset_path)
graph = nx.Graph(dgl.to_networkx(graphs[i]))
nx.draw(graph,with_labels=True)
plt.show()
count = counts[task][i]
subgraphs = subgraph_listing(graph, task)

data = generate_gnn_input(graph, device)
pred = gnn(data)
num_nodes = graph.number_of_nodes()
sal = torch.zeros((num_nodes, num_nodes))

# One pass over all node pairs: existing edges get removed, missing ones get
# added. Both lists are materialised up front; `graph` itself is never mutated.
perturbations = [(e, nx.Graph.remove_edge) for e in graph.edges()]
perturbations += [(e, nx.Graph.add_edge) for e in nx.non_edges(graph)]
for e, apply_change in perturbations:
    new_graph = copy.deepcopy(graph)
    apply_change(new_graph, *e)
    data = generate_gnn_input(new_graph, device)
    new_pred = gnn(data)
    # .item() stores a plain number so `sal` does not accumulate autograd
    # history from every forward pass.
    delta = (new_pred - pred).item()
    sal[e[0], e[1]] = delta
    sal[e[1], e[0]] = delta

# For every edge, count how many listed substructures contain it (stored
# symmetrically). Edges come from the subgraph induced by each listed node set.
subhraphs_similar = subgraph_listing(graph, similar_task)
edge_count = torch.zeros((num_nodes, num_nodes))
similar_edge_count = torch.zeros((num_nodes, num_nodes))
sub_edges = []
for sub in subgraphs:
    for u, v in graph.subgraph(sub).edges():
        edge_count[u, v] += 1
        edge_count[v, u] += 1
        sub_edges.append((u, v))

for sub in subhraphs_similar:
    for u, v in graph.subgraph(sub).edges():
        similar_edge_count[u, v] += 1
        similar_edge_count[v, u] += 1
print(edge_count)
print(similar_edge_count)

# Dedicated loop names so the sample index `i` above is not clobbered.
for u in range(num_nodes):
    for v in range(u+1, num_nodes):
        print(f'edge({u},{v}): {sal[u,v]}')

In [None]:

# Runs the currently loaded model over the whole test set and plots the
# distribution of predictions, of ground truths, and one against the other.
# NOTE: this cell relies on `dataset_path`, `task`, `device` and `gnn` being
# defined by a previously executed cell.
dataset = GraphDataset(dataset_path=dataset_path, task=task, in_channels=1)
dataloader = DataLoader(dataset, batch_size=16)

# Make the evaluation independent of the mode an earlier cell left the model in.
gnn.eval()

preds = []
gt = []
start = time.time()
with torch.no_grad():
    for data, y in dataloader:
        data = data.to(device)
        y = y.to(device)
        pred = gnn(data)
        preds.extend(pred.flatten().tolist())
        gt.extend(y.flatten().tolist())
print(f'time: {time.time() - start}')
plt.hist(preds, bins=20)
plt.title("Predictions distribution")
plt.xlabel("Prediction")
plt.ylabel("Number of graphs")
plt.show()
plt.hist(gt, bins=20)
plt.title("Ground truth distribution")
plt.xlabel("Ground truth")
plt.ylabel("Number of graphs")
plt.show()
plt.scatter(gt, preds)
plt.title("Models predictions vs groud truths")
plt.xlabel("Ground truth")
plt.ylabel("Prediction")
plt.show()