<a href="https://colab.research.google.com/github/ewei2406/AAGNN/blob/main/Unlearnablev2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Requirements

In [None]:
!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.9.0+cu111.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-1.9.0+cu111.html
!pip install torch-geometric

Looking in links: https://data.pyg.org/whl/torch-1.9.0+cu111.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.9.0%2Bcu111/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (10.4 MB)
[K     |████████████████████████████████| 10.4 MB 2.7 MB/s 
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.0.9
Looking in links: https://data.pyg.org/whl/torch-1.9.0+cu111.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.9.0%2Bcu111/torch_sparse-0.6.12-cp37-cp37m-linux_x86_64.whl (3.7 MB)
[K     |████████████████████████████████| 3.7 MB 1.9 MB/s 
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.12
Collecting torch-geometric
  Downloading torch_geometric-2.0.2.tar.gz (325 kB)
[K     |████████████████████████████████| 325 kB 5.3 MB/s 
Collecting rdflib
  Downloading rdflib-6.0.2-py3-none-any.whl (407 kB)
[K     |████████████████████████████████| 407 kB 36.9 MB/s 
Collecting ya

In [None]:
import torch
import torch_geometric
import seaborn as sns
import numpy as np
import pandas as pd
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [None]:
from torch_geometric.loader import DataLoader
from torch_geometric.datasets import Planetoid
from torch_geometric.utils.dropout import dropout_adj
from torch_geometric.nn import DenseGCNConv

# Data

In [None]:
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = dataset[0].to(device)

# Functions

In [None]:
def to_sparse(edge_ind):
  return torch.sparse_coo_tensor(edge_ind, torch.ones_like(edge_ind[0])).to_dense()

def to_index(sparse):
  res = sparse.float().nonzero().permute(1,0)
  return res

def mirror(sparse):
  upper = torch.triu(sparse)
  lower = torch.rot90(torch.flip(torch.triu(sparse, diagonal=1), [0]), 3, [0,1])
  return upper + lower

def set_diagonal(sparse, target=0):
  copy = sparse.clone()
  return copy.fill_diagonal_(target)

def make_symmetric(sparse):
  res = mirror(sparse)
  # res = set_diagonal(res)
  return res

def invert_by(sparse, flip_dense):
  return (sparse + flip_dense) - torch.mul(sparse * flip_dense, 2)


# Model

In [None]:
class GCN_Dense(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = DenseGCNConv(dataset.num_node_features, 64)
        self.conv2 = DenseGCNConv(64, dataset.num_classes)

    def forward(self, x, dense):
        # x, edge_index = data.x, data.edge_index

        x = self.conv1(x, dense)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, dense)

        return F.log_softmax(x, dim=1)

# Train a model regularly

In [None]:
def denseTrain(model_dense, data, optimizer, epochs):
  model_dense.train()
  for epoch in range(epochs):
    optimizer.zero_grad()

    pred = model_dense(data.x, to_sparse(data.edge_index)).squeeze()

    train_correct = (pred.squeeze().argmax(1)[data.train_mask] == data.y[data.train_mask]).sum()
    train_acc = int(train_correct) / int(data.train_mask.sum())

    test_correct = (pred.squeeze().argmax(1)[data.test_mask] == data.y[data.test_mask]).sum()
    test_acc = int(test_correct) / int(data.test_mask.sum())

    loss = F.cross_entropy(pred[data.train_mask], data.y[data.train_mask])
    print(f"Epoch: {epoch} \t Train: {train_acc*100:<.1f}% \t Test: {test_acc*100:<.1f}% \t Loss: {loss:<.2f}")
    
    loss.backward()
    optimizer.step()

def denseTrainVersusReal(model_dense, data_altered, data_real, optimizer, epochs):
  model_dense.train()
  for epoch in range(epochs):
    optimizer.zero_grad()

    pred = model_dense(data_altered.x, to_sparse(data_altered.edge_index)).squeeze()

    train_correct = (pred.squeeze().argmax(1)[data_altered.train_mask] == data_altered.y[data_altered.train_mask]).sum()
    train_acc = int(train_correct) / int(data_altered.train_mask.sum())

    real_pred = model_dense(data_real.x, to_sparse(data_real.edge_index)).squeeze()

    test_correct = (real_pred.squeeze().argmax(1)[data_real.test_mask] == data_real.y[data_real.test_mask]).sum()
    test_acc = int(test_correct) / int(data_real.test_mask.sum())

    loss = F.cross_entropy(pred[data_altered.train_mask], data_altered.y[data_altered.train_mask])
    print(f"Epoch: {epoch} \t Train: {train_acc*100:<.1f}% \t Test: {test_acc*100:<.1f}% \t Loss: {loss:<.2f}")
    
    loss.backward()
    optimizer.step()

In [None]:
model_dense = GCN_Dense().to(device)
optimizer = torch.optim.Adam(model_dense.parameters(), lr=0.01, weight_decay=5e-4)
denseTrain(model_dense, data, optimizer, 10)

Epoch: 0 	 Train: 12.9% 	 Test: 16.6% 	 Loss: 1.94
Epoch: 1 	 Train: 90.0% 	 Test: 59.7% 	 Loss: 1.69
Epoch: 2 	 Train: 95.0% 	 Test: 68.3% 	 Loss: 1.40
Epoch: 3 	 Train: 97.9% 	 Test: 71.8% 	 Loss: 1.05
Epoch: 4 	 Train: 95.7% 	 Test: 73.1% 	 Loss: 0.82
Epoch: 5 	 Train: 95.7% 	 Test: 72.8% 	 Loss: 0.63
Epoch: 6 	 Train: 97.9% 	 Test: 75.8% 	 Loss: 0.48
Epoch: 7 	 Train: 96.4% 	 Test: 76.1% 	 Loss: 0.36
Epoch: 8 	 Train: 98.6% 	 Test: 77.2% 	 Loss: 0.25
Epoch: 9 	 Train: 98.6% 	 Test: 78.5% 	 Loss: 0.21


# FGSM like attack (Evasion) on regularly trained model

In [None]:
def fgsm_dense(model, data, epsilon=0.01):
  random_perturbed_data = data.clone()

  dense = to_sparse(random_perturbed_data.edge_index)
  delta = torch.empty(dense.shape).uniform_(0, 1).bernoulli()
  delta = make_symmetric(delta)
  delta.requires_grad = True
  res = invert_by(dense, delta)

  random_perturbed_data.edge_index = to_index(res)

  pred = model(data.x, res).squeeze()

  loss = F.cross_entropy(pred[data.train_mask], data.y[data.train_mask])
  loss.backward()

  res = epsilon * delta.grad.detach().sign().abs()
  res = make_symmetric(torch.bernoulli(res))
  return res

In [None]:
epsilon = 0.01 # Budget is 1% of edges
residual = fgsm_dense(model_dense, data, 0.01) # Based off of regularly trained model

print(f"Percentage of edges altered: {residual.sum() / 2708 ** 2 / 0.01:.2f}%")
print(f"Number of edges altered: {residual.sum():.0f}")
print(f"Total potential edges: {2702 ** 2:.0f}")

fgsm_data = data.clone()
altered = invert_by(to_sparse(fgsm_data.edge_index), residual)
fgsm_data.edge_index = to_index(altered)

pred = model_dense(fgsm_data.x, altered).squeeze().argmax(1)
reg_pred = model_dense(data.x, to_sparse(data.edge_index)).squeeze().argmax(1)

print(f"Accuracy with fgsm: {(fgsm_data.y == pred).sum() / 27.08:.2f}%")
print(f"Accuracy without fgsm: {(fgsm_data.y == reg_pred).sum() / 27.08:.2f}%")

Percentage of edges altered: 1.00%
Number of edges altered: 73150
Total potential edges: 7300804
Accuracy with fgsm: 36.63%
Accuracy without fgsm: 78.03%


# Train a model on a randomly perturbed dataset

In [None]:
random_perturbed_data = data.clone()
epsilon = 0.01

dense = to_sparse(random_perturbed_data.edge_index)

delta = torch.empty(dense.shape).uniform_(0, epsilon).bernoulli()

delta = make_symmetric(delta)
res = invert_by(dense, delta)

random_perturbed_data.edge_index = to_index(res)

In [None]:
edges_changed = delta.sum() / 2
expected_changed = 0.001 * (2708 ** 2) // 4
total_edges = data.edge_index.shape
(edges_changed, total_edges)

(tensor(18307.5000), torch.Size([2, 10556]))

In [None]:
model_dense_perturbed = GCN_Dense().to(device)
optimizer_p = torch.optim.Adam(model_dense_perturbed.parameters(), lr=0.01, weight_decay=5e-4)
denseTrainVersusReal(model_dense_perturbed, random_perturbed_data, data, optimizer_p, 10)

Epoch: 0 	 Train: 17.9% 	 Test: 15.9% 	 Loss: 1.95
Epoch: 1 	 Train: 55.0% 	 Test: 49.3% 	 Loss: 1.90
Epoch: 2 	 Train: 73.6% 	 Test: 60.9% 	 Loss: 1.86
Epoch: 3 	 Train: 79.3% 	 Test: 64.6% 	 Loss: 1.80
Epoch: 4 	 Train: 82.1% 	 Test: 66.4% 	 Loss: 1.73
Epoch: 5 	 Train: 84.3% 	 Test: 67.6% 	 Loss: 1.67
Epoch: 6 	 Train: 86.4% 	 Test: 69.1% 	 Loss: 1.60
Epoch: 7 	 Train: 88.6% 	 Test: 70.9% 	 Loss: 1.52
Epoch: 8 	 Train: 87.9% 	 Test: 73.1% 	 Loss: 1.44
Epoch: 9 	 Train: 90.7% 	 Test: 71.3% 	 Loss: 1.38


# Generate error-minimizing noise

In [None]:
minimize_data = data.clone()
test_model = GCN_Dense().to(device)
optimizer_t = torch.optim.Adam(test_model.parameters(), lr=0.01, weight_decay=5e-4)

# model_dense

epsilon = 0.01
dense = to_sparse(minimize_data.edge_index)
delta = torch.empty(dense.shape).uniform_(0, 1).bernoulli()

delta = make_symmetric(delta)
delta.requires_grad = True
res = invert_by(dense, delta)

test_model.train()
for epoch in range(20):
  optimizer_t.zero_grad()

  pred = test_model(data.x, to_sparse(data.edge_index)).squeeze()
  loss = F.cross_entropy(pred[data.train_mask], data.y[data.train_mask])

  loss.backward(retain_graph=True)
  torch.nn.utils.clip_grad_norm_(test_model.parameters(), 5.0)
  optimizer_t.step()

new_pred = test_model(data.x, to_sparse(data.edge_index))
new_pred

# res = epsilon * delta.grad.detach().sign().abs()
# res = make_symmetric(torch.bernoulli(res))

tensor([[[-14.3064, -16.6655, -19.6197,  ..., -15.5678, -16.7716, -15.3058],
         [-11.4681, -18.1974, -21.0339,  ...,  -2.6865, -17.6600, -18.4770],
         [-12.7804, -18.2471, -21.3859,  ...,  -6.5674, -18.0307, -18.0126],
         ...,
         [-11.4321, -14.0844, -18.8945,  ..., -17.5201, -13.4681, -17.1307],
         [-15.9891, -15.4739, -16.3817,  ..., -13.0341, -15.4339, -18.3894],
         [-16.0421, -15.3969, -16.0787,  ..., -13.7059, -16.1950, -17.9801]]],
       grad_fn=<LogSoftmaxBackward>)

In [None]:
model_min = GCN_Dense().to(device)
optimizer_m = torch.optim.Adam(model_min.parameters(), lr=0.01, weight_decay=5e-4)
denseTrainVersusReal(model_min, minimize_data, data, optimizer_m, 10)

Epoch: 0 	 Train: 12.1% 	 Test: 9.9% 	 Loss: 1.96
Epoch: 1 	 Train: 89.3% 	 Test: 61.1% 	 Loss: 1.72
Epoch: 2 	 Train: 95.7% 	 Test: 70.5% 	 Loss: 1.46
Epoch: 3 	 Train: 94.3% 	 Test: 72.2% 	 Loss: 1.17
Epoch: 4 	 Train: 95.0% 	 Test: 73.4% 	 Loss: 0.88
Epoch: 5 	 Train: 95.7% 	 Test: 71.7% 	 Loss: 0.69
Epoch: 6 	 Train: 97.9% 	 Test: 73.4% 	 Loss: 0.50
Epoch: 7 	 Train: 97.9% 	 Test: 73.0% 	 Loss: 0.39
Epoch: 8 	 Train: 96.4% 	 Test: 73.2% 	 Loss: 0.28
Epoch: 9 	 Train: 97.1% 	 Test: 72.8% 	 Loss: 0.22


In [None]:
(pred[:,logit_target].shape,
data.y[data.train_mask].shape)

pred[:,logit_target]