In [None]:
# Install required packages.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

# Helper function for visualization.
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(h, color):
    z = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())

    plt.figure(figsize=(10,10))
    plt.xticks([])
    plt.yticks([])

    plt.scatter(z[:, 0], z[:, 1], s=70, c=color, cmap="Set2")
    plt.show()

[K     |████████████████████████████████| 7.9 MB 5.4 MB/s 
[K     |████████████████████████████████| 3.5 MB 4.7 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [None]:
import torch 
import numpy as np
import math


from torch_geometric.utils import degree
import torch_geometric
import torch_geometric.utils as tg_utils
import pandas as pd

In [None]:
!rm -r data

rm: cannot remove 'data': No such file or directory


In [None]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('===========================================================================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...



Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Done!


In [None]:
def get_masked_noise(data,  noise_level=0.15):
  x = data.x
  noise_added_node_num = int(noise_level * x.shape[0])
  chose_random_rows = np.random.choice(x.shape[0], noise_added_node_num, replace=False)
  #print(chose_random_rows)
  mask_rows = torch.zeros(x.shape)
  mask_rows[chose_random_rows,:] = torch.ones(1, x.shape[1])
  noise = (0.1**0.5)*torch.randn(x.shape)
  masked_noise = noise* mask_rows.int().float()

  #print(mask_rows)
  #print(noise)
  #print(masked_noise)
  return x + masked_noise

In [None]:
# remove x% edges
def remove_from_all_edges(data, noise_level = 0.15, bidirectional=False):
  edge_index = data.edge_index

  if bidirectional:
    noise_level /= 2
  
  edge_ratio_to_keep = 1 - noise_level
  num_edges_keep= int(edge_ratio_to_keep * edge_index.shape[1])
  chose_random_edge_indices = np.random.choice(edge_index.shape[1], num_edges_keep, replace=False)

  #print(edge_index[0][chose_random_edge_indices].shape)
  #print(num_edges_keep)
  #print(edge_index.shape[1])

  edge_index_removed = torch.zeros((2,num_edges_keep), dtype=torch.int64)
  edge_index_removed[0] = edge_index[0][chose_random_edge_indices]
  edge_index_removed[1] = edge_index[1][chose_random_edge_indices]

  if bidirectional:
    # find the node names deleted in below indices and delete also for the opposite side.
    final_edges_bidirec_0 = []
    final_edges_bidirec_1 = []

    # create a set with all edges
    edge_maps = set()
    for e_0, e_1 in zip(edge_index_removed[0], edge_index_removed[1]):
      edge_maps.add((e_0.item(),e_1.item()))

    for e_0, e_1 in zip(edge_index_removed[0], edge_index_removed[1]):
      e_0_val = e_0.item()
      e_1_val = e_1.item()
      # check an edge has its other direction, if yes add to the final list, if not skip
      if (e_0_val, e_1_val) in edge_maps and (e_1_val, e_0_val) in edge_maps:
        final_edges_bidirec_0.append(e_0_val) 
        final_edges_bidirec_1.append(e_1_val)

    final_edges_bidirec_0 = torch.tensor(final_edges_bidirec_0)
    final_edges_bidirec_1 = torch.tensor(final_edges_bidirec_1)
    edge_index_removed = torch.zeros((2,len(final_edges_bidirec_1)), dtype=torch.int64)
    edge_index_removed[0] = final_edges_bidirec_0
    edge_index_removed[1] = final_edges_bidirec_1

  return edge_index_removed



In [None]:
# remove x% edges from random k, top_k, bottom_k nodes

def choose_nodes(data, num_nodes, k_nodes, choose_type):
  if choose_type=='random':
    nodes_chosen = torch.from_numpy(np.random.choice(num_nodes, k_nodes, replace=False))
  elif choose_type=='top_k':
    # find indegree edges
    dg = torch_geometric.utils.degree(data.edge_index[0])
    top_k_nodes_degrees, top_k_nodes_indices = torch.topk(dg, k_nodes)
    #print(top_k_nodes_degrees, top_k_nodes_indices)
    nodes_chosen = top_k_nodes_indices
  elif choose_type=='bottom_k':
    # find indegree edges
    dg = torch_geometric.utils.degree(data.edge_index[0])
    bottom_k_nodes_degrees, bottom_k_nodes_indices = torch.topk(dg, k_nodes, largest=False)
    #print(bottom_k_nodes_degrees, bottom_k_nodes_indices)
    nodes_chosen = bottom_k_nodes_indices
  else:
    raise 'choose_type should be from random, top_k, bottom_k'
  return nodes_chosen

# to do loop for each node separately
def remove_edges_from_chosen_nodes(data, nodes_chosen, edges_to_remove_per_node_ratio):
  edges_0_list, edges_1_list = [],[]
  for nc in nodes_chosen:
    edge_0, edge_1 = remove_edge_per_node(data=data, node=nc, 
                                          edges_to_remove_per_node_ratio=edges_to_remove_per_node_ratio)
    edges_0_list.append(edge_0)
    edges_1_list.append(edge_1)

  edges_0 = torch.cat(edges_0_list, 0)
  edges_1 = torch.cat(edges_1_list, 0)

  return edges_0, edges_1

def remove_edge_per_node(data, node, edges_to_remove_per_node_ratio=0.1):
  mask_node_indices = torch.isin(data.edge_index[0], node)

  select_node_edges_0 = data.edge_index[0][mask_node_indices]
  select_node_edges_1 = data.edge_index[1][mask_node_indices]
  #print(select_node_edges_0)
  #print(select_node_edges_1)

  # choose how much of the edges we will remove for this node
  # we decide on number of edges to remove for each node based on the number of edges each node has
  # and by taking the ratio given by edges_to_remove_per_node_ratio
  # note: we use ceil to remove at least one node (unless ratio is 0)
  num_edges_remove = int(math.ceil(edges_to_remove_per_node_ratio* select_node_edges_0.shape[0]))
  # print(num_edges_remove)
  num_edges_keep = select_node_edges_0.shape[0] - num_edges_remove

  # choose random edges to keep, the rest is removed
  chose_random_edge_indices = np.random.choice(select_node_edges_0.shape[0], num_edges_keep, replace=False)
  # print(num_edges_keep)
  
  edge_index_removed = torch.zeros((2,num_edges_keep), dtype=torch.int64)
  edge_node_index_removed_0 = select_node_edges_0[chose_random_edge_indices]
  edge_node_index_removed_1 = select_node_edges_1[chose_random_edge_indices]

  return edge_node_index_removed_0, edge_node_index_removed_1

def remove_edges_from_nodes(data, noise_level = 0.15, k_nodes=10,
                            choose_type='random', bidirectional=False):
  if bidirectional:
    noise_level /= 2

  edge_p_node_ratio_to_keep = 1 - noise_level

  # choose topk, bottomk, or random
  nodes_chosen = choose_nodes(data=data, num_nodes=data.num_nodes, k_nodes=k_nodes, choose_type=choose_type)

  # keep edges from remaining nodes
  mask_node_indices = torch.isin(data.edge_index[0],nodes_chosen)
  index_keep = torch.ones(data.edge_index[0].shape[0], dtype=bool)
  index_keep[mask_node_indices] = False
  edges_to_keep_0 = data.edge_index[0][index_keep]
  edges_to_keep_1 = data.edge_index[1][index_keep]
  #print(edges_to_keep_0) 
  #print(edges_to_keep_1)

  # remove one-directional or bi-directional
  edges_0_kept_chosen_nodes, edges_1_kept_chosen_nodes = remove_edges_from_chosen_nodes(data=data, 
                                                                                        nodes_chosen=nodes_chosen,
                                                                                        edges_to_remove_per_node_ratio=noise_level)
  # concat edges to keep and edges_kept_chosen_nodes
  final_edges_0 = torch.cat([edges_to_keep_0, edges_0_kept_chosen_nodes], 0)
  final_edges_1 = torch.cat([edges_to_keep_1, edges_1_kept_chosen_nodes], 0)

  # do bidirectional here! IF bidirectional set to true remove both directions of the edges.
  if bidirectional:
    # find the node names deleted in below indices and delete also for the opposite side.
    final_edges_bidirec_0 = []
    final_edges_bidirec_1 = []

    # create a set with all edges
    edge_maps = set()
    for e_0, e_1 in zip(final_edges_0, final_edges_1):
      edge_maps.add((e_0.item(),e_1.item()))

    for e_0, e_1 in zip(final_edges_0, final_edges_1):
      e_0_val = e_0.item()
      e_1_val = e_1.item()
      # check an edge has its other direction, if yes add to the final list, if not skip
      if (e_0_val, e_1_val) in edge_maps and (e_1_val, e_0_val) in edge_maps:
        final_edges_bidirec_0.append(e_0_val) 
        final_edges_bidirec_1.append(e_1_val)

    final_edges_bidirec_0 = torch.tensor(final_edges_bidirec_0)
    final_edges_bidirec_1 = torch.tensor(final_edges_bidirec_1)
    final_edges_0 = final_edges_bidirec_0
    final_edges_1 = final_edges_bidirec_1
  edge_index_removed = torch.zeros((2, final_edges_0.shape[0]), dtype=torch.int64)
  edge_index_removed[0] = final_edges_0
  edge_index_removed[1] = final_edges_1

    
  # use TORCH_GEOMETRIC.UTILS.SORT_EDGE_INDEX
  edge_index_removed_sorted = tg_utils.sort_edge_index(edge_index_removed)
  return edge_index_removed_sorted


In [None]:
def add_random_edges(data, noise_level = 0.15, bidirectional=False):
  edge_index = data.edge_index
  num_nodes = data.num_nodes

  if bidirectional:
    noise_level /= 2

  new_edges = edge_index.T
  edge_num = new_edges.shape[0]
  num_of_new_edges = int(edge_num * noise_level)

  for i in range(num_of_new_edges):
    while True:
      new_edge = ((torch.rand(1,2) * 1000000).to(int) % num_nodes)
      new_edge_flip = torch.flip(new_edge, [1])
      new_edge_exist = torch.any(torch.all(torch.eq(new_edges,new_edge),1))
      new_edge_flip_exist = torch.any(torch.all(torch.eq(new_edges,new_edge_flip),1))
      if not new_edge_exist and not new_edge_flip_exist:
        new_edges = torch.cat((new_edges,new_edge), 0)
        if bidirectional:
          new_edges = torch.cat((new_edges,new_edge_flip), 0)
        break
      elif not new_edge_exist:
        new_edges = torch.cat((new_edges,new_edge), 0)
        break
      elif not new_edge_flip_exist:
        new_edges = torch.cat((new_edges,new_edge_flip), 0)
        break

  edge_index_sorted = tg_utils.sort_edge_index(new_edges.T)
  return edge_index_sorted

In [None]:
for added_ratio in [0.15]:
  new_edges = add_random_edges(data,  noise_level=added_ratio, bidirectional=True)
  data[f'edge_index_added_n_{added_ratio}'] = new_edges


In [None]:
## Helper function of add_edges_to_nodes

def add_edges_to_a_node(data, new_edges, node, num_to_add=0, bidirectional=False):
  for i in range(num_to_add):

    # Running till we find the right edge to add
    while True:
      #create a random node. 1000000 is a arbitrary number which can be replaced to any number bigger than data.num_nodes
      new_index = ((torch.rand(1) * 1000000).to(int) % data.num_nodes).item()
      new_edge = torch.tensor([[node, new_index]])
      new_edge_flip = torch.flip(new_edge, [1])
      #check whether the new edge and flip one exists or not 
      new_edge_exist = torch.any(torch.all(torch.eq(new_edges,new_edge),1))
      new_edge_flip_exist = torch.any(torch.all(torch.eq(new_edges,new_edge_flip),1))
      if not new_edge_exist and not new_edge_flip_exist:
        new_edges = torch.cat((new_edges,new_edge), 0)
        if bidirectional:
          new_edges = torch.cat((new_edges,new_edge_flip), 0)
        break
      elif not new_edge_exist:
        new_edges = torch.cat((new_edges,new_edge), 0)
        break
      elif not new_edge_flip_exist:
        new_edges = torch.cat((new_edges,new_edge_flip), 0)
        break
  return new_edges

## Adding x% edges to random k, top k, or bottom k nodes

def add_edges_to_nodes(data, noise_level=0.15, k_nodes=10, chosse_type='random', bidirectional=False):
  if bidirectional:
    noise_level /= 2

  #choose nodes by three different types
  nodes_chosen = choose_nodes(data=data, num_nodes=data.num_nodes, k_nodes=k_nodes, choose_type=chosse_type)
  
  new_edges = data.edge_index.T

  #add new edges to every chosen node
  for node in nodes_chosen:
    edge_num_of_node = torch.isin(data.edge_index, node).to(int).sum()
    edge_num_to_add = int(edge_num_of_node * noise_level)
    
    new_edges = add_edges_to_a_node(data, new_edges, node.item(), edge_num_to_add, bidirectional)
  
  edge_index_sorted = tg_utils.sort_edge_index(new_edges.T)
  return edge_index_sorted

## Experiments

In [None]:
!pip install class-resolver

from torch_geometric.nn import MLP, GCN, GraphSAGE, GAT
from class_resolver import ClassResolver

Collecting class-resolver
  Downloading class_resolver-0.3.4-py3-none-any.whl (20 kB)
Installing collected packages: class-resolver
Successfully installed class-resolver-0.3.4


In [None]:
def get_model(model_name, model_params):
  # add more model from here if needed: https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#models
  # you can also check model parameters from above
  model = None
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  if model_name=='MLP':
    model = MLP(**model_params).to(device)
  elif model_name=='GNN':
    model = GCN(**model_params).to(device)
  elif model_name=='GAT':
    model = GAT(**model_params).to(device)
  elif model_name=='Graphsage':
    model = GraphSAGE(**model_params).to(device)
  else:
    raise 'Model names should be within MLP, GNN, GAT, Graphsage'
  return model

In [None]:
# Creating MLP example, add all parameters you want to use to create/tune model in the below dictionary
model_params= {'in_channels':1433, 'hidden_channels':16, 'out_channels':7, 'num_layers':3}
mlp_model = get_model(model_name='MLP', model_params=model_params)
print(mlp_model)

model_params= {'in_channels':1433, 'hidden_channels':16, 'out_channels':7, 'num_layers':3, 'dropout':0.1}
gnn_model = get_model(model_name='GNN', model_params=model_params)
print(gnn_model)


MLP(1433, 16, 16, 7)
GCN(1433, 7, num_layers=3)


In [None]:
def get_noise_function(noise_info):
  '''
  This function directly returns the function we created for different noise logics
  Change the name of the function below if noise logic function name is changed.
  '''
  noise_fn = None 
  if noise_info['noise_type']=='feature_noise':
    noise_fn = get_masked_noise
  elif noise_info['noise_type']=='edge_removal':
    if noise_info['strategy']=='all_edges' :
      noise_fn = remove_from_all_edges
    elif noise_info['strategy']=='nodes' :
      noise_fn = remove_edges_from_nodes
  elif noise_info['noise_type']=='edge_addition':
    if noise_info['strategy']=='all_edges' :
      noise_fn = add_random_edges
    elif noise_info['strategy']=='nodes' :
      noise_fn = add_edges_to_nodes
  else:
    raise 'Noise type should be chosen from feature_noise, edge_removal, edge_addition' 
  return noise_fn
  

def create_noised_data_for_experiment(data, noise_info):
  '''
  Create noised data outside of experiment, so that you can use same data in different experiments
  '''
  noise_data_names = []
  noise_fn = get_noise_function(noise_info)
  
  if noise_info.get('strategy')=='nodes':
    add_prefix = f"{noise_info.get('strategy', '')}_choose_type-{noise_info['params'].get('choose_type', '')}_bidirec-{noise_info['params'].get('bidirectional', '')}"
  elif noise_info.get('strategy')=='all_edges':
    add_prefix = f"{noise_info.get('strategy', '')}_"
  else: 
    add_prefix=''

  if noise_info.get('strategy')=='nodes':
    for k_nodes in noise_info['k_nodes_list']:
      for noise_level in noise_info['noise_levels']:
        noise_data_name = f'{noise_info["noise_type"]}_{add_prefix}_knodes-{k_nodes}_noiselvl-{noise_level}'
        noise_info['params']['noise_level'] = noise_level
        noise_info['params']['k_nodes'] = k_nodes
        noised_data = noise_fn(**noise_info['params'])
        data[noise_data_name] = noised_data
        noise_data_names.append(noise_data_name)

  else: # all_edges and gaussian noisee case
    for noise_level in noise_info['noise_levels']:
      noise_data_name = f'{noise_info["noise_type"]}_{add_prefix}_noiselvl-{noise_level}'
      noise_info['params']['noise_level'] = noise_level
      noised_data = noise_fn(**noise_info['params'])
      data[noise_data_name] = noised_data
      noise_data_names.append(noise_data_name)
  return noise_data_names
 
# reuse train and test
def train(model, optimizer, x_type='x', edge_type='edge_index'):
  criterion = torch.nn.CrossEntropyLoss()
  model.train()
  optimizer.zero_grad()  # Clear gradients.
  out = model(data[x_type], data[edge_type])  # Perform a single forward pass.
  loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
  loss.backward()  # Derive gradients.
  optimizer.step()  # Update parameters based on gradients.
  return loss

def test(model, x_type='x', edge_type='edge_index'):
  model.eval()
  out = model(data[x_type], data[edge_type])
  pred = out.argmax(dim=1)  # Use the class with highest probability.
  test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
  test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
  return test_acc

def validation(model, x_type='x', edge_type='edge_index'):
  model.eval()
  out = model(data[x_type], data[edge_type])
  pred = out.argmax(dim=1)  # Use the class with highest probability.
  val_correct = pred[data.val_mask] == data.y[data.val_mask]  # Check against ground-truth labels.
  val_acc = int(val_correct.sum()) / int(data.val_mask.sum())  # Derive ratio of correct predictions.
  return val_acc

def experiment(dataset_name, model_names, model_params, data, x_types, edge_types, noise_info, opt_params,  num_epochs=50, repeat_num=1, print_updates=False,print_updates_detail=False):
  '''
  Assumes the noisy data is already created and inside the data object (so that we can use same data sample for different models to compare)
  '''
  exp_count = len(x_types)*len(edge_types)*len(model_names)
  count=0
  res = []
  for model_name in model_names:
    for x_type in x_types:
      for ed_type in edge_types:
        count+=1
        test_accs = []
        for exp_num in range(1, repeat_num+1): # we will repeat experiment repeat many times, to increase results reliability
           
          model =  get_model(model_name=model_name, model_params=model_params[model_name])
          optimizer = torch.optim.Adam(model.parameters(), lr=opt_params[model_name]['lr'], weight_decay=opt_params[model_name]['weight_decay']) 
          for epoch in range(num_epochs):
              loss = train(model, optimizer, x_type=x_type, edge_type=ed_type)
              # print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

          # we test our results in original data, no noise added ones 
          # if you want to test on noised data change them to x_type=x_type and edge_type=ed_type below
          test_acc = test(model, x_type='x',edge_type='edge_index') 
          test_accs.append(test_acc)
          if print_updates_detail:
            print(f'Exp_num:{exp_num} Test Accuracy: {test_acc:.4f}')

        mean_test_acc = round(np.mean(test_accs), 3)
        # TODO we can add other metrics such as runtime to log in here later
        exp_res = {'dataset_name': dataset_name,
                  'model_name':model_name, 'x_type':x_type, 
                'edge_type':ed_type, 'mean_test_accuracy':str(mean_test_acc), 'test_accuracies': ','.join([str(acc) for acc in test_accs]),
                  'num_epochs':num_epochs, 'model_params': model_params[model_name],
                  'noise_info':noise_info, 'repeat_num':repeat_num} # TODO add more info on experiment noise_info
        res.append(exp_res)
        if print_updates:
            print(f'Run {model_name}: {count}/{exp_count}: {x_type} - {ed_type} Avg. Test Accuracy: {mean_test_acc}')
        
        

  res_df = pd.DataFrame(res, columns=exp_res.keys())
  return res_df

In [None]:
dataset_name='Cora'

In [None]:
def nodes_percent_to_num_nodes(percents, num_nodes):
  return [round(pct*num_nodes) for pct in percents]

In [None]:
# Optimized Model Params

model_params = {
     'MLP': {'in_channels':1433, 'hidden_channels':16, 'bias': True, 'dropout': 0.5 },
    'GNN':{'in_channels':1433, 'hidden_channels':30, 'out_channels':7, 'num_layers':2, 'dropout':0.2, 'aggr' :'add'},
    'GAT':{'in_channels':1433, 'hidden_channels':15, 'out_channels':7, 'num_layers':2, 'dropout':0.2},
    'Graphsage':{'in_channels':1433, 'hidden_channels':50, 'out_channels':7, 'num_layers':2, 'dropout':0.3}
}
opt_params = {
    'MLP':{'lr':0.01, 'weight_decay':5e-4},
    'GNN':{'lr':0.05, 'weight_decay':1e-6}, 
    'GAT':{'lr':0.05, 'weight_decay':5e-4}, 
    'Graphsage':{'lr':0.05, 'weight_decay':5e-4}
}


### Experiment 1 - Add gaussian noise

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = './results_v3/Experiment_1_add_gaussian_noise.csv'
# **** DONT forget to change this *****

data = data.to('cpu')

feature_noise_params = {'data':data}
noise_info = {'noise_type':'feature_noise', 'params':feature_noise_params, \
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],\
              'noise_param_name':'noise_level'}

noise_var_names_feat_exp = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=noise_var_names_feat_exp, edge_types=['edge_index'],
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only
# exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], feature_noise__noiselvl-0.9=[2708, 1433], feature_noise__noiselvl-0.95=[2708, 1433], feature_noise__noiselvl-0.99=[2708, 1433], feature_noise__noiselvl-1=[2708, 1433])
Run GNN: 1/36: feature_noise__noiselvl-0 - edge_index Avg. Test Accuracy: 0.795
Run GNN: 2/36: feature_noise__noiselvl-0.01 - edge_index Avg. Test Accuracy: 0.785
Run GNN: 3/36: feature_noise__noiselvl-0.05 - edge_index Avg. Test Accuracy: 0.724
Run GNN: 4/36: feature_noise__noiselvl-0.1 - edge_index Avg. Test Accuracy: 0.457
Run GNN: 5/36: feature

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num
0,Cora,GNN,feature_noise__noiselvl-0,edge_index,0.795,"0.783,0.796,0.805",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3
1,Cora,GNN,feature_noise__noiselvl-0.01,edge_index,0.785,"0.769,0.785,0.8",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3
2,Cora,GNN,feature_noise__noiselvl-0.05,edge_index,0.724,"0.723,0.712,0.737",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3
3,Cora,GNN,feature_noise__noiselvl-0.1,edge_index,0.457,"0.41,0.505,0.457",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3
4,Cora,GNN,feature_noise__noiselvl-0.15,edge_index,0.597,"0.662,0.566,0.564",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3
5,Cora,GNN,feature_noise__noiselvl-0.3,edge_index,0.346,"0.414,0.332,0.293",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3
6,Cora,GNN,feature_noise__noiselvl-0.45,edge_index,0.085,"0.065,0.108,0.083",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3
7,Cora,GNN,feature_noise__noiselvl-0.6,edge_index,0.152,"0.144,0.167,0.145",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3
8,Cora,GNN,feature_noise__noiselvl-0.9,edge_index,0.199,"0.313,0.14,0.144",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3
9,Cora,GNN,feature_noise__noiselvl-0.95,edge_index,0.149,"0.159,0.185,0.103",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",3


### Experiment 2 - Remove edges randomly

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = './results_v3/Experiment_2_remove_random_edges.csv'
# **** DONT forget to change this *****

data = data.to('cpu')

remove_edge_params = {'data':data, 'bidirectional':True}
noise_info = {'noise_type':'edge_removal', 'strategy':'all_edges','params':remove_edge_params, \
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],\
              'noise_param_name':'edges_to_remove_ratio'}

exp_rm_random_edges = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_edges,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only
# exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], feature_noise__noiselvl-0.9=[2708, 1433], feature_noise__noiselvl-0.95=[2708, 1433], feature_noise__noiselvl-0.99=[2708, 1433], feature_noise__noiselvl-1=[2708, 1433], edge_removal_all_edges__noiselvl-0=[2, 10556], edge_removal_all_edges__noiselvl-0.01=[2, 10450], edge_removal_all_edges__noiselvl-0.05=[2, 10032], edge_removal_all_edges__noiselvl-0.1=[2, 9530], edge_removal_all_edges__noiselvl-0.15=[2, 9032], edge_removal_all_edges__noiselvl-0.3=[2, 7652], edge_removal_all_edges__noiselvl-0.45=[2, 6340], edge_remo

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num
0,Cora,GNN,x,edge_removal_all_edges__noiselvl-0,0.805,"0.814,0.804,0.796",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3
1,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.01,0.798,"0.799,0.793,0.803",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3
2,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.05,0.789,"0.799,0.774,0.794",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3
3,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.1,0.793,"0.778,0.803,0.798",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3
4,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.15,0.791,"0.797,0.801,0.776",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3
5,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.3,0.793,"0.787,0.798,0.795",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3
6,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.45,0.77,"0.785,0.769,0.757",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3
7,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.6,0.772,"0.768,0.783,0.765",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3
8,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.9,0.764,"0.761,0.767,0.764",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3
9,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.95,0.77,"0.778,0.766,0.765",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",3


### Experiment 3 - Add edges randomly

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = './results_v3/Experiment_3_add_random_edges.csv'
# **** DONT forget to change this *****

data = data.to('cpu')

add_edge_params = {'data':data, 'bidirectional':True}
# edge_addition_params = {'data':data}
noise_info = {'noise_type':'edge_addition', 'strategy':'all_edges','params':add_edge_params, \
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],\
              'noise_param_name':'edges_to_add_ratio'}

exp_add_random_edges = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_add_random_edges,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only
# exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], feature_noise__noiselvl-0.9=[2708, 1433], feature_noise__noiselvl-0.95=[2708, 1433], feature_noise__noiselvl-0.99=[2708, 1433], feature_noise__noiselvl-1=[2708, 1433], edge_removal_all_edges__noiselvl-0=[2, 10556], edge_removal_all_edges__noiselvl-0.01=[2, 10450], edge_removal_all_edges__noiselvl-0.05=[2, 10032], edge_removal_all_edges__noiselvl-0.1=[2, 9530], edge_removal_all_edges__noiselvl-0.15=[2, 9032], edge_removal_all_edges__noiselvl-0.3=[2, 7652], edge_removal_all_edges__noiselvl-0.45=[2, 6340], edge_remo

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num
0,Cora,GNN,x,edge_addition_all_edges__noiselvl-0,0.802,"0.807,0.807,0.791",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3
1,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.01,0.791,"0.79,0.794,0.788",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3
2,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.05,0.799,"0.788,0.803,0.805",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3
3,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.1,0.796,"0.793,0.797,0.797",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3
4,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.15,0.787,"0.792,0.78,0.789",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3
5,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.3,0.774,"0.763,0.784,0.775",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3
6,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.45,0.763,"0.76,0.757,0.771",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3
7,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.6,0.741,"0.703,0.761,0.758",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3
8,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.9,0.745,"0.749,0.734,0.751",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3
9,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.95,0.746,"0.737,0.756,0.746",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",3


### Experiment 4 - Remove Edges: Random - nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = './results_v3/Experiment_4_remove_random_nodes.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2]  # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':True, 'choose_type':'random'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], feature_noise__noiselvl-0.9=[2708, 1433], feature_noise__noiselvl-0.95=[2708, 1433], feature_noise__noiselvl-0.99=[2708, 1433], feature_noise__noiselvl-1=[2708, 1433], edge_removal_all_edges__noiselvl-0=[2, 10556], edge_removal_all_edges__noiselvl-0.01=[2, 10450], edge_removal_all_edges__noiselvl-0.05=[2, 10032], edge_removal_all_edges__noiselvl-0.1=[2, 9530], edge_removal_all_edges__noiselvl-0.15=[2, 9032], edge_removal_all_edges__noiselvl-0.3=[2, 7652], edge_removal_all_edges__noiselvl-0.45=[2, 6340], edge_remo

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.804,"0.804,0.8,0.808",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.795,"0.799,0.801,0.785",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.801,"0.805,0.793,0.804",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.802,"0.794,0.806,0.806",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.797,"0.78,0.812,0.8",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.756,"0.762,0.745,0.761",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,542,0.6
104,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.763,"0.74,0.771,0.777",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,542,0.9
105,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.769,"0.765,0.777,0.764",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,542,0.95
106,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.781,"0.786,0.785,0.773",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,542,0.99


### Experiment 5 - Remove Edges: topk - nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = './results_v3/Experiment_5_remove_topk_nodes.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2]  # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':True, 'choose_type':'top_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], feature_noise__noiselvl-0.9=[2708, 1433], feature_noise__noiselvl-0.95=[2708, 1433], feature_noise__noiselvl-0.99=[2708, 1433], feature_noise__noiselvl-1=[2708, 1433], edge_removal_all_edges__noiselvl-0=[2, 10556], edge_removal_all_edges__noiselvl-0.01=[2, 10450], edge_removal_all_edges__noiselvl-0.05=[2, 10032], edge_removal_all_edges__noiselvl-0.1=[2, 9530], edge_removal_all_edges__noiselvl-0.15=[2, 9032], edge_removal_all_edges__noiselvl-0.3=[2, 7652], edge_removal_all_edges__noiselvl-0.45=[2, 6340], edge_remo

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.8,"0.805,0.801,0.795",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,top,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.791,"0.794,0.799,0.781",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,top,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.8,"0.797,0.804,0.799",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,top,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.787,"0.772,0.792,0.797",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,top,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.795,"0.802,0.798,0.785",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,top,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.779,"0.782,0.767,0.787",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,top,True,542,0.6
104,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.766,"0.754,0.765,0.779",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,top,True,542,0.9
105,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.762,"0.757,0.764,0.766",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,top,True,542,0.95
106,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.767,"0.765,0.772,0.765",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,top,True,542,0.99


### Experiment 6 - Remove Edges: Bottom k - nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = './results_v3/Experiment_6_remove_bottomk_nodes.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2]  # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':True, 'choose_type':'bottom_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], feature_noise__noiselvl-0.9=[2708, 1433], feature_noise__noiselvl-0.95=[2708, 1433], feature_noise__noiselvl-0.99=[2708, 1433], feature_noise__noiselvl-1=[2708, 1433], edge_removal_all_edges__noiselvl-0=[2, 10556], edge_removal_all_edges__noiselvl-0.01=[2, 10450], edge_removal_all_edges__noiselvl-0.05=[2, 10032], edge_removal_all_edges__noiselvl-0.1=[2, 9530], edge_removal_all_edges__noiselvl-0.15=[2, 9032], edge_removal_all_edges__noiselvl-0.3=[2, 7652], edge_removal_all_edges__noiselvl-0.45=[2, 6340], edge_remo

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.793,"0.785,0.806,0.789",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.791,"0.785,0.789,0.798",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.789,"0.784,0.79,0.794",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.799,"0.799,0.796,0.803",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.794,"0.804,0.791,0.787",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.762,"0.765,0.765,0.757",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.6
104,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.768,"0.774,0.768,0.763",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.9
105,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.772,"0.767,0.775,0.773",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.95
106,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.776,"0.779,0.777,0.773",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.99


### Experiment 7 - Add Edges: random nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = './results_v3/Experiment_7_add_random_nodes.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2]  # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
add_edge_from_nodes_params = {'data':data, 'bidirectional':True, 'choose_type':'random'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':add_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_add_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_add_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_add_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only
exp_add_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_add_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_add_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_add_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], feature_noise__noiselvl-0.9=[2708, 1433], feature_noise__noiselvl-0.95=[2708, 1433], feature_noise__noiselvl-0.99=[2708, 1433], feature_noise__noiselvl-1=[2708, 1433], edge_removal_all_edges__noiselvl-0=[2, 10556], edge_removal_all_edges__noiselvl-0.01=[2, 10450], edge_removal_all_edges__noiselvl-0.05=[2, 10032], edge_removal_all_edges__noiselvl-0.1=[2, 9530], edge_removal_all_edges__noiselvl-0.15=[2, 9032], edge_removal_all_edges__noiselvl-0.3=[2, 7652], edge_removal_all_edges__noiselvl-0.45=[2, 6340], edge_remo

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.798,"0.804,0.811,0.78",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.796,"0.8,0.799,0.788",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.789,"0.793,0.769,0.804",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.798,"0.798,0.808,0.789",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.801,"0.8,0.805,0.797",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.766,"0.766,0.763,0.77",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,542,0.6
104,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.771,"0.767,0.778,0.768",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,542,0.9
105,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.76,"0.744,0.763,0.773",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,542,0.95
106,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.765,"0.764,0.769,0.761",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,random,True,542,0.99


### Experiment 8 - Add Edges: topk nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = './results_v3/Experiment_8_add_topk_nodes.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2]  # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
add_edge_from_nodes_params = {'data':data, 'bidirectional':True, 'choose_type':'top_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':add_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_add_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_add_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_add_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only
exp_add_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_add_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_add_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], feature_noise__noiselvl-0.9=[2708, 1433], feature_noise__noiselvl-0.95=[2708, 1433], feature_noise__noiselvl-0.99=[2708, 1433], feature_noise__noiselvl-1=[2708, 1433], edge_removal_all_edges__noiselvl-0=[2, 10556], edge_removal_all_edges__noiselvl-0.01=[2, 10450], edge_removal_all_edges__noiselvl-0.05=[2, 10032], edge_removal_all_edges__noiselvl-0.1=[2, 9530], edge_removal_all_edges__noiselvl-0.15=[2, 9032], edge_removal_all_edges__noiselvl-0.3=[2, 7652], edge_removal_all_edges__noiselvl-0.45=[2, 6340], edge_remo

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.798,"0.799,0.796,0.8",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.792,"0.788,0.795,0.792",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.791,"0.797,0.784,0.792",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.794,"0.796,0.787,0.799",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.791,"0.789,0.797,0.786",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.767,"0.769,0.773,0.76",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.6
104,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.768,"0.774,0.765,0.766",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.9
105,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.769,"0.792,0.765,0.751",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.95
106,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.777,"0.78,0.766,0.785",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.99


### Experiment 9 - Add Edges: bottomk nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = './results_v3/Experiment_9_add_bottomk_nodes.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2]  # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
add_edge_from_nodes_params = {'data':data, 'bidirectional':True, 'choose_type':'bottom_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':add_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_add_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_add_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_add_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only
exp_add_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_add_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_add_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], feature_noise__noiselvl-0.9=[2708, 1433], feature_noise__noiselvl-0.95=[2708, 1433], feature_noise__noiselvl-0.99=[2708, 1433], feature_noise__noiselvl-1=[2708, 1433], edge_removal_all_edges__noiselvl-0=[2, 10556], edge_removal_all_edges__noiselvl-0.01=[2, 10450], edge_removal_all_edges__noiselvl-0.05=[2, 10032], edge_removal_all_edges__noiselvl-0.1=[2, 9530], edge_removal_all_edges__noiselvl-0.15=[2, 9032], edge_removal_all_edges__noiselvl-0.3=[2, 7652], edge_removal_all_edges__noiselvl-0.45=[2, 6340], edge_remo

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.79,"0.773,0.795,0.801",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.802,"0.806,0.809,0.791",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.797,"0.8,0.8,0.792",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.806,"0.815,0.804,0.8",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.796,"0.785,0.789,0.815",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.757,"0.763,0.76,0.747",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.6
104,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.766,"0.756,0.765,0.777",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.9
105,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.776,"0.768,0.781,0.78",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.95
106,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.766,"0.764,0.766,0.769",25,"{'in_channels': 1433, 'hidden_channels': 50, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",3,bottom,True,542,0.99


In [None]:
import shutil
shutil.make_archive('results_v3', 'zip', 'results_v3')