<a href="https://colab.research.google.com/github/camligorkem/cs-260c-project/blob/main/CS_260_Node_Classification_Experiments_GC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [270]:
from google.colab import drive

drive.mount('/content/drive')
%cd "/content/drive/My Drive/cs260c/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/cs260c


In [1]:
# Install required packages.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

# Helper function for visualization.
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(h, color):
    z = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())

    plt.figure(figsize=(10,10))
    plt.xticks([])
    plt.yticks([])

    plt.scatter(z[:, 0], z[:, 1], s=70, c=color, cmap="Set2")
    plt.show()

[K     |████████████████████████████████| 7.9 MB 5.4 MB/s 
[K     |████████████████████████████████| 3.5 MB 4.5 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [2]:
import torch 
import numpy as np
import math


from torch_geometric.utils import degree
import torch_geometric
import torch_geometric.utils as tg_utils
import pandas as pd

In [3]:
!rm -r data

rm: cannot remove 'data': No such file or directory


In [4]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('===========================================================================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...



Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Done!


In [5]:
def get_masked_noise(data,  noise_level=0.15):
  x = data.x
  noise_added_node_num = int(noise_level * x.shape[0])
  chose_random_rows = np.random.choice(x.shape[0], noise_added_node_num, replace=False)
  #print(chose_random_rows)
  mask_rows = torch.zeros(x.shape)
  mask_rows[chose_random_rows,:] = torch.ones(1, x.shape[1])
  noise = (0.1**0.5)*torch.randn(x.shape)
  masked_noise = noise* mask_rows.int().float()

  #print(mask_rows)
  #print(noise)
  #print(masked_noise)
  return x + masked_noise

In [6]:
# remove x% edges
def remove_from_all_edges(data, noise_level = 0.15, bidirectional=False):
  edge_index = data.edge_index

  if bidirectional:
    noise_level /= 2
  
  edge_ratio_to_keep = 1 - noise_level
  num_edges_keep= int(edge_ratio_to_keep * edge_index.shape[1])
  chose_random_edge_indices = np.random.choice(edge_index.shape[1], num_edges_keep, replace=False)

  #print(edge_index[0][chose_random_edge_indices].shape)
  #print(num_edges_keep)
  #print(edge_index.shape[1])

  edge_index_removed = torch.zeros((2,num_edges_keep), dtype=torch.int64)
  edge_index_removed[0] = edge_index[0][chose_random_edge_indices]
  edge_index_removed[1] = edge_index[1][chose_random_edge_indices]

  if bidirectional:
    # find the node names deleted in below indices and delete also for the opposite side.
    final_edges_bidirec_0 = []
    final_edges_bidirec_1 = []

    # create a set with all edges
    edge_maps = set()
    for e_0, e_1 in zip(edge_index_removed[0], edge_index_removed[1]):
      edge_maps.add((e_0.item(),e_1.item()))

    for e_0, e_1 in zip(edge_index_removed[0], edge_index_removed[1]):
      e_0_val = e_0.item()
      e_1_val = e_1.item()
      # check an edge has its other direction, if yes add to the final list, if not skip
      if (e_0_val, e_1_val) in edge_maps and (e_1_val, e_0_val) in edge_maps:
        final_edges_bidirec_0.append(e_0_val) 
        final_edges_bidirec_1.append(e_1_val)

    final_edges_bidirec_0 = torch.tensor(final_edges_bidirec_0)
    final_edges_bidirec_1 = torch.tensor(final_edges_bidirec_1)
    edge_index_removed = torch.zeros((2,len(final_edges_bidirec_1)), dtype=torch.int64)
    edge_index_removed[0] = final_edges_bidirec_0
    edge_index_removed[1] = final_edges_bidirec_1

  return edge_index_removed



In [7]:
# remove x% edges from random k, top_k, bottom_k nodes

def choose_nodes(data, num_nodes, k_nodes, choose_type):
  if choose_type=='random':
    nodes_chosen = torch.from_numpy(np.random.choice(num_nodes, k_nodes, replace=False))
  elif choose_type=='top_k':
    # find indegree edges
    dg = torch_geometric.utils.degree(data.edge_index[0])
    top_k_nodes_degrees, top_k_nodes_indices = torch.topk(dg, k_nodes)
    #print(top_k_nodes_degrees, top_k_nodes_indices)
    nodes_chosen = top_k_nodes_indices
  elif choose_type=='bottom_k':
    # find indegree edges
    dg = torch_geometric.utils.degree(data.edge_index[0])
    bottom_k_nodes_degrees, bottom_k_nodes_indices = torch.topk(dg, k_nodes, largest=False)
    #print(bottom_k_nodes_degrees, bottom_k_nodes_indices)
    nodes_chosen = bottom_k_nodes_indices
  else:
    raise 'choose_type should be from random, top_k, bottom_k'
  return nodes_chosen

# to do loop for each node separately
def remove_edges_from_chosen_nodes(data, nodes_chosen, edges_to_remove_per_node_ratio):
  edges_0_list, edges_1_list = [],[]
  for nc in nodes_chosen:
    edge_0, edge_1 = remove_edge_per_node(data=data, node=nc, 
                                          edges_to_remove_per_node_ratio=edges_to_remove_per_node_ratio)
    edges_0_list.append(edge_0)
    edges_1_list.append(edge_1)

  edges_0 = torch.cat(edges_0_list, 0)
  edges_1 = torch.cat(edges_1_list, 0)

  return edges_0, edges_1

def remove_edge_per_node(data, node, edges_to_remove_per_node_ratio=0.1):
  mask_node_indices = torch.isin(data.edge_index[0], node)

  select_node_edges_0 = data.edge_index[0][mask_node_indices]
  select_node_edges_1 = data.edge_index[1][mask_node_indices]
  #print(select_node_edges_0)
  #print(select_node_edges_1)

  # choose how much of the edges we will remove for this node
  # we decide on number of edges to remove for each node based on the number of edges each node has
  # and by taking the ratio given by edges_to_remove_per_node_ratio
  # note: we use ceil to remove at least one node (unless ratio is 0)
  num_edges_remove = int(math.ceil(edges_to_remove_per_node_ratio* select_node_edges_0.shape[0]))
  # print(num_edges_remove)
  num_edges_keep = select_node_edges_0.shape[0] - num_edges_remove

  # choose random edges to keep, the rest is removed
  chose_random_edge_indices = np.random.choice(select_node_edges_0.shape[0], num_edges_keep, replace=False)
  # print(num_edges_keep)
  
  edge_index_removed = torch.zeros((2,num_edges_keep), dtype=torch.int64)
  edge_node_index_removed_0 = select_node_edges_0[chose_random_edge_indices]
  edge_node_index_removed_1 = select_node_edges_1[chose_random_edge_indices]

  return edge_node_index_removed_0, edge_node_index_removed_1

def remove_edges_from_nodes(data, noise_level = 0.15, k_nodes=10,
                            choose_type='random', bidirectional=False):
  if bidirectional:
    noise_level /= 2

  edge_p_node_ratio_to_keep = 1 - noise_level

  # choose topk, bottomk, or random
  nodes_chosen = choose_nodes(data=data, num_nodes=data.num_nodes, k_nodes=k_nodes, choose_type=choose_type)

  # keep edges from remaining nodes
  mask_node_indices = torch.isin(data.edge_index[0],nodes_chosen)
  index_keep = torch.ones(data.edge_index[0].shape[0], dtype=bool)
  index_keep[mask_node_indices] = False
  edges_to_keep_0 = data.edge_index[0][index_keep]
  edges_to_keep_1 = data.edge_index[1][index_keep]
  #print(edges_to_keep_0) 
  #print(edges_to_keep_1)

  # remove one-directional or bi-directional
  edges_0_kept_chosen_nodes, edges_1_kept_chosen_nodes = remove_edges_from_chosen_nodes(data=data, 
                                                                                        nodes_chosen=nodes_chosen,
                                                                                        edges_to_remove_per_node_ratio=noise_level)
  # concat edges to keep and edges_kept_chosen_nodes
  final_edges_0 = torch.cat([edges_to_keep_0, edges_0_kept_chosen_nodes], 0)
  final_edges_1 = torch.cat([edges_to_keep_1, edges_1_kept_chosen_nodes], 0)

  # do bidirectional here! IF bidirectional set to true remove both directions of the edges.
  if bidirectional:
    # find the node names deleted in below indices and delete also for the opposite side.
    final_edges_bidirec_0 = []
    final_edges_bidirec_1 = []

    # create a set with all edges
    edge_maps = set()
    for e_0, e_1 in zip(final_edges_0, final_edges_1):
      edge_maps.add((e_0.item(),e_1.item()))

    for e_0, e_1 in zip(final_edges_0, final_edges_1):
      e_0_val = e_0.item()
      e_1_val = e_1.item()
      # check an edge has its other direction, if yes add to the final list, if not skip
      if (e_0_val, e_1_val) in edge_maps and (e_1_val, e_0_val) in edge_maps:
        final_edges_bidirec_0.append(e_0_val) 
        final_edges_bidirec_1.append(e_1_val)

    final_edges_bidirec_0 = torch.tensor(final_edges_bidirec_0)
    final_edges_bidirec_1 = torch.tensor(final_edges_bidirec_1)
    final_edges_0 = final_edges_bidirec_0
    final_edges_1 = final_edges_bidirec_1
  edge_index_removed = torch.zeros((2, final_edges_0.shape[0]), dtype=torch.int64)
  edge_index_removed[0] = final_edges_0
  edge_index_removed[1] = final_edges_1

    
  # use TORCH_GEOMETRIC.UTILS.SORT_EDGE_INDEX
  edge_index_removed_sorted = tg_utils.sort_edge_index(edge_index_removed)
  return edge_index_removed_sorted


In [8]:
def add_random_edges(data, noise_level = 0.15, bidirectional=False):
  edge_index = data.edge_index
  num_nodes = data.num_nodes

  if bidirectional:
    noise_level /= 2

  new_edges = edge_index.T
  edge_num = new_edges.shape[0]
  num_of_new_edges = int(edge_num * noise_level)

  for i in range(num_of_new_edges):
    while True:
      new_edge = ((torch.rand(1,2) * 1000000).to(int) % num_nodes)
      new_edge_flip = torch.flip(new_edge, [1])
      new_edge_exist = torch.any(torch.all(torch.eq(new_edges,new_edge),1))
      new_edge_flip_exist = torch.any(torch.all(torch.eq(new_edges,new_edge_flip),1))
      if not new_edge_exist and not new_edge_flip_exist:
        new_edges = torch.cat((new_edges,new_edge), 0)
        if bidirectional:
          new_edges = torch.cat((new_edges,new_edge_flip), 0)
        break
      elif not new_edge_exist:
        new_edges = torch.cat((new_edges,new_edge), 0)
        break
      elif not new_edge_flip_exist:
        new_edges = torch.cat((new_edges,new_edge_flip), 0)
        break

  edge_index_sorted = tg_utils.sort_edge_index(new_edges.T)
  return edge_index_sorted

In [9]:
for added_ratio in [0.15]:
  new_edges = add_random_edges(data,  noise_level=added_ratio, bidirectional=True)
  data[f'edge_index_added_n_{added_ratio}'] = new_edges


In [10]:
## Helper function of add_edges_to_nodes

def add_edges_to_a_node(data, new_edges, node, num_to_add=0, bidirectional=False):
  for i in range(num_to_add):

    # Running till we find the right edge to add
    while True:
      #create a random node. 1000000 is a arbitrary number which can be replaced to any number bigger than data.num_nodes
      new_index = ((torch.rand(1) * 1000000).to(int) % data.num_nodes).item()
      new_edge = torch.tensor([[node, new_index]])
      new_edge_flip = torch.flip(new_edge, [1])
      #check whether the new edge and flip one exists or not 
      new_edge_exist = torch.any(torch.all(torch.eq(new_edges,new_edge),1))
      new_edge_flip_exist = torch.any(torch.all(torch.eq(new_edges,new_edge_flip),1))
      if not new_edge_exist and not new_edge_flip_exist:
        new_edges = torch.cat((new_edges,new_edge), 0)
        if bidirectional:
          new_edges = torch.cat((new_edges,new_edge_flip), 0)
        break
      elif not new_edge_exist:
        new_edges = torch.cat((new_edges,new_edge), 0)
        break
      elif not new_edge_flip_exist:
        new_edges = torch.cat((new_edges,new_edge_flip), 0)
        break
  return new_edges

## Adding x% edges to random k, top k, or bottom k nodes

def add_edges_to_nodes(data, noise_level=0.15, k_nodes=10, chosse_type='random', bidirectional=False):
  if bidirectional:
    noise_level /= 2

  #choose nodes by three different types
  nodes_chosen = choose_nodes(data=data, num_nodes=data.num_nodes, k_nodes=k_nodes, choose_type=chosse_type)
  
  new_edges = data.edge_index.T

  #add new edges to every chosen node
  for node in nodes_chosen:
    edge_num_of_node = torch.isin(data.edge_index, node).to(int).sum()
    edge_num_to_add = int(edge_num_of_node * noise_level)
    
    new_edges = add_edges_to_a_node(data, new_edges, node.item(), edge_num_to_add, bidirectional)
  
  edge_index_sorted = tg_utils.sort_edge_index(new_edges.T)
  return edge_index_sorted

## Experiments

In [11]:
!pip install class-resolver

from torch_geometric.nn import MLP, GCN, GraphSAGE, GAT
from class_resolver import ClassResolver

Collecting class-resolver
  Downloading class_resolver-0.3.4-py3-none-any.whl (20 kB)
Installing collected packages: class-resolver
Successfully installed class-resolver-0.3.4


In [12]:
def get_model(model_name, model_params):
  # add more model from here if needed: https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#models
  # you can also check model parameters from above
  model = None
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  if model_name=='MLP':
    model = MLP(**model_params).to(device)
  elif model_name=='GNN':
    model = GCN(**model_params).to(device)
  elif model_name=='GAT':
    model = GAT(**model_params).to(device)
  elif model_name=='Graphsage':
    model = GraphSAGE(**model_params).to(device)
  else:
    raise 'Model names should be within MLP, GNN, GAT, Graphsage'
  return model

In [13]:
# Creating MLP example, add all parameters you want to use to create/tune model in the below dictionary
model_params= {'in_channels':1433, 'hidden_channels':16, 'out_channels':7, 'num_layers':3}
mlp_model = get_model(model_name='MLP', model_params=model_params)
print(mlp_model)

model_params= {'in_channels':1433, 'hidden_channels':16, 'out_channels':7, 'num_layers':3, 'dropout':0.1}
gnn_model = get_model(model_name='GNN', model_params=model_params)
print(gnn_model)


MLP(1433, 16, 16, 7)
GCN(1433, 7, num_layers=3)


In [25]:
def get_noise_function(noise_info):
  '''
  This function directly returns the function we created for different noise logics
  Change the name of the function below if noise logic function name is changed.
  '''
  noise_fn = None 
  if noise_info['noise_type']=='feature_noise':
    noise_fn = get_masked_noise
  elif noise_info['noise_type']=='edge_removal':
    if noise_info['strategy']=='all_edges' :
      noise_fn = remove_from_all_edges
    elif noise_info['strategy']=='nodes' :
      noise_fn = remove_edges_from_nodes
  elif noise_info['noise_type']=='edge_addition':
    if noise_info['strategy']=='all_edges' :
      noise_fn = add_random_edges
    elif noise_info['strategy']=='nodes' :
      noise_fn = add_edges_to_nodes
  else:
    raise 'Noise type should be chosen from feature_noise, edge_removal, edge_addition' 
  return noise_fn
  

def create_noised_data_for_experiment(data, noise_info):
  '''
  Create noised data outside of experiment, so that you can use same data in different experiments
  '''
  noise_data_names = []
  noise_fn = get_noise_function(noise_info)
  
  if noise_info.get('strategy')=='nodes':
    add_prefix = f"{noise_info.get('strategy', '')}_choose_type-{noise_info['params'].get('choose_type', '')}_bidirec-{noise_info['params'].get('bidirectional', '')}"
  elif noise_info.get('strategy')=='all_edges':
    add_prefix = f"{noise_info.get('strategy', '')}_"
  else: 
    add_prefix=''

  if noise_info.get('strategy')=='nodes':
    for k_nodes in noise_info['k_nodes_list']:
      for noise_level in noise_info['noise_levels']:
        noise_data_name = f'{noise_info["noise_type"]}_{add_prefix}_knodes-{k_nodes}_noiselvl-{noise_level}'
        noise_info['params']['noise_level'] = noise_level
        noise_info['params']['k_nodes'] = k_nodes
        noised_data = noise_fn(**noise_info['params'])
        data[noise_data_name] = noised_data
        noise_data_names.append(noise_data_name)

  else: # all_edges and gaussian noisee case
    for noise_level in noise_info['noise_levels']:
      noise_data_name = f'{noise_info["noise_type"]}_{add_prefix}_noiselvl-{noise_level}'
      noise_info['params']['noise_level'] = noise_level
      noised_data = noise_fn(**noise_info['params'])
      data[noise_data_name] = noised_data
      noise_data_names.append(noise_data_name)
  return noise_data_names
 
# reuse train and test
def train(model, optimizer, model_name, x_type='x', edge_type='edge_index'):
  criterion = torch.nn.CrossEntropyLoss()
  model.train()
  optimizer.zero_grad()  # Clear gradients.
  if model_name == 'MLP':
    out = model(data[x_type])
  else:
    out = model(data[x_type], data[edge_type])  # Perform a single forward pass.
  loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
  loss.backward()  # Derive gradients.
  optimizer.step()  # Update parameters based on gradients.
  return loss

def test(model, model_name, x_type='x', edge_type='edge_index'):
  model.eval()
  if model_name == 'MLP':
    out = model(data[x_type])
  else:
    out = model(data[x_type], data[edge_type])  # Perform a single forward pass.

  pred = out.argmax(dim=1)  # Use the class with highest probability.
  test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
  test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
  return test_acc

def validation(model, model_name, x_type='x', edge_type='edge_index'):
  model.eval()
  if model_name == 'MLP':
    out = model(data[x_type])
  else:
    out = model(data[x_type], data[edge_type])  # Perform a single forward pass.
  pred = out.argmax(dim=1)  # Use the class with highest probability.
  val_correct = pred[data.val_mask] == data.y[data.val_mask]  # Check against ground-truth labels.
  val_acc = int(val_correct.sum()) / int(data.val_mask.sum())  # Derive ratio of correct predictions.
  return val_acc

def experiment(dataset_name, model_names, model_params, data, x_types, edge_types, noise_info, opt_params,  num_epochs=50, mlp_epochs=50, repeat_num=1, print_updates=False,print_updates_detail=False):
  '''
  Assumes the noisy data is already created and inside the data object (so that we can use same data sample for different models to compare)
  '''
  exp_count = len(x_types)*len(edge_types)*len(model_names)
  count=0
  res = []
  
  for model_name in model_names:
    for x_type in x_types:
      for ed_type in edge_types:
        count+=1
        test_accs = []
        for exp_num in range(1, repeat_num+1): # we will repeat experiment repeat many times, to increase results reliability
           
          model =  get_model(model_name=model_name, model_params=model_params[model_name])
          optimizer = torch.optim.Adam(model.parameters(), lr=opt_params[model_name]['lr'], weight_decay=opt_params[model_name]['weight_decay']) 
          epochs = mlp_epochs if model_name=='MLP' else num_epochs

          for epoch in range(num_epochs):
              loss = train(model, optimizer, model_name=model_name, x_type=x_type, edge_type=ed_type)
              # print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

          # we test our results in original data, no noise added ones 
          # if you want to test on noised data change them to x_type=x_type and edge_type=ed_type below
          test_acc = test(model, model_name=model_name, x_type='x',edge_type='edge_index') 
          test_accs.append(test_acc)
          if print_updates_detail:
            print(f'Exp_num:{exp_num} Test Accuracy: {test_acc:.4f}')

        mean_test_acc = round(np.mean(test_accs), 3)
        # TODO we can add other metrics such as runtime to log in here later
        exp_res = {'dataset_name': dataset_name,
                  'model_name':model_name, 'x_type':x_type, 
                'edge_type':ed_type, 'mean_test_accuracy':str(mean_test_acc), 'test_accuracies': ','.join([str(acc) for acc in test_accs]),
                  'num_epochs':epochs, 'model_params': model_params[model_name],
                  'noise_info':noise_info, 'repeat_num':repeat_num} # TODO add more info on experiment noise_info
        res.append(exp_res)
        if print_updates:
            print(f'Run {model_name}: {count}/{exp_count}: {x_type} - {ed_type} Avg. Test Accuracy: {mean_test_acc}')
        
        

  res_df = pd.DataFrame(res, columns=exp_res.keys())
  return res_df

In [15]:
dataset_name='Cora'

In [16]:
def nodes_percent_to_num_nodes(percents, num_nodes):
  return [round(pct*num_nodes) for pct in percents]

In [188]:
# Optimized Model Params

model_params = {
    'GNN':{'in_channels':1433, 'hidden_channels':30, 'out_channels':7, 'num_layers':2, 'dropout':0.2, 'aggr' :'add'},
    'GAT':{'in_channels':1433, 'hidden_channels':15, 'out_channels':7, 'num_layers':2, 'dropout':0.2},
    'Graphsage':{'in_channels':1433, 'hidden_channels': 42, 'out_channels':7, 'num_layers':2, 'dropout':0.2},
    'MLP': {'in_channels':1433, 'hidden_channels':100,  'out_channels':7, 'num_layers':2, 'dropout': 0.2 },
}
opt_params = {
    'GNN':{'lr':0.05, 'weight_decay':1e-6}, 
    'GAT':{'lr':0.05, 'weight_decay':5e-4}, 
    'Graphsage':{'lr':0.05, 'weight_decay':5e-4}, 
    'MLP':{'lr':0.05, 'weight_decay':0.001}, 
}


### Gaussian Experiment add MLP part

In [193]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = 'Experiment_MLP_gaussian.csv'
# **** DONT forget to change this *****

data = data.to('cpu')
# noise data creation example: add noise feature
feature_noise_params = {'data':data}
noise_info = {'noise_type':'feature_noise', 'params':feature_noise_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],
              'noise_param_name':'noise_level'}

noise_var_names_feat_exp = create_noised_data_for_experiment(data=data, noise_info=noise_info)
#print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['MLP']
exp_gaussian_feats_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=noise_var_names_feat_exp, edge_types=['edge_index'],
                                    noise_info=noise_info, num_epochs=50, repeat_num=10, print_updates=True)

# Do for node strategies only
data = data.to('cpu')
exp_gaussian_feats_df.to_csv(EXPERIMENT_FILENAME)
exp_gaussian_feats_df




Run MLP: 1/12: feature_noise__noiselvl-0 - edge_index Avg. Test Accuracy: 0.556
Run MLP: 2/12: feature_noise__noiselvl-0.01 - edge_index Avg. Test Accuracy: 0.527
Run MLP: 3/12: feature_noise__noiselvl-0.05 - edge_index Avg. Test Accuracy: 0.535
Run MLP: 4/12: feature_noise__noiselvl-0.1 - edge_index Avg. Test Accuracy: 0.521
Run MLP: 5/12: feature_noise__noiselvl-0.15 - edge_index Avg. Test Accuracy: 0.525
Run MLP: 6/12: feature_noise__noiselvl-0.3 - edge_index Avg. Test Accuracy: 0.474
Run MLP: 7/12: feature_noise__noiselvl-0.45 - edge_index Avg. Test Accuracy: 0.456
Run MLP: 8/12: feature_noise__noiselvl-0.6 - edge_index Avg. Test Accuracy: 0.462
Run MLP: 9/12: feature_noise__noiselvl-0.9 - edge_index Avg. Test Accuracy: 0.144
Run MLP: 10/12: feature_noise__noiselvl-0.95 - edge_index Avg. Test Accuracy: 0.1
Run MLP: 11/12: feature_noise__noiselvl-0.99 - edge_index Avg. Test Accuracy: 0.132
Run MLP: 12/12: feature_noise__noiselvl-1 - edge_index Avg. Test Accuracy: 0.131


Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num
0,Cora,MLP,feature_noise__noiselvl-0,edge_index,0.556,"0.571,0.579,0.559,0.559,0.522,0.552,0.58,0.548...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10
1,Cora,MLP,feature_noise__noiselvl-0.01,edge_index,0.527,"0.532,0.534,0.541,0.512,0.524,0.528,0.51,0.506...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10
2,Cora,MLP,feature_noise__noiselvl-0.05,edge_index,0.535,"0.54,0.555,0.513,0.548,0.541,0.525,0.525,0.535...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10
3,Cora,MLP,feature_noise__noiselvl-0.1,edge_index,0.521,"0.483,0.532,0.502,0.533,0.512,0.552,0.547,0.54...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10
4,Cora,MLP,feature_noise__noiselvl-0.15,edge_index,0.525,"0.534,0.538,0.531,0.522,0.508,0.497,0.532,0.53...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10
5,Cora,MLP,feature_noise__noiselvl-0.3,edge_index,0.474,"0.443,0.481,0.458,0.5,0.472,0.479,0.465,0.476,...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10
6,Cora,MLP,feature_noise__noiselvl-0.45,edge_index,0.456,"0.402,0.505,0.438,0.474,0.459,0.463,0.506,0.41...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10
7,Cora,MLP,feature_noise__noiselvl-0.6,edge_index,0.462,"0.489,0.464,0.451,0.484,0.453,0.454,0.467,0.46...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10
8,Cora,MLP,feature_noise__noiselvl-0.9,edge_index,0.144,"0.144,0.146,0.144,0.144,0.144,0.144,0.144,0.14...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10
9,Cora,MLP,feature_noise__noiselvl-0.95,edge_index,0.1,"0.093,0.103,0.093,0.093,0.119,0.093,0.126,0.09...",50,"{'in_channels': 1433, 'hidden_channels': 100, ...","{'noise_type': 'feature_noise', 'params': {'da...",10


In [192]:
exp_gaussian_feats_df['test_accuracies'][0]

'0.528,0.557,0.566,0.57,0.553,0.585,0.497,0.541,0.578,0.583'

In [196]:
gaus_prev_exp = pd.read_csv('Experiment_1_add_gaussian_noise.csv', index_col=0)
gaus_prev_exp = pd.concat([gaus_prev_exp,exp_gaussian_feats_df] )

gaus_prev_exp.to_csv('Experiment_1_add_gaussian_noise.csv')

## Experiments

### Experiment 1 - Add gaussian noise

In [272]:
folder_name = 'exp_results/results_v5'
bidirectional_type = False
post_fix= f'_bidirectional_{bidirectional_type}'
num_repeat = 5

In [273]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/Experiment_1_add_gaussian_noise{post_fix}.csv'
# **** DONT forget to change this *****

data = data.to('cpu')

feature_noise_params = {'data':data}
noise_info = {'noise_type':'feature_noise', 'params':feature_noise_params, \
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],\
              'noise_param_name':'noise_level'}

noise_var_names_feat_exp = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage', 'MLP']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=noise_var_names_feat_exp, edge_types=['edge_index'],
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
# exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10286], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10024], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9496], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num
0,Cora,GNN,feature_noise__noiselvl-0,edge_index,0.798,"0.787,0.799,0.799,0.803,0.8",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5
1,Cora,GNN,feature_noise__noiselvl-0.01,edge_index,0.782,"0.779,0.796,0.771,0.785,0.779",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5
2,Cora,GNN,feature_noise__noiselvl-0.05,edge_index,0.747,"0.735,0.747,0.753,0.752,0.75",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5
3,Cora,GNN,feature_noise__noiselvl-0.1,edge_index,0.633,"0.636,0.596,0.689,0.623,0.619",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5
4,Cora,GNN,feature_noise__noiselvl-0.15,edge_index,0.517,"0.566,0.553,0.516,0.501,0.447",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5
5,Cora,GNN,feature_noise__noiselvl-0.3,edge_index,0.128,"0.121,0.08,0.125,0.182,0.134",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5
6,Cora,GNN,feature_noise__noiselvl-0.45,edge_index,0.13,"0.171,0.144,0.07,0.088,0.177",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5
7,Cora,GNN,feature_noise__noiselvl-0.6,edge_index,0.064,"0.064,0.064,0.064,0.064,0.064",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5
8,Cora,GNN,feature_noise__noiselvl-0.9,edge_index,0.171,"0.331,0.132,0.133,0.13,0.13",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5
9,Cora,GNN,feature_noise__noiselvl-0.95,edge_index,0.162,"0.143,0.134,0.152,0.286,0.096",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'feature_noise', 'params': {'da...",5


### Experiment 2 - Remove edges randomly

In [274]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/Experiment_2_remove_random_edges{post_fix}.csv'
# **** DONT forget to change this *****

data = data.to('cpu')

remove_edge_params = {'data':data, 'bidirectional':bidirectional_type}
noise_info = {'noise_type':'edge_removal', 'strategy':'all_edges','params':remove_edge_params, \
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],\
              'noise_param_name':'edges_to_remove_ratio'}

exp_rm_random_edges = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_edges,
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
# exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10286], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10024], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9496], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num
0,Cora,GNN,x,edge_removal_all_edges__noiselvl-0,0.799,"0.798,0.802,0.789,0.806,0.8",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5
1,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.01,0.792,"0.796,0.794,0.79,0.769,0.813",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5
2,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.05,0.802,"0.801,0.798,0.811,0.811,0.789",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5
3,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.1,0.795,"0.793,0.801,0.787,0.79,0.804",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5
4,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.15,0.792,"0.791,0.791,0.798,0.779,0.801",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5
5,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.3,0.788,"0.777,0.786,0.794,0.793,0.789",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5
6,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.45,0.801,"0.807,0.792,0.822,0.789,0.797",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5
7,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.6,0.769,"0.766,0.78,0.765,0.756,0.78",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5
8,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.9,0.745,"0.732,0.74,0.754,0.74,0.758",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5
9,Cora,GNN,x,edge_removal_all_edges__noiselvl-0.95,0.764,"0.776,0.725,0.782,0.765,0.77",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'al...",5


### Experiment 3 - Add edges randomly

In [275]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/Experiment_3_add_random_edges{post_fix}.csv'
# **** DONT forget to change this *****

data = data.to('cpu')

add_edge_params = {'data':data, 'bidirectional':bidirectional_type}
# edge_addition_params = {'data':data}
noise_info = {'noise_type':'edge_addition', 'strategy':'all_edges','params':add_edge_params, \
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],\
              'noise_param_name':'edges_to_add_ratio'}

exp_add_random_edges = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_add_random_edges,
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
# exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10286], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10024], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9496], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num
0,Cora,GNN,x,edge_addition_all_edges__noiselvl-0,0.795,"0.792,0.79,0.785,0.8,0.807",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5
1,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.01,0.791,"0.786,0.804,0.783,0.787,0.793",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5
2,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.05,0.803,"0.791,0.803,0.804,0.812,0.804",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5
3,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.1,0.793,"0.799,0.801,0.79,0.775,0.799",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5
4,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.15,0.791,"0.808,0.781,0.779,0.796,0.789",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5
5,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.3,0.765,"0.759,0.746,0.775,0.779,0.768",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5
6,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.45,0.764,"0.762,0.76,0.75,0.766,0.782",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5
7,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.6,0.777,"0.767,0.77,0.773,0.785,0.79",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5
8,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.9,0.77,"0.76,0.764,0.77,0.774,0.784",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5
9,Cora,GNN,x,edge_addition_all_edges__noiselvl-0.95,0.741,"0.739,0.746,0.742,0.761,0.719",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_addition', 'strategy': 'a...",5


### Experiment 4 - Remove Edges: Random - nodes

In [276]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/Experiment_4_remove_random_nodes{post_fix}.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95, 0.9, 1] #[0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':bidirectional_type, 'choose_type':'random'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10288], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10016], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9500], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.8,"0.789,0.806,0.789,0.802,0.813",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.791,"0.789,0.799,0.777,0.796,0.792",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.796,"0.801,0.798,0.793,0.789,0.799",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.79,"0.789,0.783,0.795,0.789,0.794",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.79,"0.79,0.791,0.79,0.788,0.793",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.742,"0.74,0.736,0.748,0.74,0.746",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,2708,0.6
428,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.724,"0.696,0.71,0.741,0.731,0.741",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,2708,0.9
429,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.716,"0.718,0.714,0.705,0.723,0.72",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,2708,0.95
430,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.677,"0.666,0.665,0.715,0.653,0.688",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,2708,0.99


### Experiment 5 - Remove Edges: topk - nodes

In [277]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/Experiment_5_remove_topk_nodes{post_fix}.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95, 0.9, 1] #[0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':bidirectional_type, 'choose_type':'top_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10288], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10016], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9500], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.799,"0.792,0.8,0.805,0.787,0.811",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,top,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.793,"0.792,0.786,0.801,0.8,0.785",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,top,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.796,"0.788,0.787,0.799,0.811,0.797",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,top,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.788,"0.79,0.79,0.773,0.797,0.788",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,top,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.796,"0.805,0.802,0.791,0.778,0.806",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,top,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.737,"0.735,0.723,0.731,0.732,0.762",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,top,True,2708,0.6
428,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.69,"0.703,0.69,0.694,0.673,0.69",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,top,True,2708,0.9
429,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.71,"0.699,0.719,0.707,0.701,0.725",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,top,True,2708,0.95
430,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.714,"0.696,0.724,0.71,0.72,0.722",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,top,True,2708,0.99


### Experiment 6 - Remove Edges: Bottom k - nodes

In [278]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/Experiment_6_remove_bottomk_nodes{post_fix}.csv'
# **** DONT forget to change this *****

nodes_percent =  [0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95, 0.9, 1] #[0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':bidirectional_type, 'choose_type':'bottom_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10288], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10016], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9500], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.794,"0.796,0.789,0.803,0.789,0.794",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.787,"0.798,0.792,0.764,0.793,0.789",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.788,"0.777,0.791,0.789,0.798,0.787",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.796,"0.794,0.801,0.797,0.801,0.787",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.794,"0.792,0.809,0.797,0.779,0.791",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.748,"0.759,0.754,0.731,0.756,0.741",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,2708,0.6
428,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.667,"0.656,0.663,0.659,0.684,0.675",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,2708,0.9
429,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.671,"0.657,0.645,0.707,0.658,0.689",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,2708,0.95
430,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.703,"0.68,0.684,0.719,0.732,0.699",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,2708,0.99


### Experiment 7 - Add Edges: random nodes

In [279]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/Experiment_7_add_random_nodes{post_fix}.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95, 0.9, 1] #[0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
add_edge_from_nodes_params = {'data':data, 'bidirectional':bidirectional_type, 'choose_type':'random'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':add_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_add_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_add_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_add_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
exp_add_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_add_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_add_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_add_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10288], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10016], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9514], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.802,"0.814,0.801,0.802,0.796,0.799",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.791,"0.802,0.791,0.792,0.777,0.793",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.795,"0.801,0.797,0.803,0.789,0.787",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.791,"0.783,0.78,0.809,0.801,0.782",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-random_bidirec-...,0.798,"0.791,0.796,0.807,0.807,0.791",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.742,"0.756,0.751,0.745,0.737,0.719",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,2708,0.6
428,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.695,"0.685,0.699,0.7,0.694,0.695",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,2708,0.9
429,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.685,"0.695,0.666,0.685,0.692,0.686",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,2708,0.95
430,Cora,Graphsage,x,edge_removal_nodes_choose_type-random_bidirec-...,0.724,"0.722,0.704,0.73,0.726,0.739",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,random,True,2708,0.99


### Experiment 8 - Add Edges: topk nodes

In [280]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/Experiment_8_add_topk_nodes{post_fix}.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95, 0.9, 1] #[0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
add_edge_from_nodes_params = {'data':data, 'bidirectional':bidirectional_type, 'choose_type':'top_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':add_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_add_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_add_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_add_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
exp_add_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_add_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_add_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10288], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10016], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9514], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.798,"0.799,0.804,0.792,0.796,0.801",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0
1,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.797,"0.787,0.805,0.804,0.787,0.801",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0.01
2,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.795,"0.792,0.796,0.801,0.788,0.8",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0.05
3,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.791,"0.788,0.8,0.792,0.782,0.793",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0.1
4,Cora,GNN,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.793,"0.781,0.797,0.802,0.792,0.792",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,135,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.722,"0.71,0.741,0.727,0.726,0.708",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,2708,0.6
428,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.705,"0.699,0.695,0.717,0.721,0.695",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,2708,0.9
429,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.718,"0.716,0.701,0.702,0.724,0.746",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,2708,0.95
430,Cora,Graphsage,x,edge_removal_nodes_choose_type-top_k_bidirec-T...,0.692,"0.689,0.691,0.687,0.692,0.699",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,True,2708,0.99


### Experiment 9 - Add Edges: bottomk nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/Experiment_9_add_bottomk_nodes{post_fix}.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95, 0.9, 1] #[0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
add_edge_from_nodes_params = {'data':data, 'bidirectional':bidirectional_type, 'choose_type':'bottom_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':add_edge_from_nodes_params, 
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_add_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_add_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_add_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
exp_add_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_add_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_add_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10288], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10016], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9514], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

In [221]:
import shutil
shutil.make_archive('results_v5', 'zip', 'results_v5')

'/content/results_v4.zip'

In [234]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = f'./{folder_name}/deneme.csv'
# **** DONT forget to change this *****

nodes_percent =  [0.05, 0.2, 0.5, 0.7, 0.8]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':bidirectional_type, 'choose_type':'bottom_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params, 
              'noise_levels':[1], #[0, 0.3, 0.6, 0.9, 1 ], 
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Put names of the models we want to use for this experiment type
data = data.to('cuda:0')
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=num_repeat, print_updates=True)

# Do for node strategies only
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(lambda x: [s.split('-')[-1] for s in x['edge_type'].split('_') if '-' in s], axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index_added_n_0.15=[2, 12138], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-135_noiselvl-0.01=[2, 10286], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-271_noiselvl-0.01=[2, 10022], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0=[2, 10556], edge_removal_nodes_choose_type-random_bidirec-True_knodes-542_noiselvl-0.01=[2, 9502], feature_noise__noiselvl-0=[2708, 1433], feature_noise__noiselvl-0.01=[2708, 1433], feature_noise__noiselvl-0.05=[2708, 1433], feature_noise__noiselvl-0.1=[2708, 1433], feature_noise__noiselvl-0.15=[2708, 1433], feature_noise__noiselvl-0.3=[2708, 1433], feature_noise__noiselvl-0.45=[2708, 1433], feature_noise__noiselvl-0.6=[2708, 1433], 

Unnamed: 0,dataset_name,model_name,x_type,edge_type,mean_test_accuracy,test_accuracies,num_epochs,model_params,noise_info,repeat_num,choose_type,bidirectional,k_nodes,noise_level
0,Cora,GNN,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.74,"0.729,0.739,0.748,0.742,0.744",25,"{'in_channels': 1433, 'hidden_channels': 30, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,False,2708,1
1,Cora,GAT,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.701,"0.727,0.714,0.712,0.657,0.696",25,"{'in_channels': 1433, 'hidden_channels': 15, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,False,2708,1
2,Cora,Graphsage,x,edge_removal_nodes_choose_type-bottom_k_bidire...,0.585,"0.579,0.584,0.588,0.591,0.585",25,"{'in_channels': 1433, 'hidden_channels': 42, '...","{'noise_type': 'edge_removal', 'strategy': 'no...",5,bottom,False,2708,1


In [238]:
data['edge_index'].shape

torch.Size([2, 10556])

In [243]:
print(data['edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-135_noiselvl-0.05'].shape)
print(data['edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-135_noiselvl-0.3'].shape)
print(data['edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-135_noiselvl-0.6'].shape)
print(data['edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-135_noiselvl-0.9'].shape)
print(data['edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-135_noiselvl-1'].shape)

torch.Size([2, 10421])
torch.Size([2, 10421])
torch.Size([2, 10421])
torch.Size([2, 10421])
torch.Size([2, 10421])


In [255]:
k_nod= 1354
#print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-0.05'].shape)
print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-0.3'].shape)
print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-0.6'].shape)
print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-0.9'].shape)
print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-1'].shape)

torch.Size([2, 9202])
torch.Size([2, 8333])
torch.Size([2, 8047])
torch.Size([2, 8047])


In [254]:
k_nod= 2708
#print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-0.05'].shape)
print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-0.3'].shape)
print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-0.6'].shape)
print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-0.9'].shape)
print(data[f'edge_removal_nodes_choose_type-bottom_k_bidirec-False_knodes-{k_nod}_noiselvl-1'].shape)

torch.Size([2, 6121])
torch.Size([2, 3027])
torch.Size([2, 188])
torch.Size([2, 0])


In [233]:
data.num_nodes

2708

In [251]:
nodes_percent =  [0.05, 0.2, 0.5, 0.7, 0.8] # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95, 0.9, 1]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)
k_nodes_list

[135, 542, 1354, 1896, 2166]

In [None]:
[0.1, 0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99, 1]