<a href="https://colab.research.google.com/github/camligorkem/cs-260c-project/blob/main/CS_260_Node_Classification_Experiments_GC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# Install required packages.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

# Helper function for visualization.
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(h, color):
    """Scatter-plot a 2-D t-SNE projection of `h`.

    Args:
        h: tensor that is detached, moved to the CPU and converted to NumPy
            before being handed to t-SNE.
        color: passed straight to `plt.scatter` as `c` (mapped through the
            "Set2" colormap).
    """
    features = h.detach().cpu().numpy()
    projector = TSNE(n_components=2)
    embedding = projector.fit_transform(features)
    xs, ys = embedding[:, 0], embedding[:, 1]

    plt.figure(figsize=(10,10))
    # Axis ticks are hidden on both axes.
    plt.xticks([])
    plt.yticks([])

    plt.scatter(xs, ys, s=70, c=color, cmap="Set2")
    plt.show()

In [9]:
import torch 
import numpy as np
import math


from torch_geometric.utils import degree
import torch_geometric
import torch_geometric.utils as tg_utils
import pandas as pd

In [10]:
!rm -r data

In [11]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

# Load the Cora Planetoid dataset into data/Planetoid, applying NormalizeFeatures
# as the transform.
dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

# Dataset-level summary.
print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# `data` is the notebook-wide graph object: the noise functions attach their
# outputs to it and train/test/validation read it as a global.
data = dataset[0]  # Get the first graph object.

print()
print(data)
print('===========================================================================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...



Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Done!


In [12]:
def get_masked_noise(data,  noise_level=0.15):
  """Return `data.x` with Gaussian noise added to a random subset of rows.

  `int(noise_level * num_rows)` distinct rows are drawn with NumPy; every
  entry of those rows receives noise with variance 0.1 (std `0.1**0.5`).
  All other rows are returned unchanged. `data.x` itself is not modified.
  """
  features = data.x
  total_rows = features.shape[0]
  noisy_row_count = int(noise_level * total_rows)
  noisy_rows = np.random.choice(total_rows, noisy_row_count, replace=False)

  # One mask entry per row; broadcasting spreads it over the feature columns.
  row_mask = torch.zeros(total_rows, 1)
  row_mask[noisy_rows, :] = 1.0

  # Noise is sampled for the full matrix and then zeroed outside chosen rows.
  gaussian = (0.1**0.5)*torch.randn(features.shape)
  return features + gaussian * row_mask

In [13]:
# remove x% edges
def remove_from_all_edges(data, noise_level = 0.15, bidirectional=False):
  """Randomly drop a fraction of the columns of `data.edge_index`.

  Args:
    data: object exposing `edge_index` with one edge per column.
    noise_level: fraction of edges to drop. It is halved when `bidirectional`
      is set, because every sampled removal then also removes the reverse edge.
    bidirectional: if True, after sampling, any kept edge whose reverse
      direction was not kept is dropped as well.

  Returns:
    int64 tensor of shape (2, num_kept) with the surviving edges, in the
    order they were sampled.
  """
  src_all, dst_all = data.edge_index[0], data.edge_index[1]
  total_edges = data.edge_index.shape[1]

  effective_noise = noise_level / 2 if bidirectional else noise_level
  keep_count = int((1 - effective_noise) * total_edges)
  kept_positions = np.random.choice(total_edges, keep_count, replace=False)

  kept_src = src_all[kept_positions].to(torch.int64)
  kept_dst = dst_all[kept_positions].to(torch.int64)

  if not bidirectional:
    return torch.stack((kept_src, kept_dst))

  # Keep an edge only when its opposite direction also survived the sampling.
  kept_pairs = list(zip(kept_src.tolist(), kept_dst.tolist()))
  surviving = set(kept_pairs)
  symmetric_pairs = [(u, v) for (u, v) in kept_pairs if (v, u) in surviving]

  return torch.tensor(symmetric_pairs, dtype=torch.int64).reshape(-1, 2).t().contiguous()



In [14]:
# remove x% edges from random k, top_k, bottom_k nodes

def choose_nodes(data, num_nodes, k_nodes, choose_type):
  """Pick `k_nodes` node indices according to `choose_type`.

  Args:
    data: graph object; only `data.edge_index` is read, and only for the
      'top_k' / 'bottom_k' strategies.
    num_nodes: total number of nodes in the graph.
    k_nodes: how many nodes to pick.
    choose_type: 'random' (uniform, without replacement), 'top_k' (largest
      degree) or 'bottom_k' (smallest degree).

  Returns:
    1-D integer tensor holding the chosen node indices.

  Raises:
    ValueError: if `choose_type` is not one of the three supported values.
  """
  if choose_type == 'random':
    return torch.from_numpy(np.random.choice(num_nodes, k_nodes, replace=False))

  if choose_type in ('top_k', 'bottom_k'):
    # Degree is counted over row 0 of edge_index. num_nodes is passed so the
    # degree vector has one entry per node even when the highest-numbered
    # nodes never appear in row 0; otherwise they could never be picked.
    dg = torch_geometric.utils.degree(data.edge_index[0], num_nodes=num_nodes)
    _, picked_indices = torch.topk(dg, k_nodes, largest=(choose_type == 'top_k'))
    return picked_indices

  # Raising a plain string is itself a TypeError in Python 3.
  raise ValueError('choose_type should be from random, top_k, bottom_k')

# to do loop for each node separately
def remove_edges_from_chosen_nodes(data, nodes_chosen, edges_to_remove_per_node_ratio):
  """Thin the edges of every chosen node and return what is left.

  `remove_edge_per_node` is called once per entry of `nodes_chosen`, in order;
  the per-node results are concatenated.

  Returns:
    (sources, targets): two 1-D tensors with the kept edges of all chosen nodes.
  """
  kept_per_node = [
      remove_edge_per_node(data=data, node=chosen,
                           edges_to_remove_per_node_ratio=edges_to_remove_per_node_ratio)
      for chosen in nodes_chosen
  ]

  kept_sources = torch.cat([kept[0] for kept in kept_per_node], 0)
  kept_targets = torch.cat([kept[1] for kept in kept_per_node], 0)
  return kept_sources, kept_targets

def remove_edge_per_node(data, node, edges_to_remove_per_node_ratio=0.1):
  """Randomly drop a share of the edges whose source (row 0) is `node`.

  Args:
    data: object exposing `edge_index` with one edge per column.
    node: the node whose edges are thinned.
    edges_to_remove_per_node_ratio: fraction of this node's edges to remove.
      The count is rounded up, so any positive ratio removes at least one
      edge of a node that has edges. Values outside [0, 1] are clamped.

  Returns:
    (sources, targets): 1-D tensors with the edges of `node` that were kept.
  """
  from_node = torch.isin(data.edge_index[0], node)
  node_sources = data.edge_index[0][from_node]
  node_targets = data.edge_index[1][from_node]
  num_node_edges = node_sources.shape[0]

  # ceil => remove at least one edge unless the ratio is 0.
  num_edges_remove = int(math.ceil(edges_to_remove_per_node_ratio * num_node_edges))
  # Clamp so an out-of-range ratio cannot request a negative sample size.
  num_edges_remove = min(max(num_edges_remove, 0), num_node_edges)
  num_edges_keep = num_node_edges - num_edges_remove

  # Sample the edges to keep; everything else is removed.
  kept_positions = np.random.choice(num_node_edges, num_edges_keep, replace=False)

  return node_sources[kept_positions], node_targets[kept_positions]

def remove_edges_from_nodes(data, noise_level = 0.15, k_nodes=10,
                            choose_type='random', bidirectional=False):
  """Remove a share of the edges of `k_nodes` selected nodes.

  Args:
    data: graph object exposing `edge_index` and `num_nodes`.
    noise_level: per-node fraction of edges to remove. It is halved when
      `bidirectional` is set, because each removal then also removes the
      reverse edge.
    k_nodes: number of nodes whose edges are thinned.
    choose_type: node selection strategy forwarded to `choose_nodes`
      ('random', 'top_k' or 'bottom_k').
    bidirectional: if True, every edge whose reverse direction did not
      survive is dropped too.

  Returns:
    The (2, num_kept) int64 edge index, passed through
    `tg_utils.sort_edge_index`.
  """
  if bidirectional:
    noise_level /= 2

  # choose topk, bottomk, or random
  nodes_chosen = choose_nodes(data=data, num_nodes=data.num_nodes, k_nodes=k_nodes, choose_type=choose_type)

  # Edges whose source is not a chosen node are kept untouched.
  all_sources, all_targets = data.edge_index[0], data.edge_index[1]
  untouched = ~torch.isin(all_sources, nodes_chosen)

  # Edges of the chosen nodes that survive the per-node removal.
  chosen_sources, chosen_targets = remove_edges_from_chosen_nodes(
      data=data, nodes_chosen=nodes_chosen,
      edges_to_remove_per_node_ratio=noise_level)

  final_sources = torch.cat([all_sources[untouched], chosen_sources], 0)
  final_targets = torch.cat([all_targets[untouched], chosen_targets], 0)

  if bidirectional:
    # Keep an edge only if its opposite direction is still present.
    pairs = list(zip(final_sources.tolist(), final_targets.tolist()))
    present = set(pairs)
    symmetric = [(u, v) for (u, v) in pairs if (v, u) in present]
    kept = torch.tensor(symmetric, dtype=torch.int64,
                        device=all_sources.device).reshape(-1, 2)
    final_sources, final_targets = kept[:, 0], kept[:, 1]

  edge_index_removed = torch.stack((final_sources, final_targets)).to(torch.int64)
  return tg_utils.sort_edge_index(edge_index_removed)


In [16]:
def add_random_edges(data, noise_level = 0.15, bidirectional=False):
  '''
  Add random edges that are not yet in `data.edge_index`.

  `int(num_edges * noise_level)` insertions are made; `noise_level` is halved
  when `bidirectional` is set because each insertion then adds both
  directions. Self-loops are never generated. For a drawn pair (u, v):
    - neither direction exists: add u->v (and v->u when `bidirectional`);
    - only one direction is missing: add the missing one;
    - both exist: draw again.

  Returns the extended edge index passed through `tg_utils.sort_edge_index`.
  `data` itself is not modified.

  Raises ValueError if the graph has no free node pair left to connect.
  '''
  edge_index = data.edge_index
  num_nodes = data.num_nodes

  if bidirectional:
    noise_level /= 2

  num_of_new_edges = int(edge_index.shape[1] * noise_level)

  # A set gives constant-time membership tests instead of scanning every edge
  # for every candidate.
  existing = set(zip(edge_index[0].tolist(), edge_index[1].tolist()))
  # Directed, non-self-loop slots that are still unused; guards the rejection
  # loop below against spinning forever on a saturated graph.
  free_slots = num_nodes * (num_nodes - 1) - sum(1 for u, v in existing if u != v)

  added = []
  for _ in range(num_of_new_edges):
    if free_slots <= 0:
      raise ValueError('graph has no free node pair left to add an edge to')
    while True:
      src, dst = torch.randint(num_nodes, (2,)).tolist()
      if src == dst:
        # A self-loop equals its own reverse and would be inserted twice in
        # bidirectional mode.
        continue
      forward, backward = (src, dst), (dst, src)
      forward_exists = forward in existing
      backward_exists = backward in existing
      if forward_exists and backward_exists:
        continue
      if not forward_exists and not backward_exists:
        picked = [forward, backward] if bidirectional else [forward]
      elif not forward_exists:
        picked = [forward]
      else:
        picked = [backward]
      existing.update(picked)
      added.extend(picked)
      free_slots -= len(picked)
      break

  if added:
    # Append all new edges at once rather than re-allocating per edge.
    extra = torch.tensor(added, dtype=edge_index.dtype, device=edge_index.device).t()
    edge_index = torch.cat((edge_index, extra), 1)

  edge_index_sorted = tg_utils.sort_edge_index(edge_index)
  return edge_index_sorted

In [17]:
# Smoke run of add_random_edges: for each ratio, store the noised edge index on
# `data` under the key 'edge_index_added_n_<ratio>'.
for added_ratio in [0.15]:
  new_edges = add_random_edges(data,  noise_level=added_ratio, bidirectional=True)
  data[f'edge_index_added_n_{added_ratio}'] = new_edges


In [18]:
## Helper function of add_edges_to_nodes

def add_edges_to_a_node(data, new_edges, node, num_to_add=0, bidirectional=False):
  """Attach `num_to_add` new random edges to `node`.

  Args:
    data: graph object; only `data.num_nodes` is read.
    new_edges: (num_edges, 2) tensor of [source, target] rows to extend.
    node: index of the node that receives the new edges.
    num_to_add: number of insertions to perform.
    bidirectional: if True and neither direction exists yet, both
      node->other and other->node are added in one insertion.

  Returns:
    `new_edges` with the added rows appended. The input tensor is not modified.

  Raises:
    ValueError: if `node` is already connected in both directions to every
      other node, so no further edge can be added.
  """
  if num_to_add <= 0:
    return new_edges

  node = int(node)
  num_nodes = data.num_nodes

  # Only edges touching `node` matter for the existence checks.
  out_neighbours = set(new_edges[new_edges[:, 0] == node, 1].tolist())
  in_neighbours = set(new_edges[new_edges[:, 1] == node, 0].tolist())

  added = []
  for _ in range(num_to_add):
    fully_linked = (out_neighbours & in_neighbours) - {node}
    if len(fully_linked) >= num_nodes - 1:
      # Without this guard the rejection loop below would never terminate.
      raise ValueError(f'node {node} has no free neighbour left to connect to')

    # Draw until we find a partner that still lacks at least one direction.
    while True:
      other = int(torch.randint(num_nodes, (1,)))
      if other == node:
        # A self-loop equals its own reverse and would be inserted twice in
        # bidirectional mode.
        continue
      forward_exists = other in out_neighbours
      backward_exists = other in in_neighbours
      if forward_exists and backward_exists:
        continue
      if not forward_exists and not backward_exists:
        added.append((node, other))
        out_neighbours.add(other)
        if bidirectional:
          added.append((other, node))
          in_neighbours.add(other)
      elif not forward_exists:
        added.append((node, other))
        out_neighbours.add(other)
      else:
        added.append((other, node))
        in_neighbours.add(other)
      break

  extra = torch.tensor(added, dtype=new_edges.dtype, device=new_edges.device)
  return torch.cat((new_edges, extra), 0)

## Adding x% edges to random k, top k, or bottom k nodes

def add_edges_to_nodes(data, noise_level=0.15, k_nodes=10, chosse_type='random', bidirectional=False):
  """Add random edges to `k_nodes` selected nodes.

  For each selected node, the number of insertions is
  `int(occurrences * noise_level)`, where `occurrences` counts how often the
  node appears anywhere in `data.edge_index` (both rows). `noise_level` is
  halved when `bidirectional` is set.

  Args:
    data: graph object exposing `edge_index` and `num_nodes`.
    noise_level: fraction of a node's edge occurrences to add as new edges.
    k_nodes: number of nodes to modify.
    chosse_type: selection strategy forwarded to `choose_nodes` as
      `choose_type` (the misspelt name is part of the public signature).
    bidirectional: forwarded to `add_edges_to_a_node`.

  Returns:
    The extended edge index passed through `tg_utils.sort_edge_index`.
  """
  effective_noise = noise_level / 2 if bidirectional else noise_level

  # choose nodes by one of the three strategies
  targets = choose_nodes(data=data, num_nodes=data.num_nodes, k_nodes=k_nodes, choose_type=chosse_type)

  # Work on (num_edges, 2) rows; each helper call returns the extended rows.
  edge_rows = data.edge_index.T
  for target in targets:
    occurrences = torch.isin(data.edge_index, target).to(int).sum()
    quota = int(occurrences * effective_noise)
    edge_rows = add_edges_to_a_node(data, edge_rows, target.item(), quota, bidirectional)

  return tg_utils.sort_edge_index(edge_rows.T)

## Experiments

In [19]:
!pip install class-resolver

from torch_geometric.nn import MLP, GCN, GraphSAGE, GAT
from class_resolver import ClassResolver

Collecting class-resolver
  Downloading class_resolver-0.3.4-py3-none-any.whl (20 kB)
Installing collected packages: class-resolver
Successfully installed class-resolver-0.3.4


In [81]:
def get_model(model_name, model_params):
  """Build one of the supported models and move it to the available device.

  Args:
    model_name: 'MLP', 'GNN' (built as GCN), 'GAT' or 'Graphsage'. The match
      is case-sensitive.
    model_params: keyword arguments forwarded to the model constructor.

  Returns:
    The model, on "cuda:0" when CUDA is available and on the CPU otherwise.

  Raises:
    ValueError: if `model_name` is not one of the supported names.
  """
  # add more model from here if needed: https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#models
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  if model_name=='MLP':
    model = MLP(**model_params)
  elif model_name=='GNN':
    model = GCN(**model_params)
  elif model_name=='GAT':
    model = GAT(**model_params)
  elif model_name=='Graphsage':
    model = GraphSAGE(**model_params)
  else:
    # Raising a plain string is itself a TypeError in Python 3.
    raise ValueError('Model names should be within MLP, GNN, GAT, Graphsage')
  return model.to(device)

In [21]:
# Example: build an MLP. Put every constructor argument you want to use or tune
# into the dictionary below; get_model forwards it as keyword arguments.
model_params= {'in_channels':1433, 'hidden_channels':16, 'out_channels':7, 'num_layers':3}
mlp_model = get_model(model_name='MLP', model_params=model_params)
print(mlp_model)

# Same for the 'GNN' entry (built as GCN by get_model), here with dropout.
model_params= {'in_channels':1433, 'hidden_channels':16, 'out_channels':7, 'num_layers':3, 'dropout':0.1}
gnn_model = get_model(model_name='GNN', model_params=model_params)
print(gnn_model)


MLP(1433, 16, 16, 7)
GCN(1433, 7, num_layers=3)


In [78]:
def get_noise_function(noise_info):
  '''
  Return the noise function selected by `noise_info`.

  `noise_info['noise_type']` is one of:
    - 'feature_noise'  -> get_masked_noise
    - 'edge_removal'   -> remove_from_all_edges ('all_edges' strategy) or
                          remove_edges_from_nodes ('nodes' strategy)
    - 'edge_addition'  -> add_random_edges ('all_edges' strategy) or
                          add_edges_to_nodes ('nodes' strategy)
  For the two edge noise types `noise_info['strategy']` picks the variant.

  Change the names below if a noise function is renamed.

  Raises ValueError for an unknown noise type or strategy.
  '''
  noise_type = noise_info['noise_type']

  if noise_type=='feature_noise':
    return get_masked_noise

  if noise_type not in ('edge_removal', 'edge_addition'):
    # Raising a plain string is itself a TypeError in Python 3.
    raise ValueError('Noise type should be chosen from feature_noise, edge_removal, edge_addition')

  strategy = noise_info.get('strategy')
  if strategy not in ('all_edges', 'nodes'):
    # Fail here rather than returning None and crashing later at the call site.
    raise ValueError(f'Strategy should be chosen from all_edges, nodes (got {strategy!r})')

  if noise_type=='edge_removal':
    if strategy=='all_edges':
      return remove_from_all_edges
    return remove_edges_from_nodes

  if strategy=='all_edges':
    return add_random_edges
  return add_edges_to_nodes
  

def create_noised_data_for_experiment(data, noise_info):
  '''
  Create the noised tensors up front and store them on `data`, so the same
  noised sample can be reused across experiments and models.

  One entry is created per value in `noise_info['noise_levels']` (and, for the
  'nodes' strategy, per value in `noise_info['k_nodes_list']`). Each call to
  the noise function receives `noise_info['params']` plus `noise_level` (and
  `k_nodes` for the 'nodes' strategy).

  `noise_info['params']` is not modified: the per-call arguments are built in
  a fresh dict, so the caller's dict does not end up carrying the last
  `noise_level` / `k_nodes` of the sweep.

  Returns the list of keys under which the results were stored on `data`.
  '''
  noise_fn = get_noise_function(noise_info)
  strategy = noise_info.get('strategy')
  base_params = noise_info['params']

  if strategy=='nodes':
    add_prefix = f"{strategy}_choose_type-{base_params.get('choose_type', '')}_bidirec-{base_params.get('bidirectional', '')}"
    # (name fragment, extra keyword arguments) for every k_nodes value
    variants = [(f'_knodes-{k_nodes}', {'k_nodes': k_nodes}) for k_nodes in noise_info['k_nodes_list']]
  else:
    # all_edges and feature (gaussian) noise: no per-node sweep
    add_prefix = f"{strategy}_" if strategy=='all_edges' else ''
    variants = [('', {})]

  noise_data_names = []
  for name_fragment, extra_params in variants:
    for noise_level in noise_info['noise_levels']:
      noise_data_name = f'{noise_info["noise_type"]}_{add_prefix}{name_fragment}_noiselvl-{noise_level}'
      call_params = {**base_params, **extra_params, 'noise_level': noise_level}
      data[noise_data_name] = noise_fn(**call_params)
      noise_data_names.append(noise_data_name)
  return noise_data_names
 
# reuse train and test
def train(model, optimizer, x_type='x', edge_type='edge_index'):
  """Run one full-batch optimisation step and return the training loss.

  Reads the notebook-level `data` object: `data[x_type]` and `data[edge_type]`
  are the model inputs; the loss is cross-entropy over the nodes selected by
  `data.train_mask`.
  """
  model.train()
  optimizer.zero_grad()  # reset gradients from the previous step

  train_nodes = data.train_mask
  logits = model(data[x_type], data[edge_type])  # single forward pass
  loss = torch.nn.CrossEntropyLoss()(logits[train_nodes], data.y[train_nodes])

  loss.backward()   # compute gradients
  optimizer.step()  # apply the update
  return loss

def test(model, x_type='x', edge_type='edge_index'):
  """Return the accuracy of `model` on the nodes selected by `data.test_mask`.

  Reads the notebook-level `data` object; `data[x_type]` and `data[edge_type]`
  are the model inputs.
  """
  model.eval()
  # No gradients are needed for evaluation; skip building the autograd graph.
  with torch.no_grad():
    out = model(data[x_type], data[edge_type])
  pred = out.argmax(dim=1)  # Use the class with highest probability.
  test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
  test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
  return test_acc

def validation(model, x_type='x', edge_type='edge_index'):
  """Return the accuracy of `model` on the nodes selected by `data.val_mask`.

  Reads the notebook-level `data` object; `data[x_type]` and `data[edge_type]`
  are the model inputs.
  """
  model.eval()
  # No gradients are needed for evaluation; skip building the autograd graph.
  with torch.no_grad():
    out = model(data[x_type], data[edge_type])
  pred = out.argmax(dim=1)  # Use the class with highest probability.
  val_correct = pred[data.val_mask] == data.y[data.val_mask]  # Check against ground-truth labels.
  val_acc = int(val_correct.sum()) / int(data.val_mask.sum())  # Derive ratio of correct predictions.
  return val_acc

def experiment(dataset_name, model_names, model_params, data, x_types, edge_types, noise_info, opt_params,  num_epochs=50, repeat_num=1, print_updates=False,print_updates_detail=False):
  '''
  Train and evaluate every (model, x_type, edge_type) combination.

  Assumes the noisy data is already created and stored on the data object (so
  the same noised sample is used for every model being compared).

  For each combination a fresh model is trained `repeat_num` times for
  `num_epochs` epochs on the (possibly noised) inputs named by `x_type` /
  `edge_type`, then tested on the original 'x' / 'edge_index'.

  Note: `train` and `test` read the notebook-level `data` object; the `data`
  argument is accepted but not used by this function.

  `model_params` and `opt_params` are dicts keyed by model name; `opt_params`
  entries provide 'lr' and 'weight_decay' for Adam.

  Returns a DataFrame with one row per combination. With no combinations
  (an empty list of models, x types or edge types) an empty DataFrame with
  the same columns is returned.
  '''
  # Fixed up front so an empty sweep still yields a well-formed frame.
  result_columns = ['dataset_name', 'model_name', 'x_type', 'edge_type',
                    'mean_test_accuracy', 'test_accuracies', 'num_epochs',
                    'model_params', 'noise_info', 'repeat_num']

  exp_count = len(x_types)*len(edge_types)*len(model_names)
  count=0
  res = []
  for model_name in model_names:
    for x_type in x_types:
      for ed_type in edge_types:
        count+=1
        test_accs = []
        for exp_num in range(1, repeat_num+1): # repeat the experiment to make the result more reliable

          model =  get_model(model_name=model_name, model_params=model_params[model_name])
          optimizer = torch.optim.Adam(model.parameters(), lr=opt_params[model_name]['lr'], weight_decay=opt_params[model_name]['weight_decay'])
          for epoch in range(num_epochs):
              train(model, optimizer, x_type=x_type, edge_type=ed_type)

          # we test our results in original data, no noise added ones
          # if you want to test on noised data change them to x_type=x_type and edge_type=ed_type below
          test_acc = test(model, x_type='x',edge_type='edge_index')
          test_accs.append(test_acc)
          if print_updates_detail:
            print(f'Exp_num:{exp_num} Test Accuracy: {test_acc:.4f}')

        mean_test_acc = round(np.mean(test_accs), 3)
        # TODO we can add other metrics such as runtime to log in here later
        exp_res = {'dataset_name': dataset_name,
                  'model_name':model_name, 'x_type':x_type,
                'edge_type':ed_type, 'mean_test_accuracy':str(mean_test_acc), 'test_accuracies': ','.join([str(acc) for acc in test_accs]),
                  'num_epochs':num_epochs, 'model_params': model_params[model_name],
                  'noise_info':noise_info, 'repeat_num':repeat_num} # TODO add more info on experiment noise_info
        res.append(exp_res)
        if print_updates:
            print(f'Run {model_name}: {count}/{exp_count}: {x_type} - {ed_type} Avg. Test Accuracy: {mean_test_acc}')

  res_df = pd.DataFrame(res, columns=result_columns)
  return res_df

In [23]:
# Label written to the 'dataset_name' column of every experiment result row.
dataset_name='Cora'

In [24]:
def nodes_percent_to_num_nodes(percents, num_nodes):
  """Convert fractions of the node set into rounded node counts.

  Each entry of `percents` is multiplied by `num_nodes` and rounded with the
  built-in `round`; the counts are returned in the same order.
  """
  node_counts = []
  for fraction in percents:
    node_counts.append(round(fraction * num_nodes))
  return node_counts

In [90]:
# Optimized Model Params

# Constructor arguments per model. The keys are the names get_model accepts
# (case-sensitive: 'Graphsage', not 'GraphSage'); experiment() looks entries up
# by model name.
model_params = {
    'GNN':{'in_channels':1433, 'hidden_channels':30, 'out_channels':7, 'num_layers':2, 'dropout':0.2, 'aggr' :'add'},
    'GAT':{'in_channels':1433, 'hidden_channels':15, 'out_channels':7, 'num_layers':2, 'dropout':0.2},
    'Graphsage':{'in_channels':1433, 'hidden_channels':42, 'out_channels':7, 'num_layers':2, 'dropout':0.2}
}
# Adam settings per model, keyed by the same names as model_params.
opt_params = {
    'GNN':{'lr':0.05, 'weight_decay':1e-6}, 
    'GAT':{'lr':0.05, 'weight_decay':5e-4}, 
    'Graphsage':{'lr':0.05, 'weight_decay':5e-4}, 
}


### Experiment 1 - Remove Edges: Random - nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes
import re

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = 'Experiment_1_remove_random_nodes.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2]  # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

# The noise functions build their tensors on the CPU, so create the noised
# edge indices there before moving everything to the training device.
data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':True, 'choose_type':'random'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params,
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Use the same device rule as get_model so the cell also runs without a GPU.
run_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
data = data.to(run_device)
# Put names of the models we want to use for this experiment type.
# They must match the model_params / opt_params keys exactly ('Graphsage').
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only: recover the sweep settings from the edge_type
# name. A regex is used because choose_type values such as 'top_k' contain '_'
# themselves and cannot be recovered by splitting the name on '_'.
edge_type_pattern = re.compile(r'choose_type-(.+)_bidirec-(.+)_knodes-(.+)_noiselvl-(.+)$')
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(
    lambda row: list(edge_type_pattern.search(row['edge_type']).groups()), axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

### Experiment 2 - Remove Edges: topk - nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes
import re

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = 'Experiment_2_remove_topk_nodes.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2]  # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

# The noise functions build their tensors on the CPU, so create the noised
# edge indices there before moving everything to the training device.
data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':True, 'choose_type':'top_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params,
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Use the same device rule as get_model so the cell also runs without a GPU.
run_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
data = data.to(run_device)
# Put names of the models we want to use for this experiment type.
# They must match the model_params / opt_params keys exactly ('Graphsage').
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only: recover the sweep settings from the edge_type
# name. A regex is used because the choose_type value 'top_k' contains '_'
# itself; splitting the name on '_' would record it as 'top'.
edge_type_pattern = re.compile(r'choose_type-(.+)_bidirec-(.+)_knodes-(.+)_noiselvl-(.+)$')
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(
    lambda row: list(edge_type_pattern.search(row['edge_type']).groups()), axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

### Experiment 3 - Remove Edges: Bottom k - nodes

In [None]:
# noise data creation example: remove edges -- strategy 2: remove from selected nodes
import re

# **** DONT forget to change this *****
EXPERIMENT_FILENAME = 'Experiment_3_remove_bottomk_nodes.csv'
# **** DONT forget to change this *****

nodes_percent = [0.05, 0.1, 0.2]  # [0.05, 0.1, 0.2, 0.25, 0.4, 0.5]
k_nodes_list = nodes_percent_to_num_nodes(percents=nodes_percent, num_nodes=data.num_nodes)

# The noise functions build their tensors on the CPU, so create the noised
# edge indices there before moving everything to the training device.
data = data.to('cpu')
remove_edge_from_nodes_params = {'data':data, 'bidirectional':True, 'choose_type':'bottom_k'}

noise_info = {'noise_type':'edge_removal', 'strategy':'nodes', 'params':remove_edge_from_nodes_params,
              'noise_levels':[0, 0.01, 0.05, 0.1, 0.15, 0.3, 0.45, 0.6, 0.9, 0.95, 0.99, 1 ],
              'k_nodes_list':k_nodes_list,
              'noise_param_name':'noise_level'}

exp_rm_random_nodes_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge removal - strategy- nodes
# Use the same device rule as get_model so the cell also runs without a GPU.
run_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
data = data.to(run_device)
# Put names of the models we want to use for this experiment type.
# They must match the model_params / opt_params keys exactly ('Graphsage').
model_names = ['GNN', 'GAT', 'Graphsage']
exp_rm_random_nodes_df = experiment(dataset_name=dataset_name, model_names=model_names,
                                    model_params=model_params, opt_params=opt_params,
                                    data=data, x_types=['x'], edge_types=exp_rm_random_nodes_var_names,
                                    noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)

# Do for node strategies only: recover the sweep settings from the edge_type
# name. A regex is used because the choose_type value 'bottom_k' contains '_'
# itself; splitting the name on '_' would record it as 'bottom'.
edge_type_pattern = re.compile(r'choose_type-(.+)_bidirec-(.+)_knodes-(.+)_noiselvl-(.+)$')
exp_rm_random_nodes_df[['choose_type', 'bidirectional','k_nodes','noise_level']] = exp_rm_random_nodes_df.apply(
    lambda row: list(edge_type_pattern.search(row['edge_type']).groups()), axis=1, result_type='expand')
data = data.to('cpu')
exp_rm_random_nodes_df.to_csv(EXPERIMENT_FILENAME)
exp_rm_random_nodes_df

## Sample experiments for the other noise types

The cells below come from an older version of the notebook and may need to be adapted to the function signatures and experiment setup used above.

In [None]:
# noise data creation example: add edges
# Noise is generated on the CPU, before the data is moved to the training device.
data = data.to('cpu')
edge_addition_params = {'data':data}
noise_info = {'noise_type':'edge_addition', 'params':edge_addition_params, 'noise_levels':[0.3, 0.6, 0.9 ], 'strategy': 'all_edges'}

noise_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on edge addition noise
model_names = ['GNN', 'GAT', 'Graphsage']
model_params = {  # TODO random params change later
    'GNN':{'in_channels':1433, 'hidden_channels':16, 'out_channels':7, 'num_layers':3, 'dropout':0.1},
    'GAT':{'in_channels':1433, 'hidden_channels':12, 'out_channels':7, 'num_layers':3, 'dropout':0.2},
    'Graphsage':{'in_channels':1433, 'hidden_channels':14, 'out_channels':7, 'num_layers':3, 'dropout':0.1}
}
# get_model places the model on the GPU when one is available; the data has to
# live on the same device during training.
run_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
data = data.to(run_device)
# opt_params is a required argument of experiment(); reuse the notebook-level settings.
exp_df = experiment(dataset_name=dataset_name, model_names=model_names, model_params=model_params, opt_params=opt_params,
           data=data, x_types=['x'],
           edge_types=noise_var_names, noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)
data = data.to('cpu')

exp_df

In [None]:
# noise data creation example: add noise feature
# Noise is generated on the CPU, before the data is moved to the training device.
data = data.to('cpu')
# get_masked_noise takes the graph object (`data`), not the feature matrix.
feature_noise_params = {'data':data}
noise_info = {'noise_type':'feature_noise', 'params':feature_noise_params, 'noise_levels':[0, 0.3, 0.6 ], 'noise_param_name':'noise_level'}

noise_var_names_feat_exp =  create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)

# Experiment on feature noise
model_names = ['GNN', 'GAT']
model_params = {  # TODO random params change later
    'GNN':{'in_channels':1433, 'hidden_channels':16, 'out_channels':7, 'num_layers':3, 'dropout':0.1},
    'GAT':{'in_channels':1433, 'hidden_channels':12, 'out_channels':7, 'num_layers':3, 'dropout':0.2},
    'Graphsage':{'in_channels':1433, 'hidden_channels':14, 'out_channels':7, 'num_layers':3, 'dropout':0.1}
}
# get_model places the model on the GPU when one is available; the data has to
# live on the same device during training.
run_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
data = data.to(run_device)
# opt_params is a required argument of experiment(); reuse the notebook-level settings.
exp_df2 = experiment(dataset_name=dataset_name,model_names=model_names, model_params=model_params, opt_params=opt_params,
           data=data, x_types=noise_var_names_feat_exp,
           edge_types=['edge_index'], noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)
data = data.to('cpu')

exp_df2

In [None]:
# noise data creation example: remove edges -- strategy 1: from all edges

# Noise is generated on the CPU, before the data is moved to the training device.
data = data.to('cpu')
# remove_from_all_edges takes the graph object as `data` and the flag as `bidirectional`.
remove_edge_params = {'data':data, 'bidirectional':True}
noise_info = {'noise_type':'edge_removal', 'strategy':'all_edges', 'params':remove_edge_params,
              'noise_levels':[0, 0.3, 0.6, 0.9 ], 'noise_param_name':'edges_to_remove_ratio'}

exp3_edge_var_names = create_noised_data_for_experiment(data=data, noise_info=noise_info)
print(data)


# Experiment on edge removal - strategy-all edges
model_names = ['GNN', 'Graphsage']
model_params = {  # TODO random params change later
    'GNN':{'in_channels':1433, 'hidden_channels':16, 'out_channels':7, 'num_layers':3, 'dropout':0.1},
    'GAT':{'in_channels':1433, 'hidden_channels':12, 'out_channels':7, 'num_layers':3, 'dropout':0.2},
    'Graphsage':{'in_channels':1433, 'hidden_channels':14, 'out_channels':7, 'num_layers':3, 'dropout':0.1}
}
# get_model places the model on the GPU when one is available; the data has to
# live on the same device during training.
run_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
data = data.to(run_device)
# opt_params is a required argument of experiment(); reuse the notebook-level settings.
exp_df3 = experiment(dataset_name=dataset_name,model_names=model_names, model_params=model_params, opt_params=opt_params,
           data=data, x_types=['x'],
           edge_types=exp3_edge_var_names, noise_info=noise_info, num_epochs=25, repeat_num=3, print_updates=True)
data = data.to('cpu')

exp_df3