# Notebook for the experiments
In this notebook are contained the following features:
* Re-implementation of GRAFF applied to [node classification](https://arxiv.org/abs/2206.10991),

The main tools that have been exploited are [PyTorch](https://pytorch.org/) (1.13.0), [PyTorch-Lightning](https://www.pytorchlightning.ai/index.html) (1.5.10), [Pytorch-geometric](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html) (2.3.0) and [Weights & Biases](https://wandb.ai/)

### Requirements to run the notebook

In [1]:
# !pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
# !pip install pytorch-lightning==1.5.10
# !pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-1.12.0+cu113.html
# !pip install torch_geometric
# !pip install wandb

## Importing the libraries

In [2]:
######## IMPORT EXTERNAL FILES ###########
import torch
import torch.nn.functional as F
import torch.nn.utils.parametrize as parametrize
import torch.nn as nn

import wandb

import torch_geometric
from torch_geometric.nn import MessagePassing
from torch_geometric.loader import NeighborLoader
from torch_geometric.utils import add_self_loops, degree, homophily
from torch_geometric.loader import DataLoader
import torch_geometric.transforms as T



import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.loggers import WandbLogger

from torch_geometric.datasets import WebKB, Planetoid, WikipediaNetwork
# WebKB: (Texas, Wisconsin, Cornell); Planetoid: (Citeseer, Pubmed, Cora); WikipediaNetwork: (Squirrel, Chameleon)

######### IMPORT INTERNAL FILES ###########
import sys

sys.path.append("../../src")


from GRAFF import *
from config import *

  from .autonotebook import tqdm as notebook_tqdm


Node classification features initialized.....


### System configuration

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
num_gpus = 1 if device == 'cuda' else 0

if wb:
    wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdifra00[0m ([33mdeepl_wizards[0m). Use [1m`wandb login --relogin`[0m to force relogin


## PyTorch Lightning DataModule (Node classification)

In [4]:
class DataModuleNC(pl.LightningDataModule):

    def __init__(self,  dataset, mode, split, batch_size):

        self.mode = mode  # "hp" or "test"
        self.split = split
        self.batch_size = batch_size
        self.dataset = dataset

    def setup(self, stage=None):
        if stage == 'fit':
            if self.mode == 'hp':
                self.train_mask_pred = self.dataset[0].train_mask.bool()[
                    :, self.split]
                
            else:
                self.train_mask_pred = self.dataset[0].train_mask.bool(
                )[:, self.split] + self.dataset[0].val_mask.bool()[:, self.split]

        elif stage == 'test':
          
            if self.mode == 'hp':
                
                self.test_mask_pred = self.dataset[0].val_mask.bool()[
                    :, self.split]
            else:
                self.test_mask_pred = self.dataset[0].test_mask.bool()[
                    :, self.split]

    # In the transductive setting, we have available all the original graph's topology.
    def train_dataloader(self, *args, **kwargs):
        return DataLoader(self.dataset, batch_size = batch_size, shuffle = False)
    def val_dataloader(self, *args, **kwargs):
        return DataLoader(self.dataset, batch_size = batch_size, shuffle = False)
    

In [5]:
mode = 'hp'  # hp: Hyperparameter selection mode
dataM = DataModuleNC(dataset, split=0, mode=mode, batch_size=batch_size)
dataM.setup(stage='fit')
dataM.setup(stage='test') 

### PyTorch Lightning Callbacks

In [6]:

class Get_Metrics(Callback):

    def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"):

        # Compute the metrics
        train_loss = sum(
            pl_module.train_prop['loss']) / len(pl_module.train_prop['loss'])
        train_acc = sum(
            pl_module.train_prop['acc']) / len(pl_module.train_prop['acc'])
        test_loss = sum(
            pl_module.test_prop['loss']) / len(pl_module.test_prop['loss'])
        test_acc = sum(pl_module.test_prop['acc']) / \
            len(pl_module.test_prop['acc'])

        # Log the metrics
        pl_module.log(name='Loss on train', value=train_loss,
                      on_epoch=True, prog_bar=True, logger=True)
        pl_module.log(name='Accuracy on train', value=train_acc,
                      on_epoch=True, prog_bar=True, logger=True)
        pl_module.log(name='Loss on test', value=test_loss,
                      on_epoch=True, prog_bar=True, logger=True)
        pl_module.log(name='Accuracy on test', value=test_acc,
                      on_epoch=True, prog_bar=True, logger=True)

        # Re-initialize the metrics
        pl_module.train_prop['loss'] = []
        pl_module.train_prop['acc'] = []
        pl_module.test_prop['loss'] = []
        pl_module.test_prop['acc'] = []

## PyTorch Lightning Training Module (Node Classification)

In [7]:
class TrainingModule(pl.LightningModule):

    def __init__(self, model, loss_type, train_mask, test_mask, lr, wd):
        super().__init__()
        self.model = model.to(device)
        self.loss_type = loss_type  # nll or ce
        self.lr = lr
        self.wd = wd

        self.train_prop = {'loss': [], 'acc': []}
        self.test_prop = {'loss': [], 'acc': []}
        self.test_mask = test_mask.to(device)  # Move test_mask to device
        self.train_mask = train_mask.to(device)

    def training_step(self, batch, batch_idx):
        y = batch.y
        
        y = y[:batch_size]
        # print("Y AFTER: ", y.shape)

        out = self.model(batch)

        out = out[:batch_size]

        mask = self.train_mask

        if self.loss_type == 'nll':
            loss = F.nll_loss(F.log_softmax(out[mask], dim=1), y[mask])
        elif self.loss_type == 'ce':
            loss = F.cross_entropy(out[mask], y[mask])

        acc = evaluate(out, batch, mask)
        self.train_prop['loss'].append(loss)
        self.train_prop['acc'].append(acc)

        return loss

    def validation_step(self, batch, batch_idx):

        y = batch.y
        # print("Y BEFORE: (test)", y.shape)
        y = y[:batch_size]
        # print("Y AFTER: (test)", y.shape)

        mask = self.test_mask

        out = self.model(batch)

        out = out[:batch_size]
        # print("OUT_SHAPE: ", out.shape)

        # print("TEST_MASK: ", mask.shape)

        if self.loss_type == 'nll':
            loss = F.nll_loss(F.log_softmax(
                out[mask], dim=1), y[mask])
        elif self.loss_type == 'ce':
            loss = F.cross_entropy(out[mask], y[mask])

        acc = evaluate(out, batch, mask)
        self.test_prop['loss'].append(loss)
        self.test_prop['acc'].append(acc)

        return loss

    def configure_optimizers(self):
        self.optimizer = torch.optim.Adam(
            self.model.parameters(), lr=self.lr, weight_decay=self.wd)
        return self.optimizer


def evaluate(out, data, mask=None):
    # print("OUTPUT IS DIM: ", out.shape)
    out = F.log_softmax(out, dim=1)
    # print("OUTPUT log_softmax shape is: ", out.shape)

    pred = out[mask].argmax(1)

    acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item(
    )

    return acc

In [8]:
# This mask contains the nodes for which we want to do the inference.
test_mask = dataM.test_mask_pred
train_mask = dataM.train_mask_pred

#### hp enables a grid search on a wide set of hyperparameters.
if mode != 'hp':
   model = PhysicsGNN_NC(dataset, hidden_dim, num_layers, step = step, device = device)
   # model = GRAFFNet(dataset, hidden_dim, self_loops=False, step_size=step)
   pl_training_module = TrainingModule(model, loss_type, train_mask, test_mask, lr, wd)
 


### Hyperparameters Tuning

In [None]:
def sweep_train(config=None):
    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        model = PhysicsGNN_NC(dataset, config.hidden_dim,
                              config.num_layers, step=config.step, device = device)
        pl_training_module = TrainingModule(
            model, config.loss_type, train_mask, test_mask, config.lr, config.wd)
        exp_name = "Sweep_" + dataset_name
        wandb_logger = WandbLogger(
            project=project_name, name=exp_name, config=hyperparameters)
        trainer = trainer = pl.Trainer(
            max_epochs=epochs,  # maximum number of epochs.
            gpus=num_gpus,  # the number of gpus we have at our disposal.
            default_root_dir="", callbacks=[Get_Metrics(), EarlyStopping('Loss on test', mode='min', patience=15)],
            logger=wandb_logger
        )
        trainer.fit(model = pl_training_module, datamodule = dataM)

if mode == 'hp':

    import pprint

    pprint.pprint(sweep_config)

    sweep_id = wandb.sweep(sweep_config, project=project_name)

    wandb.agent(sweep_id, sweep_train, count=1000)

    wandb.finish()


In [None]:
if wb:
    exp_name = "Node_class_lr: " + \
        str(hyperparameters['learning rate']) + \
        '_wd: ' + str(hyperparameters['weight decay'])
    description = ' initial tests'
    exp_name += description
    wandb_logger = WandbLogger(
        project=project_name, name=exp_name, config=hyperparameters)


trainer = trainer = pl.Trainer(
    max_epochs=epochs,  # maximum number of epochs.
    gpus=num_gpus,  # the number of gpus we have at our disposal.
    default_root_dir="", callbacks=[Get_Metrics(), EarlyStopping('Loss on test', mode='min', patience=15)],
    logger=wandb_logger if wb else None

)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [11]:
trainer.fit(model = pl_training_module, datamodule = dataM)
if wb:
    wandb.finish()

  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type          | Params
----------------------------------------
0 | model | PhysicsGNN_NC | 96.4 K
----------------------------------------
96.4 K    Trainable params
0         Non-trainable params
96.4 K    Total params
0.386     Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 2/2 [00:00<00:00, 62.74it/s, loss=1.94, Loss on train=1.940, Accuracy on train=0.175, Loss on test=1.930, Accuracy on test=0.240]

Epoch 126: 100%|██████████| 2/2 [00:00<00:00, 66.14it/s, loss=0.0482, v_num=c8nd, Loss on train=0.0394, Accuracy on train=0.999, Loss on test=0.395, Accuracy on test=0.889] 


0,1
Accuracy on test,▁▃▃▃▃▄▅▅▆▆▆▇▇▇▇▇▇███████████████████████
Accuracy on train,▁▄▄▄▄▄▅▆▆▆▇▇▇▇▇▇▇███████████████████████
Loss on test,██▇▇▆▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss on train,██▇▇▇▆▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
Accuracy on test,0.8893
Accuracy on train,0.99861
Loss on test,0.39472
Loss on train,0.03945
epoch,126.0
trainer/global_step,126.0
