<a href="https://colab.research.google.com/github/ccunique/ds_code_examples/blob/main/Graph/GNN/grapg_env_setting_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!nvidia-smi

Wed Oct 25 01:35:37 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# install torch, dgl, pyG, pygod

## torch
need torch 2.0.1, if 2.1.0 dgl sparse will not work

In [2]:
!pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118


## dgl

In [3]:
!pip install dgl -f https://data.dgl.ai/wheels/cu118/repo.html

Looking in links: https://data.dgl.ai/wheels/cu118/repo.html


## pyG series

In [4]:
!pip install torch_geometric



In [5]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cu118.html

Looking in links: https://data.pyg.org/whl/torch-2.0.0+cu118.html


## pygod

In [6]:
!pip install pygod



# version summary

In [7]:
import sys
# python >= 3.8
sys.version

'3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]'

In [8]:
import torch

print(torch.__name__,' version:', torch.__version__)

torch  version: 2.0.1+cu118


In [9]:
import dgl

print(dgl.__name__,' version:', dgl.__version__)

dgl  version: 1.1.2+cu118


In [11]:
# torch_geometric
import pyg_lib
import torch_scatter
import torch_sparse
import torch_cluster
import torch_spline_conv
import torch_geometric

for lib in [pyg_lib, torch_scatter, torch_sparse, torch_cluster, torch_spline_conv, torch_geometric]:
  print(lib.__name__,' version:', lib.__version__)

OSError: ignored

In [12]:
import pygod

print(pygod.__name__,' version:', pygod.__version__)



OSError: ignored

# test DGL on gpu

In [13]:
# test dglsp
import dgl.sparse as dglsp

In [11]:
# multigraph test
import os
import dgl
import dgl.data
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GraphConv
from dgl.

import numpy as np

u, v = torch.tensor([0, 0, 0, 1, 1]), torch.tensor([1, 2, 3, 3, 3])
g = dgl.graph((u, v))
print(g)
print(g.nodes())
print(g.edges())
print(g.edges(form='all'))
g = dgl.graph((u, v), num_nodes=4)

g.ndata['feat']=torch.tensor(np.random.rand(4,20), dtype=torch.float32)
g.ndata["label"]=torch.tensor(np.random.randint(0,2,(4,)))
g.ndata['train_mask']=torch.tensor(np.random.randint(0,2,(4,)), dtype=torch.bool)
g.ndata['val_mask']=torch.tensor(np.random.randint(0,2,(4,)), dtype=torch.bool)
g.ndata['test_mask']=torch.tensor(np.random.randint(0,2,(4,)), dtype=torch.bool)
g = dgl.add_self_loop(g)



class GCN(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h


def train(g, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    best_val_acc = 0
    best_test_acc = 0

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    for e in range(100):
        # Forward
        logits = model(g, features)

        # Compute prediction
        pred = logits.argmax(1)

        # Compute loss
        # Note that you should only compute the losses of the nodes in the training set.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Compute accuracy on training/validation/test
        train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
        test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

        # Save the best validation accuracy and the corresponding test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if e % 5 == 0:
            print(
                f"In epoch {e}, loss: {loss:.3f}, val acc: {val_acc:.3f} (best {best_val_acc:.3f}), test acc: {test_acc:.3f} (best {best_test_acc:.3f})"
            )

# # train on cpu
# train(g, model)

# train on gpu
g = g.to('cuda')
model = GCN(g.ndata["feat"].shape[1], 16, 2).to('cuda')
train(g, model)


Graph(num_nodes=4, num_edges=5,
      ndata_schemes={}
      edata_schemes={})
tensor([0, 1, 2, 3])
(tensor([0, 0, 0, 1, 1]), tensor([1, 2, 3, 3, 3]))
(tensor([0, 0, 0, 1, 1]), tensor([1, 2, 3, 3, 3]), tensor([0, 1, 2, 3, 4]))
In epoch 0, loss: 1.421, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)
In epoch 5, loss: 0.615, val acc: 1.000 (best 1.000), test acc: 1.000 (best 1.000)
In epoch 10, loss: 0.505, val acc: 1.000 (best 1.000), test acc: 1.000 (best 1.000)
In epoch 15, loss: 0.524, val acc: 1.000 (best 1.000), test acc: 1.000 (best 1.000)
In epoch 20, loss: 0.497, val acc: 1.000 (best 1.000), test acc: 1.000 (best 1.000)
In epoch 25, loss: 0.460, val acc: 1.000 (best 1.000), test acc: 1.000 (best 1.000)
In epoch 30, loss: 0.437, val acc: 1.000 (best 1.000), test acc: 1.000 (best 1.000)
In epoch 35, loss: 0.424, val acc: 1.000 (best 1.000), test acc: 1.000 (best 1.000)
In epoch 40, loss: 0.409, val acc: 1.000 (best 1.000), test acc: 1.000 (best 1.000)
In epoch 45, loss: 

# test pyG on gpu

In [None]:
# from torch_geometric.datasets import Planetoid
# from torch_geometric.transforms import NormalizeFeatures

# dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

# print()
# print(f'Dataset: {dataset}:')
# print('======================')
# print(f'Number of graphs: {len(dataset)}')
# print(f'Number of features: {dataset.num_features}')
# print(f'Number of classes: {dataset.num_classes}')

# data = dataset[0]  # Get the first graph object.

# print()
# print(data)
# print('===========================================================================================================')

# # Gather some statistics about the graph.
# print(f'Number of nodes: {data.num_nodes}')
# print(f'Number of edges: {data.num_edges}')
# print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
# print(f'Number of training nodes: {data.train_mask.sum()}')
# print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
# print(f'Has isolated nodes: {data.has_isolated_nodes()}')
# print(f'Has self-loops: {data.has_self_loops()}')
# print(f'Is undirected: {data.is_undirected()}')

In [None]:
import numpy as np
import torch
from torch_geometric.data import Data

edge_index = torch.tensor([[0, 0, 0, 1],
                           [1, 2, 3, 3]], dtype=torch.long)

feat = torch.tensor(np.random.rand(4,20), dtype=torch.float32)
label = torch.tensor(np.random.randint(0,2,(4,)))

data = Data(x=feat, edge_index=edge_index, y=label)
data.num_classes=2


data.train_mask = torch.tensor(np.random.randint(0,2,(4,)), dtype=torch.bool)
data.val_mask = torch.tensor(np.random.randint(0,2,(4,)), dtype=torch.bool)
data.test_mask = torch.tensor(np.random.randint(0,2,(4,)), dtype=torch.bool)

In [None]:
data

Data(x=[4, 20], edge_index=[2, 4], y=[4], num_classes=2, train_mask=[4], val_mask=[4], test_mask=[4])

In [None]:
data.node_attrs()

['x', 'test_mask', 'train_mask', 'y', 'val_mask']

In [None]:
data.num_features

20

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        torch.manual_seed(1234567)
        self.conv1 = GCNConv(data.num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, data.num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x

model = GCN(hidden_channels=16)
print(model)

GCN(
  (conv1): GCNConv(20, 16)
  (conv2): GCNConv(16, 2)
)


In [None]:
model = GCN(hidden_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
      model.train()
      optimizer.zero_grad()  # Clear gradients.
      out = model(data.x, data.edge_index)  # Perform a single forward pass.
      loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
      loss.backward()  # Derive gradients.
      optimizer.step()  # Update parameters based on gradients.
      return loss


# # train on cpu
# for epoch in range(1, 5):
#     loss = train()
#     print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')


# train on gpu
data = data.to('cuda')
model = model.to('cuda')
for epoch in range(1, 5):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

Epoch: 001, Loss: 1.1124
Epoch: 002, Loss: 0.9380
Epoch: 003, Loss: 0.8755
Epoch: 004, Loss: 0.7514


In [None]:
next(model.parameters()).device

device(type='cuda', index=0)