In [1]:
"""
Purpose: To practice implementation of graph convolution and 
transformer used for graph classification

GCN example: https://seunghan96.github.io/gnn/PyG_review1/

"""



'\nPurpose: To practice implementation of graph \ntransformer used for graph classification\n'

## Models

In [2]:
import torch
from torch.nn import Linear
import torch.nn.functional as F 
from torch_geometric.nn import GCNConv, TopKPooling, global_mean_pool
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

class GCN(torch.nn.Module):
    def __init__(
        self,
        embedding_size=64
                ):
        # Init parent
        super(GCN, self).__init__()
        torch.manual_seed(42)

        # GCN layers ( for Message Passing )
        self.initial_conv = GCNConv(data.num_features, embedding_size)
        self.conv1 = GCNConv(embedding_size, embedding_size)
        self.conv2 = GCNConv(embedding_size, embedding_size)
        self.conv3 = GCNConv(embedding_size, embedding_size)

        # Output layer ( for scalar output ... REGRESSION )
        self.out = Linear(embedding_size*2, 1)

    def forward(self, x, edge_index, batch_index):
        hidden = F.tanh(self.initial_conv(x, edge_index))
        hidden = F.tanh(self.conv1(hidden, edge_index))
        hidden = F.tanh(self.conv2(hidden, edge_index))
        hidden = F.tanh(self.conv3(hidden, edge_index))
          
        # Global Pooling (stack different aggregations)
        ### (reason) multiple nodes in one graph....
        ## how to make 1 representation for graph??
        ### use POOLING! 
        ### ( gmp : global MAX pooling, gap : global AVERAGE pooling )
        hidden = torch.cat([gmp(hidden, batch_index), 
                            gap(hidden, batch_index)], dim=1)

        out = self.out(hidden)
        return out, hidden
    
import torch
import torch.nn.functional as F 
from torch.nn import Linear, BatchNorm1d, ModuleList
from torch_geometric.nn import TransformerConv, TopKPooling 
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
torch.manual_seed(42)

class GNN(torch.nn.Module):
    def __init__(
        self, 
        feature_size, model_params):
        super(GNN, self).__init__()
        embedding_size = model_params["model_embedding_size"]
        n_heads = model_params["model_attention_heads"]
        self.n_layers = model_params["model_layers"]
        dropout_rate = model_params["model_dropout_rate"]
        top_k_ratio = model_params["model_top_k_ratio"]
        self.top_k_every_n = model_params["model_top_k_every_n"]
        dense_neurons = model_params["model_dense_neurons"]
        edge_dim = model_params["model_edge_dim"]

        self.conv_layers = ModuleList([])
        self.transf_layers = ModuleList([])
        self.pooling_layers = ModuleList([])
        self.bn_layers = ModuleList([])

        # Transformation layer
        self.conv1 = TransformerConv(feature_size, 
                                    embedding_size, 
                                    heads=n_heads, 
                                    dropout=dropout_rate,
                                    edge_dim=edge_dim,
                                    beta=True) 

        self.transf1 = Linear(embedding_size*n_heads, embedding_size)
        self.bn1 = BatchNorm1d(embedding_size)

        # Other layers
        for i in range(self.n_layers):
            self.conv_layers.append(TransformerConv(embedding_size, 
                                                    embedding_size, 
                                                    heads=n_heads, 
                                                    dropout=dropout_rate,
                                                    edge_dim=edge_dim,
                                                    beta=True))

            self.transf_layers.append(Linear(embedding_size*n_heads, embedding_size))
            self.bn_layers.append(BatchNorm1d(embedding_size))
            if i % self.top_k_every_n == 0:
                self.pooling_layers.append(TopKPooling(embedding_size, ratio=top_k_ratio))
            

        # Linear layers
        self.linear1 = Linear(embedding_size*2, dense_neurons)
        self.linear2 = Linear(dense_neurons, int(dense_neurons/2))  
        self.linear3 = Linear(int(dense_neurons/2), 1)  

    def forward(self, x, edge_attr, edge_index, batch_index):
        #------------------------------------------------------------------------#
        # step 1) Node & Edge 정보 사용하여, node들의 initial representation 생성
        x = self.conv1(x, edge_index, edge_attr)
        x = torch.relu(self.transf1(x))
        x = self.bn1(x)
        
        #------------------------------------------------------------------------#
        # step 2) Graph Representation 얻어내기
        ## top K pooling 사용
        global_representation = []

        for i in range(self.n_layers):
            x = self.conv_layers[i](x, edge_index, edge_attr)
            x = torch.relu(self.transf_layers[i](x))
            x = self.bn_layers[i](x)

            # top K & 가장 마지막 layer 추가하기
            if i % self.top_k_every_n == 0 or i == self.n_layers:
                x , edge_index, edge_attr, batch_index, _, _ = self.pooling_layers[int(i/self.top_k_every_n)](
                    x, edge_index, edge_attr, batch_index
                    )
                global_representation.append(torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1))
    
        x = sum(global_representation)

        #------------------------------------------------------------------------#
        # step 3) Output ( scalar ) 산출
        x = torch.relu(self.linear1(x))
        x = F.dropout(x, p=0.8, training=self.training)
        x = torch.relu(self.linear2(x))
        x = F.dropout(x, p=0.8, training=self.training)
        x = self.linear3(x)
        return x

# 1) Download Data

In [4]:
import rdkit
from torch_geometric.datasets import MoleculeNet
 
data = MoleculeNet(root=".", name="ESOL")

print("Dataset type: ", type(data))
print("Dataset features: ", data.num_features)
print("Dataset target: ", data.num_classes)
print("Dataset length: ", data.len)
print("Dataset sample: ", data[0])
print("Sample  nodes: ", data[0].num_nodes)
print("Sample  edges: ", data[0].num_edges)

Dataset type:  torch_geometric.datasets.molecule_net.MoleculeNet
Dataset features:  9
Dataset target:  734
Dataset length:  <bound method InMemoryDataset.len of ESOL(1128)>
Dataset sample:  Data(x=[32, 9], edge_index=[2, 68], edge_attr=[68, 3], y=[1, 1], smiles='OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O ')
Sample  nodes:  32
Sample  edges:  68


In [8]:
data_size = len(data)
NUM_GRAPHS_PER_BATCH = 64

# performing the split
loader = DataLoader(data[:int(data_size * 0.8)], 
                    batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True)
test_loader = DataLoader(data[int(data_size * 0.8):], 
                         batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True)

# 2) Creating the model

In [5]:
model = GCN()
print(model)
print("Number of parameters: ", sum(p.numel() for p in model.parameters()))

GCN(
  (initial_conv): GCNConv(9, 64)
  (conv1): GCNConv(64, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (out): Linear(in_features=128, out_features=1, bias=True)
)
Number of parameters:  13249


# 3) Defining Parameters for Training

In [6]:
from torch_geometric.data import DataLoader
import warnings
warnings.filterwarnings("ignore")

loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0007)  
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5) Running the Training Loop

In [10]:
def train(data):
    for batch in loader:
        batch.to(device)  
        optimizer.zero_grad() 
        #---------------------------------------------------------------#
        # data : (1) node features & (2) connection info
        # [batch.x] : torch.Size([796, 9]) ... 64개 그래프 내에 총 796개의 노드 & 각각 8차원
        # [batch.edge_index] : torch.Size([2, 1602]) ... 64개 그래프 내에 총 1602개의 엣지
        # [batch.batch] : torch.Size([796]) ... 64개 그래프 내에 총 796개의 노드 
        # 								각 노드가 어느 그래프에서 왔는지의 정보 ( 0,0,0...,63,63 )
        # [pred] : torch.Size([64, 1]) .... 그래프 당 1개의 예측값 
        # [embedding] : torch.Size([64, 128]) .... 그래프 당 1개의 임베딩
        pred, embedding = model(batch.x.float(), batch.edge_index, batch.batch) 
        #---------------------------------------------------------------#
        loss = torch.sqrt(loss_fn(pred, batch.y))       
        loss.backward()  
        optimizer.step()   
    return loss, embedding

In [11]:
print("Starting training...")
losses = []

n_epochs = 2000
for epoch in range(n_epochs):
    loss, h = train(data)
    losses.append(loss)
    if epoch % 100 == 0:
        print(f"Epoch {epoch} | Train Loss {loss}")

Starting training...
Epoch 0 | Train Loss 3.377596378326416
Epoch 100 | Train Loss 0.9625376462936401
Epoch 200 | Train Loss 1.069977879524231


KeyboardInterrupt: 