<a href="https://colab.research.google.com/github/WIQHE/GNN-J/blob/main/mutagenic_molecules_pf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install PyTorch into the notebook environment.
# NOTE(review): torchvision is not imported in any of the visible cells — confirm it is needed.
! pip install torch torchvision



In [None]:
# Install PyTorch Geometric, which supplies TUDataset, DataLoader, GCNConv and
# global_mean_pool used in the cells below.
! pip install torch_geometric
# Companion extension packages for PyTorch Geometric.
# NOTE(review): none of these are imported directly in the visible cells —
# confirm they are required before keeping this install step.
! pip install torch-scatter torch-sparse torch-cluster torch-spline-conv



In [None]:
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

In [None]:
# Load the Mutagenicity graph-classification benchmark (cached under
# data/TUDataset) and shuffle it before slicing, so the split below does not
# depend on the order in which the graphs are stored.
dataset = TUDataset(root='data/TUDataset', name='Mutagenicity')
dataset = dataset.shuffle()

# The first 1500 graphs are used for training; everything else is held out.
train_dataset = dataset[:1500]
test_dataset = dataset[1500:]

# Reshuffle the training graphs every epoch so mini-batches differ between
# epochs; the evaluation loader keeps a fixed order because accuracy does not
# depend on batch composition.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

# Show the dataset summary and the first graph as a quick sanity check.
print(dataset)
print(dataset[0])

Mutagenicity(4337)
Data(edge_index=[2, 36], x=[17, 14], edge_attr=[36, 3], y=[1])




In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GCN2Conv, global_mean_pool


class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing one prediction per graph.

    Node features pass through two ``GCNConv`` layers, each followed by a
    ReLU. The resulting node embeddings are pooled per graph with
    ``global_mean_pool`` and a linear layer followed by a sigmoid maps every
    graph embedding to ``out_ch`` values in (0, 1).

    Args:
        in_ch: Number of input features per node.
        hid_ch: Output width of both graph-convolution layers.
        out_ch: Number of outputs per graph. Defaults to 1.
    """

    def __init__(self, in_ch, hid_ch, out_ch=1):
        super().__init__()
        self.conv1 = GCNConv(in_ch, hid_ch)
        self.conv2 = GCNConv(hid_ch, hid_ch)
        self.lin = torch.nn.Linear(hid_ch, out_ch)

    def forward(self, x, edge_idx, batch):
        """Return sigmoid outputs for every graph in the batch.

        Args:
            x: Node feature tensor passed to the first convolution.
            edge_idx: Edge index tensor shared by both convolutions.
            batch: Per-node graph assignment handed to ``global_mean_pool``.

        Returns:
            Sigmoid of the linear head with the trailing dimension removed
            when it has size 1 (i.e. one value per graph for ``out_ch=1``).
        """
        x = F.relu(self.conv1(x, edge_idx))
        x = F.relu(self.conv2(x, edge_idx))
        x = global_mean_pool(x, batch)

        # Drop only the trailing output dimension. A bare squeeze() would also
        # remove the batch dimension when the batch holds a single graph,
        # leaving a 0-d tensor whose shape no longer matches the labels.
        return torch.sigmoid(self.lin(x)).squeeze(-1)


In [None]:
def train(model, loader, optimizer, criterion):
    """Run one training epoch and return the mean loss per graph.

    Args:
        model: Module invoked as ``model(x, edge_index, batch)``.
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``,
            ``y``, ``num_graphs`` and ``to(device)``. It must also expose a
            sized ``dataset`` attribute.
        optimizer: Optimizer updating the model's parameters.
        criterion: Loss called as ``criterion(output, target)``.

    Returns:
        The batch losses weighted by ``num_graphs``, summed and divided by
        ``len(loader.dataset)``.
    """
    model.train()

    # Batches come out of the loader on the CPU. Move each one to the device
    # holding the model's weights so a model placed on the GPU does not fail
    # with a device mismatch.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    total_loss = 0.0
    for data in loader:
        if device is not None:
            data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x.float(), data.edge_index, data.batch)
        # The loss expects floating-point targets.
        loss = criterion(out, data.y.float())
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch is not over-counted.
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(loader.dataset)

In [None]:

def test(model, loader):
  model.eval()
  correct =0
  for data in loader:
    with torch.no_grad():
      out = model(data.x.float(), data.edge_index, data.batch)
      pred = ( out > 0.5).long()
      correct +=(pred == data.y).sum().item()
  return correct / len(loader.dataset)



In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Binary cross-entropy on the model's sigmoid outputs.
criterion = torch.nn.BCELoss()

# Input width comes from the dataset's node features; 64 hidden channels.
model = GCN(in_ch=dataset.num_node_features, hid_ch=64)
model = model.to(device)

optimizer = torch.optim.Adagrad(params=model.parameters(), lr=0.01)

In [None]:
# Train for 30 epochs. After each epoch, report the mean training loss and
# the accuracy on the held-out loader.
for epoch in range(1, 30 + 1):
    loss = train(
        model=model,
        loader=train_loader,
        optimizer=optimizer,
        criterion=criterion,
    )
    acc = test(model=model, loader=test_loader)
    print(f'epoch: {epoch:02d}, loss: {loss:.4f}, Test Acc: {acc:.4f}')

epoch: 01, loss: 0.6234, Test Acc: 0.6528
epoch: 02, loss: 0.6128, Test Acc: 0.6570
epoch: 03, loss: 0.6066, Test Acc: 0.6588
epoch: 04, loss: 0.6026, Test Acc: 0.6584
epoch: 05, loss: 0.5997, Test Acc: 0.6641
epoch: 06, loss: 0.5973, Test Acc: 0.6648
epoch: 07, loss: 0.5954, Test Acc: 0.6644
epoch: 08, loss: 0.5936, Test Acc: 0.6648
epoch: 09, loss: 0.5921, Test Acc: 0.6690
epoch: 10, loss: 0.5907, Test Acc: 0.6687
epoch: 11, loss: 0.5894, Test Acc: 0.6704
epoch: 12, loss: 0.5882, Test Acc: 0.6718
epoch: 13, loss: 0.5871, Test Acc: 0.6754
epoch: 14, loss: 0.5860, Test Acc: 0.6771
epoch: 15, loss: 0.5850, Test Acc: 0.6771
epoch: 16, loss: 0.5840, Test Acc: 0.6775
epoch: 17, loss: 0.5830, Test Acc: 0.6785
epoch: 18, loss: 0.5821, Test Acc: 0.6789
epoch: 19, loss: 0.5812, Test Acc: 0.6792
epoch: 20, loss: 0.5804, Test Acc: 0.6810
epoch: 21, loss: 0.5795, Test Acc: 0.6814
epoch: 22, loss: 0.5787, Test Acc: 0.6849
epoch: 23, loss: 0.5780, Test Acc: 0.6852
epoch: 24, loss: 0.5772, Test Acc: