In [1]:
import torch
import torch_geometric

In [2]:
from torch_geometric.datasets import TUDataset
from torch_geometric.nn import GCNConv, GATConv, GraphConv, GNNExplainer
import torch.nn.functional as F
from torch_geometric.loader import DataLoader, NeighborLoader

In [3]:
dataset = TUDataset(root="data/TUDataset", name="MUTAG")
print(dataset)

Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Extracting data\TUDataset\MUTAG\MUTAG.zip
Processing...


MUTAG(188)


Done!


In [4]:
data = dataset[0]
data

Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])

In [5]:
print(f"dataset:{dataset}")
print("=========")
print(f"dataset_length: {len(dataset)}")
print(f"dataset feature numbers: {dataset.num_features}")
print(f"datset class numbers:{dataset.num_classes}")

dataset:MUTAG(188)
dataset_length: 188
dataset feature numbers: 7
datset class numbers:2


In [6]:
print(f"data node features:{data.num_node_features}")
print(f"data edges:{data.num_edges}")
print(f"data nodes number:{data.num_nodes}")
print(f"data degree per node:{data.num_edges/data.num_nodes}")
print(f"data has self loops:{data.has_self_loops}")
print(f"data has isolated nodes{data.has_isolated_nodes}")
print(f"data has non-undirected edges:{data.is_undirected}")

data node features:7
data edges:38
data nodes number:17
data degree per node:2.235294117647059
data has self loops:<bound method BaseData.has_self_loops of Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])>
data has isolated nodes<bound method BaseData.has_isolated_nodes of Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])>
data has non-undirected edges:<bound method BaseData.is_undirected of Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])>


In [7]:
torch.manual_seed(12345)
dataset.shuffle()

MUTAG(188)

In [8]:
train_set = dataset[:150]
test_set = dataset[150:]

In [9]:
print(f"Number of training graphs: {len(train_set)}")
print(f"Number of test dataset: {len(test_set)}")

Number of training graphs: 150
Number of test dataset: 38


In [10]:
BATCH_SIZE = 64
train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True, num_workers=4, pin_memory = True)
test_loader = DataLoader(test_set, BATCH_SIZE, num_workers=4, pin_memory =True)

In [11]:
for step, data in enumerate(train_loader):
    print(f"Step: {step+1}")
    print("========")
    print(f"number of graphs in current batch: {data.num_graphs}")
    print(data)

Step: 1
number of graphs in current batch: 64
DataBatch(edge_index=[2, 2450], x=[1113, 7], edge_attr=[2450, 4], y=[64], batch=[1113], ptr=[65])
Step: 2
number of graphs in current batch: 64
DataBatch(edge_index=[2, 2562], x=[1161, 7], edge_attr=[2562, 4], y=[64], batch=[1161], ptr=[65])
Step: 3
number of graphs in current batch: 22
DataBatch(edge_index=[2, 918], x=[410, 7], edge_attr=[918, 4], y=[22], batch=[410], ptr=[23])


In [12]:
from torch_geometric.nn import global_mean_pool, global_max_pool
from torch.nn import Linear

In [32]:
import warnings
warnings.filterwarnings("ignore")

In [33]:
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(dataset.num_node_features,hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.drop = torch.nn.Dropout(p=0.5, training = self.training)
        self.lin = Linear(hidden_channels, dataset.num_classes)
    def forward(self, x, edge_index, batch):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = self.conv3(x, edge_index)

        x = global_mean_pool(x, batch)

        x = F.dropout(x, p=0.5)

        out = self.lin(x)
        return out

model = GCN(hidden_channels=64)
print(model)



TypeError: __init__() got an unexpected keyword argument 'training'

In [23]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0007)
criterion = torch.nn.CrossEntropyLoss()

In [24]:
def train():
    model.train()
    for data in train_loader:
        out = model(data.x, data.edge_index, data.batch)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

def test(loader):
    model.eval()
    count=0
    for data in loader:
        out = model(data.x, data.edge_index, data.batch)
        pred = out.argmax(dim=1)
        count+= int((pred==data.y).sum())
    return count/len(loader.dataset)

In [25]:
for epoch in range(10):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f"epoch:{epoch:03d}   train_accuracy:{train_acc:.4f}  test_accuracy:{test_acc:.4f}")

epoch:000   train_accuracy:0.6333  test_accuracy:0.6842
epoch:001   train_accuracy:0.6600  test_accuracy:0.6842
epoch:002   train_accuracy:0.6600  test_accuracy:0.6842
epoch:003   train_accuracy:0.6600  test_accuracy:0.6842
epoch:004   train_accuracy:0.6600  test_accuracy:0.6842
epoch:005   train_accuracy:0.6600  test_accuracy:0.6842


KeyboardInterrupt: 