### hidden feature 크기 64로 변경(논문과 같이)

In [50]:
from torch_geometric.nn import GCNConv
import torch
import torch.nn.functional as F

# class GCN(torch.nn.Module):
#     def __init__(self):
#         super(GCN, self).__init__()
#         self.conv1 = GCNConv(dataset.num_node_features, 64, dropout=0.6)
#         self.conv2 = GCNConv(64, dataset.num_classes, dropout=0.6)

#     def forward(self, data):
#         x, edge_index = data.x, data.edge_index

#         x = self.conv1(x, edge_index)
#         x = F.relu(x)
#         x = F.dropout(x, training=self.training)
#         x = self.conv2(x, edge_index)

#         return F.softmax(x, dim=1)

import torch
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self, num_node_features, num_classes):
        super().__init__()

        # GCN layer 1
        self.conv1 = GCNConv(in_channels=num_node_features, out_channels=64)
        # GCN layer 2 (classification layer)
        self.conv2 = GCNConv(in_channels=64, out_channels=num_classes)

        # Dropout layer
        self.dropout = torch.nn.Dropout(p=0.6)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        # GCN layer 1
        x = self.dropout(x)
        x = self.conv1(x, edge_index)
        x = torch.nn.functional.relu(x)

        # GCN layer 2
        x = self.dropout(x)
        x = self.conv2(x, edge_index)

        return torch.nn.functional.softmax(x, dim=1)


In [51]:
# from torch_geometric.nn import GATConv

# class GAT(torch.nn.Module):
#     def __init__(self):
#         super(GAT, self).__init__()
#         self.conv1 = GATConv(dataset.num_node_features, 64, heads=8, dropout=0.6)
#         self.conv2 = GATConv(64*8, dataset.num_classes, heads=8, concat=False, dropout=0.6)

#     def forward(self, data):
#         x, edge_index = data.x, data.edge_index

#         x = self.conv1(x, edge_index)
#         x = F.elu(x)
#         x = F.dropout(x, training=self.training)
#         x = self.conv2(x, edge_index)

#         return F.softmax(x, dim=1)


import torch
from torch_geometric.nn import GATConv

class GAT(torch.nn.Module):
    def __init__(self, num_node_features, num_classes):
        super().__init__()

        # GAT layer 1
        self.conv1 = GATConv(in_channels=num_node_features, out_channels=8, heads=8)
        # GAT layer 2 (classification layer)
        self.conv2 = GATConv(in_channels=8*8, out_channels=num_classes, heads=1, concat=False)

        # Dropout layer
        self.dropout = torch.nn.Dropout(p=0.6)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        # GAT layer 1
        x = self.dropout(x)
        x = self.conv1(x, edge_index)
        x = torch.nn.functional.elu(x)

        # GAT layer 2
        x = self.dropout(x)
        x = self.conv2(x, edge_index)

        return torch.nn.functional.softmax(x, dim=1)



# Cora Dataset으로 GCN과 GAT 성능 비교

In [52]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')

In [36]:
print(dataset.num_node_features)
print(dataset.num_classes)

1433
7


In [53]:
def train(model, data, optimizer):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

def test(model, data):
    model.eval()
    out = model(data)
    pred = out.argmax(dim=1)
    correct = pred[data.test_mask] == data.y[data.test_mask]
    return int(correct.sum()) / int(data.test_mask.sum())

data = dataset[0]
model_gcn = GCN(dataset.num_node_features, dataset.num_classes)
model_gat = GAT(dataset.num_node_features, dataset.num_classes)
optimizer_gcn = torch.optim.Adam(model_gcn.parameters(), lr=0.01, weight_decay=5e-4)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.01, weight_decay=5e-4)

for epoch in range(100):
    train(model_gcn, data, optimizer_gcn)
    train(model_gat, data, optimizer_gat)
    if epoch % 10 == 0:
        acc_gcn = test(model_gcn, data)
        acc_gat = test(model_gat, data)
        print('Epoch: {:03d}, GCN Accuracy: {:.4f}, GAT Accuracy: {:.4f}'.format(epoch, acc_gcn, acc_gat))
print('Epoch: {:03d}, GCN Accuracy: {:.4f}, GAT Accuracy: {:.4f}'.format(epoch, acc_gcn, acc_gat))

Epoch: 000, GCN Accuracy: 0.5720, GAT Accuracy: 0.5910
Epoch: 010, GCN Accuracy: 0.8020, GAT Accuracy: 0.7790
Epoch: 020, GCN Accuracy: 0.8020, GAT Accuracy: 0.7850
Epoch: 030, GCN Accuracy: 0.7910, GAT Accuracy: 0.7770
Epoch: 040, GCN Accuracy: 0.7950, GAT Accuracy: 0.7990
Epoch: 050, GCN Accuracy: 0.7930, GAT Accuracy: 0.7810
Epoch: 060, GCN Accuracy: 0.7910, GAT Accuracy: 0.8060
Epoch: 070, GCN Accuracy: 0.8030, GAT Accuracy: 0.7960
Epoch: 080, GCN Accuracy: 0.7970, GAT Accuracy: 0.7990
Epoch: 090, GCN Accuracy: 0.7800, GAT Accuracy: 0.7950
Epoch: 099, GCN Accuracy: 0.7800, GAT Accuracy: 0.7950


In [38]:
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

# Citeseer Dataset으로 GCN과 GAT 성능 비교

In [1]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Citeseer', name='Citeseer')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


In [49]:
data = dataset[0]
model_gcn = GCN(dataset.num_node_features, dataset.num_classes)
model_gat = GAT(dataset.num_node_features, dataset.num_classes)
optimizer_gcn = torch.optim.Adam(model_gcn.parameters(), lr=0.01, weight_decay=5e-4)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.01, weight_decay=5e-4)

for epoch in range(100):
    train(model_gcn, data, optimizer_gcn)
    train(model_gat, data, optimizer_gat)
    if epoch % 10 == 0:
        acc_gcn = test(model_gcn, data)
        acc_gat = test(model_gat, data)
        print('Epoch: {:03d}, GCN Accuracy: {:.4f}, GAT Accuracy: {:.4f}'.format(epoch, acc_gcn, acc_gat))
print('Epoch: {:03d}, GCN Accuracy: {:.4f}, GAT Accuracy: {:.4f}'.format(epoch, acc_gcn, acc_gat))

Epoch: 000, GCN Accuracy: 0.5840, GAT Accuracy: 0.5970
Epoch: 010, GCN Accuracy: 0.6880, GAT Accuracy: 0.6740
Epoch: 020, GCN Accuracy: 0.6650, GAT Accuracy: 0.6590
Epoch: 030, GCN Accuracy: 0.6680, GAT Accuracy: 0.6660
Epoch: 040, GCN Accuracy: 0.6840, GAT Accuracy: 0.6470
Epoch: 050, GCN Accuracy: 0.6570, GAT Accuracy: 0.6790
Epoch: 060, GCN Accuracy: 0.6760, GAT Accuracy: 0.6890
Epoch: 070, GCN Accuracy: 0.6880, GAT Accuracy: 0.6550
Epoch: 080, GCN Accuracy: 0.6510, GAT Accuracy: 0.6560
Epoch: 090, GCN Accuracy: 0.6670, GAT Accuracy: 0.6850
Epoch: 099, GCN Accuracy: 0.6670, GAT Accuracy: 0.6850


In [22]:
dataset.data

Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])

# DBLP Dataset으로 GCN과 GAT 성능 비교

In [27]:
from torch_geometric.datasets import DBLP

dataset = DBLP(root='/tmp/DBLP')

Downloading https://www.dropbox.com/s/yh4grpeks87ugr2/DBLP_processed.zip?dl=1
Extracting /tmp/DBLP/raw/DBLP_processed.zip
Processing...
Done!


In [31]:
print(dataset.num_node_features)
print(dataset.num_classes)

{'author': 334, 'paper': 4231, 'term': 50, 'conference': 0}


AttributeError: 'DBLP' object has no attribute 'num_classes'

In [30]:
data

HeteroData(
  author={
    x=[4057, 334],
    y=[4057],
    train_mask=[4057],
    val_mask=[4057],
    test_mask=[4057],
  },
  paper={ x=[14328, 4231] },
  term={ x=[7723, 50] },
  conference={ num_nodes=20 },
  (author, to, paper)={ edge_index=[2, 19645] },
  (paper, to, author)={ edge_index=[2, 19645] },
  (paper, to, term)={ edge_index=[2, 85810] },
  (paper, to, conference)={ edge_index=[2, 14328] },
  (term, to, paper)={ edge_index=[2, 85810] },
  (conference, to, paper)={ edge_index=[2, 14328] }
)

In [29]:
data = dataset[0]
model_gcn = GCN()
model_gat = GAT()
optimizer_gcn = torch.optim.Adam(model_gcn.parameters(), lr=0.01, weight_decay=5e-4)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.01, weight_decay=5e-4)

for epoch in range(200):
    train(model_gcn, data, optimizer_gcn)
    train(model_gat, data, optimizer_gat)
    if epoch % 10 == 0:
        acc_gcn = test(model_gcn, data)
        acc_gat = test(model_gat, data)
        print('Epoch: {:03d}, GCN Accuracy: {:.4f}, GAT Accuracy: {:.4f}'.format(epoch, acc_gcn, acc_gat))

TypeError: '>' not supported between instances of 'dict' and 'int'