<a href="https://colab.research.google.com/github/h5ng/GNN/blob/master/GAT_with_pytorch2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# Utils

In [33]:
import numpy as np
import scipy.sparse as sp

def encode_onehot(labels):
  # The classes must be sorted before encoding to enable static class encoding.
  # In other words, make sure the first class always maps to index 0.
  classes = sorted(list(set(labels)))
  classes_dict = {c: np.identity(len(classes))[i, :] for i, c in enumerate(classes)}
  labels_onehot = np.array(list(map(classes_dict.get, labels)), dtype=np.int32)
  return labels_onehot

def normalize_features(mx):
  """Row-normalize sparse matrix"""
  rowsum = np.array(mx.sum(1))
  r_inv = np.power(rowsum, -1).flatten()
  r_inv[np.isinf(r_inv)] = 0.
  r_mat_inv = sp.diags(r_inv)
  mx = r_mat_inv.dot(mx)
  return mx

def normalize_adj(mx):
  """Row-normalize sparse matrix"""
  rowsum = np.array(mx.sum(1))
  r_inv_sqrt = np.power(rowsum, -0.5).flatten()
  r_inv_sqrt[np.isinf(r_inv_sqrt)] = 0.
  r_mat_inv_sqrt = sp.diags(r_inv_sqrt)
  return mx.dot(r_mat_inv_sqrt).transpose().dot(r_mat_inv_sqrt)

def accuracy(output, labels):
  preds = output.max(1)[1].type_as(labels)
  correct = preds.eq(labels).double()
  correct = correct.sum()
  return correct / len(labels)

def load_data(path="./sample_data/cora/", dataset="cora"):
  print('Loading {} dataset...'.format(dataset))

  # <paper_id> <word_attributes>+ <class_label>
  idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset), dtype=np.dtype(str))
  features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
  labels = encode_onehot(idx_features_labels[:, -1])

  # build graph
  idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
  idx_map = {j: i for i, j in enumerate(idx)}
  edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset), dtype=np.int32)
  edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), dtype=np.int32).reshape(edges_unordered.shape)
  adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)

  # build symmetric adjacency matrix
  adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

  features = normalize_features(features)
  adj = normalize_adj(adj + sp.eye(adj.shape[0]))

  idx_train = range(140)
  idx_val = range(200, 500)
  idx_test = range(500, 1500)

  adj = torch.FloatTensor(np.array(adj.todense()))
  features = torch.FloatTensor(np.array(features.todense()))
  labels = torch.LongTensor(np.where(labels)[1])

  idx_train = torch.LongTensor(idx_train)
  idx_val = torch.LongTensor(idx_val)
  idx_test = torch.LongTensor(idx_test)

  return adj, features, labels, idx_train, idx_val, idx_test

In [34]:
adj, features, labels, idx_train, idx_val, idx_test = load_data()

Loading cora dataset...


In [35]:
print('adj', adj.shape)
print('features', features.shape)
print('labels', labels.shape)
print('idx_train', idx_train.shape)
print('idx_val', idx_val.shape)
print('idx_test', idx_test.shape)

adj torch.Size([2708, 2708])
features torch.Size([2708, 1433])
labels torch.Size([2708])
idx_train torch.Size([140])
idx_val torch.Size([300])
idx_test torch.Size([1000])


# Training settings

In [23]:
import random

seed = 72
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
dropout = 0.6
alpha = 0.2
hidden = 8
nb_heads = 8
lr = 0.05
weight_decay=5e-4
fastmode = False
epochs = 10000
patience = 100

# GAT

In [24]:
# GAT model
class GAT(nn.Module):
  def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads):
    """Dense version of GAT."""
    super(GAT, self).__init__()
    self.dropout = dropout
    
    self.attentions = [GraphAttentionLayer(nfeat, nhid, dropout=dropout, alpha=alpha, concat=True) for _ in range(nheads)]
    for i, attention in enumerate(self.attentions):
      self.add_module('attention_{}'.format(i), attention)
    
    self.out_att = GraphAttentionLayer(nhid * nheads, nclass, dropout=dropout, alpha=alpha, concat=False)


  def forward(self, x, adj):
    x = F.dropout(x, self.dropout, training=self.training)
    x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
    x = F.dropout(x, self.dropout, training=self.training)
    x = F.elu(self.out_att(x, adj))
    return F.log_softmax(x, dim=1)

# Graph Attention Layer

In [25]:
class GraphAttentionLayer(nn.Module):
  """ Simple Gat layer """
  def __init__(self, in_features, out_features, dropout, alpha, concat=True):
    super(GraphAttentionLayer, self).__init__()
    self.dropout = dropout
    self.in_features = in_features
    self.out_features = out_features
    self.alpha = alpha
    self.concat = concat

    self.W = nn.Parameter(torch.empty(size=(in_features, out_features)))
    nn.init.xavier_uniform_(self.W.data, gain=1.414)
    self.a = nn.Parameter(torch.empty(size=(2*out_features, 1)))
    nn.init.xavier_uniform_(self.a.data, gain=1.414)

    self.leakyrelu = nn.LeakyReLU(self.alpha)


  def forward(self, h, adj):
    # W.shape: (in_features, out_features)
    # h.shape: (N, in_features)
    # Wh.shape: (N, out_features)
    Wh = torch.mm(h, self.W) 
    a_input = self._prepare_attentional_mechanism_input(Wh)
    e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))
    
    zero_vec = -9e15*torch.ones_like(e)
    attention = torch.where(adj > 0, e, zero_vec)
    attention = F.softmax(attention, dim=1)
    attention = F.dropout(attention, self.dropout, training=self.training)
    h_prime = torch.matmul(attention, Wh)

    if self.concat:
        return F.elu(h_prime)
    else:
        return h_prime

  def _prepare_attentional_mechanism_input(self, Wh):
    N = Wh.size()[0] # number of nodes
    Wh_repeated_in_chunks = Wh.repeat_interleave(N, dim=0)
    Wh_repeated_alternating = Wh.repeat(N, 1)

    all_combinations_matrix = torch.cat([Wh_repeated_in_chunks, Wh_repeated_alternating], dim=1)
    return all_combinations_matrix.view(N, N, 2 * self.out_features)



# Model and optimizer

In [26]:
model = GAT(
    nfeat=features.shape[1],
    nhid=hidden,
    nclass=int(labels.max()) + 1,
    dropout=dropout,
    alpha=alpha,
    nheads=nb_heads)

optimizer = optim.Adam(model.parameters(),
                       lr=lr,
                       weight_decay=weight_decay)

model.cuda()
features = features.cuda()
adj = adj.cuda()
labels = labels.cuda()
idx_train = idx_train.cuda()
idx_val = idx_val.cuda()
idx_test = idx_test.cuda()

# 미분할 변수 선택?
features, adj, labels = Variable(features), Variable(adj), Variable(labels)

# train 함수

In [27]:
def train(epoch):
  t = time.time()
  model.train()
  optimizer.zero_grad()
  output = model(features, adj)
  loss_train = F.nll_loss(output[idx_train], labels[idx_train])
  acc_train = accuracy(output[idx_train], labels[idx_train])
  loss_train.backward()
  optimizer.step()

  if not fastmode:
    # Evaluate validation set performance separately,
    # deactivates dropout during validation run.
    model.eval()
    output = model(features, adj)

  loss_val = F.nll_loss(output[idx_val], labels[idx_val])
  acc_val = accuracy(output[idx_val], labels[idx_val])
  print('Epoch: {:04d}'.format(epoch+1),
        'loss_train: {:.4f}'.format(loss_train.data.item()),
        'acc_train: {:.4f}'.format(acc_train.data.item()),
        'loss_val: {:.4f}'.format(loss_val.data.item()),
        'acc_val: {:.4f}'.format(acc_val.data.item()),
        'time: {:.4f}s'.format(time.time() - t))

  return loss_val.data.item()

In [28]:
def compute_test():
  model.eval()
  output = model(features, adj)
  loss_test = F.nll_loss(output[idx_test], labels[idx_test])
  acc_test = accuracy(output[idx_test], labels[idx_test])
  print("Test set results:",
        "loss= {:.4f}".format(loss_test.data[0]),
        "accuracy= {:.4f}".format(acc_test.data[0]))

In [29]:
import time
import glob
import os

t_total = time.time()
loss_values = []
bad_counter = 0
best = epochs + 1
best_epoch = 0

for epoch in range(epochs):
  loss_values.append(train(epoch))

  torch.save(model.state_dict(), '{}.pkl'.format(epoch))
  if loss_values[-1] < best:
    best = loss_values[-1]
    best_epoch = epoch
    bad_counter = 0
  else:
    bad_counter += 1
  
  if bad_counter == patience:
    break

  files = glob.glob('*.pkl')
  for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb < best_epoch:
      os.remove(file)

Epoch: 0001 loss_train: 1.9519 acc_train: 0.0786 loss_val: 1.8714 acc_val: 0.5767 time: 0.4394s
Epoch: 0002 loss_train: 1.8412 acc_train: 0.5071 loss_val: 1.7757 acc_val: 0.5733 time: 0.4316s
Epoch: 0003 loss_train: 1.7346 acc_train: 0.5429 loss_val: 1.6643 acc_val: 0.5767 time: 0.4303s
Epoch: 0004 loss_train: 1.6407 acc_train: 0.5429 loss_val: 1.5510 acc_val: 0.5900 time: 0.4265s
Epoch: 0005 loss_train: 1.5116 acc_train: 0.5571 loss_val: 1.4412 acc_val: 0.6167 time: 0.4245s
Epoch: 0006 loss_train: 1.4349 acc_train: 0.5929 loss_val: 1.3388 acc_val: 0.6800 time: 0.4291s
Epoch: 0007 loss_train: 1.3208 acc_train: 0.6429 loss_val: 1.2419 acc_val: 0.7200 time: 0.4267s
Epoch: 0008 loss_train: 1.2011 acc_train: 0.6357 loss_val: 1.1547 acc_val: 0.7433 time: 0.4294s
Epoch: 0009 loss_train: 1.1871 acc_train: 0.6857 loss_val: 1.0764 acc_val: 0.7667 time: 0.4268s
Epoch: 0010 loss_train: 1.1242 acc_train: 0.6786 loss_val: 1.0069 acc_val: 0.7933 time: 0.4266s
Epoch: 0011 loss_train: 1.0543 acc_train

In [30]:
files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb > best_epoch:
        os.remove(file)

In [31]:
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

print('Loading {}th epoch'.format(best_epoch))

Optimization Finished!
Total time elapsed: 99.1086s
Loading 128th epoch


In [32]:
compute_test()

IndexError: ignored