<a href="https://colab.research.google.com/github/XIAO-HOU/Colab-code/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import torch
import math
import time

import numpy as np
import torch.nn.functional as F
import torch.optim as optim

from collections import defaultdict
from torch import nn

cora_path = 'drive/MyDrive/Colab Notebooks/data/cora'

In [26]:
def load_data(source):
  """Load a Cora-style citation dataset from the directory ``source``.

  Two whitespace-separated text files are read:

  * ``<source>/cora.content`` -- one node per line:
    ``<paper_id> <feature_1> ... <feature_n> <label>``.
  * ``<source>/cora.cites`` -- one edge per line: ``<paper_id> <paper_id>``.

  Blank lines in either file are ignored.

  Args:
    source: Path of the directory that contains the two files.

  Returns:
    A tuple ``(features, labels, adj_list, adj_matrix)`` where
    ``features`` is a float array with one row per node, ``labels`` is an
    integer array of class ids (numbered in order of first appearance),
    ``adj_list`` maps a node index to the set of its neighbours, and
    ``adj_matrix`` is the dense symmetric 0/1 adjacency matrix.

  Raises:
    ValueError: If a non-blank line of the cites file does not contain
      exactly two fields.
    KeyError: If the cites file references a paper id that is absent from
      the content file.
  """
  content_path = source + '/cora.content'
  cite_path = source + '/cora.cites'

  features = []
  labels = []
  node_map = {}
  label_map = {}

  with open(content_path) as f:
    for line in f:
      info = line.split()
      if not info:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        continue
      # The node index is its row position in the feature matrix.
      node_map[info[0]] = len(features)
      features.append([float(x) for x in info[1:-1]])
      # Assign consecutive integer ids to labels in order of appearance.
      labels.append(label_map.setdefault(info[-1], len(label_map)))
  features = np.asarray(features)
  labels = np.asarray(labels)

  adj_list = defaultdict(set)
  adj_matrix = np.zeros((features.shape[0], features.shape[0]))
  with open(cite_path) as f:
    for line_no, line in enumerate(f, start=1):
      info = line.split()
      if not info:
        continue
      if len(info) != 2:
        # Explicit check instead of `assert`, which is removed under -O.
        raise ValueError(
            '{}:{}: expected 2 fields, got {}'.format(
                cite_path, line_no, len(info)))
      paper1 = node_map[info[0]]
      paper2 = node_map[info[1]]
      # Citations are stored as undirected edges.
      adj_list[paper1].add(paper2)
      adj_matrix[paper1][paper2] = 1
      adj_list[paper2].add(paper1)
      adj_matrix[paper2][paper1] = 1

  return features, labels, adj_list, adj_matrix

# Read node features, labels and the citation graph from the files under `cora_path`.
features, labels, adj_list, adj_matrix = load_data(cora_path)

In [27]:
adj_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [28]:
class GCN(nn.Module):
    """Two stacked graph-convolution layers producing per-row log-probabilities.

    Args:
        nfeat: Number of input features per node.
        nhid: Output width of the first graph-convolution layer.
        nclass: Output width of the second layer (number of classes).
        dropout: Dropout probability applied between the two layers.
    """

    def __init__(self, nfeat, nhid, nclass, dropout):
        super().__init__()

        self.dropout = dropout
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)

    def forward(self, x, adj):
        """Return ``log_softmax`` (over dim 1) of the second layer's output.

        Args:
            x: Node feature matrix fed to the first layer.
            adj: Adjacency matrix passed unchanged to both layers.
        """
        hidden = F.relu(self.gc1(x, adj))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        logits = self.gc2(hidden, adj)
        return F.log_softmax(logits, dim=1)

class GraphConvolution(nn.Module):
    """Single graph-convolution layer computing ``adj @ (input @ weight) [+ bias]``.

    Args:
        in_features: Number of columns of the input matrix.
        out_features: Number of columns of the output matrix.
        bias: When true, a learnable bias of size ``out_features`` is added.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.FloatTensor(in_features, out_features))
        if not bias:
            # Register an explicit ``None`` so ``self.bias`` always exists.
            self.register_parameter('bias', None)
        else:
            self.bias = nn.Parameter(torch.FloatTensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill weight (and bias, if present) uniformly in ``[-b, b]``, ``b = 1/sqrt(out_features)``."""
        bound = 1. / math.sqrt(self.weight.size(1))
        # Weight first, then bias, so the random draws happen in a fixed order.
        for param in (self.weight, self.bias):
            if param is not None:
                param.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        """Project ``input`` with the weight matrix, then left-multiply by ``adj``."""
        aggregated = torch.mm(adj, torch.mm(input, self.weight))
        if self.bias is None:
            return aggregated
        return aggregated + self.bias

In [29]:
def normalize(mx):
  """Symmetrically normalise a square matrix: ``D^-1/2 @ mx @ D^-1/2``.

  ``D`` is the diagonal matrix of row sums of ``mx``. Rows whose sum is
  zero get a scaling factor of 0, so the corresponding rows and columns of
  the result are zero.

  The diagonal scaling is applied by broadcasting rather than by building
  dense diagonal matrices, which yields the same values without two full
  matrix products.

  Args:
    mx: Square 2-D array-like.

  Returns:
    A NumPy array of the same shape holding the normalised matrix.

  Raises:
    ValueError: If ``mx`` is not a square 2-D matrix.
  """
  mx = np.asarray(mx)
  if mx.ndim != 2 or mx.shape[0] != mx.shape[1]:
    raise ValueError(
        'normalize expects a square 2-D matrix, got shape {}'.format(mx.shape))
  row_sum = mx.sum(axis=1)
  # 0 ** -0.5 is inf; silence the expected warning and zero those entries.
  with np.errstate(divide='ignore'):
    r_inv = np.power(row_sum, -0.5).flatten()
  r_inv[np.isinf(r_inv)] = 0.
  # Scale row i by r_inv[i], then column j by r_inv[j].
  return r_inv[:, np.newaxis] * mx * r_inv[np.newaxis, :]

def accuracy(output, labels):
    """Return the fraction of rows of ``output`` whose arg-max equals ``labels``.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of target class indices.

    Returns:
        A 0-dim double tensor: number of matches divided by ``len(labels)``.
    """
    _, predicted = torch.max(output, 1)
    predicted = predicted.type_as(labels)
    hits = predicted.eq(labels).double().sum()
    return hits / len(labels)

def split_data(nodes_num, test_split=3, val_split=6):
    """Randomly partition ``range(nodes_num)`` into train/validation/test index arrays.

    The test set receives ``nodes_num // test_split`` indices, the validation
    set ``nodes_num // val_split``, and the training set whatever remains.
    The shuffle uses NumPy's global random state.

    Returns:
        ``(train_indexes, val_indexes, test_indexes)``.
    """
    shuffled = np.random.permutation(nodes_num)

    test_end = nodes_num // test_split
    val_end = test_end + nodes_num // val_split

    # Cut the permutation at the two boundaries: [test | val | train].
    test_indexes, val_indexes, train_indexes = np.split(
        shuffled, [test_end, val_end])
    return train_indexes, val_indexes, test_indexes

In [30]:
# Convert the NumPy arrays returned by load_data into torch tensors.
features = torch.Tensor(features)
labels = torch.LongTensor(labels)
# Add the identity (self-loops) to the adjacency matrix before normalising it.
adj_matrix = normalize(adj_matrix + np.eye(adj_matrix.shape[0]))
adj_matrix = torch.Tensor(adj_matrix)
# Random train/validation/test partition of the node indices (default ratios).
train_indexes, val_indexes, test_indexes = split_data(features.shape[0])

In [31]:
# Hyperparameters: hidden-layer width, dropout probability, and the
# learning rate / weight decay handed to the Adam optimizer below.
hidden = 16
dropout = 0.5
lr = 0.01
weight_decay = 5e-4

# Input width is the number of feature columns; the number of classes is
# the largest label id plus one (label ids start at 0).
model = GCN(nfeat=features.shape[1],
        nhid=hidden,
        nclass=labels.max().item() + 1,
        dropout=dropout)
optimizer = optim.Adam(model.parameters(),lr=lr,
              weight_decay=weight_decay)

def train(epoch):
  """Run one full-graph training step, then report validation metrics.

  Uses the module-level ``model``, ``optimizer``, ``features``,
  ``adj_matrix``, ``labels``, ``train_indexes`` and ``val_indexes``.

  Args:
    epoch: Zero-based epoch number; printed as ``epoch + 1``.
  """
  t = time.time()
  model.train()
  optimizer.zero_grad()
  output = model(features, adj_matrix)
  loss_train = F.nll_loss(output[train_indexes], labels[train_indexes])
  acc_train = accuracy(output[train_indexes], labels[train_indexes])
  loss_train.backward()
  optimizer.step()

  # Re-evaluate with dropout disabled. No gradients are needed here, so
  # skip autograd bookkeeping instead of building an unused graph.
  model.eval()
  with torch.no_grad():
    output = model(features, adj_matrix)
    loss_val = F.nll_loss(output[val_indexes], labels[val_indexes])
    acc_val = accuracy(output[val_indexes], labels[val_indexes])

  print('Epoch: {:04d}'.format(epoch+1),
      'loss_train: {:.4f}'.format(loss_train.item()),
      'acc_train: {:.4f}'.format(acc_train.item()),
      'loss_val: {:.4f}'.format(loss_val.item()),
      'acc_val: {:.4f}'.format(acc_val.item()),
      'time: {:.4f}s'.format(time.time() - t))


def test():
  """Evaluate the module-level ``model`` on the test indices and print loss/accuracy.

  Uses the module-level ``features``, ``adj_matrix``, ``labels`` and
  ``test_indexes``.
  """
  model.eval()
  # Pure evaluation: disable autograd so no computation graph is recorded.
  with torch.no_grad():
    output = model(features, adj_matrix)
    loss_test = F.nll_loss(output[test_indexes], labels[test_indexes])
    acc_test = accuracy(output[test_indexes], labels[test_indexes])
  print('Test set results:',
      'loss={:.4f}'.format(loss_test.item()),
      'accuracy={:.4f}'.format(acc_test.item()))

In [None]:
# Train for a fixed number of epochs, timing the whole run.
epochs = 200
start_time = time.time()
for epoch in range(epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - start_time))

# Final evaluation on the held-out test indices.
test()