In [1]:
#if using colab, do !pip install torch_geometric before running this file
from torch_geometric.datasets import Amazon
import torch
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

In [13]:
class GCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(GCNConv, self).__init__(aggr='add')  # "Add" aggregation
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        # Step 1: Add self-loops
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Step 2: Multiply with weights
        x = self.lin(x)

        # Step 3: Calculate the normalization
        row, col = edge_index
        deg = degree(row, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Step 4: Propagate the embeddings to the next layer
        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x,
                              norm=norm)

    def message(self, x_j, norm):
        # Normalize node features.
        return norm.view(-1, 1) * x_j


class Net(torch.nn.Module):
    def __init__(self, dataset, n_in):
        super(Net, self).__init__()
        self.conv1 = GCNConv(n_in, 64)
        self.conv2 = GCNConv(64, 64)
        self.out = torch.nn.Linear(64, dataset.num_classes)
        self.hook = self.conv2.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        self.intermediate_output = output

    def forward(self, data, source_features_reduced):
        x, edge_index = source_features_reduced, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        x = self.out(x)
        return F.log_softmax(x, dim=1)


def plot_dataset(dataset):
    edges_raw = dataset.data.edge_index.numpy()
    edges = [(x, y) for x, y in zip(edges_raw[0, :], edges_raw[1, :])]
    labels = dataset.data.y.numpy()

    G = nx.Graph()
    G.add_nodes_from(list(range(np.max(edges_raw))))
    G.add_edges_from(edges)
    plt.subplot(111)
    options = {
                'node_size': 30,
                'width': 0.2,
    }
    nx.draw(G, with_labels=False, node_color=labels.tolist(), cmap=plt.cm.tab10, font_weight='bold', **options)
    plt.show()


def test(data, source_features_reduced, train=True):
    model.eval()
    correct = 0
    pred = model(data, source_features_reduced).max(dim=1)[1]
    if train:
        correct += pred[train_mask].eq(data.y[train_mask]).sum().item()
        return correct / (len(data.y[train_mask]))
    else:
        correct += pred[test_mask].eq(data.y[test_mask]).sum().item()
        return correct / (len(data.y[test_mask]))


def train(data, source_features_reduced, epochs, plot=False):
    train_accuracies, test_accuracies = list(), list()
    best_test_acc = 0
    for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()
            out = model(data, source_features_reduced)
            # print(out.shape)
            loss = F.nll_loss(out[train_mask], data.y[train_mask])
            loss.backward()
            optimizer.step()

            train_acc = test(data, source_features_reduced)
            test_acc = test(data, source_features_reduced, train=False)
            if (test_acc > best_test_acc):
                best_test_acc = test_acc
                print("Test accuracy", test_acc)
                torch.save(model.state_dict(), "/23F/228/FinalProj/Models/GCN_AMAZON_COMPUTERS.pth")

            train_accuracies.append(train_acc)
            test_accuracies.append(test_acc)
            print('Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Test Acc: {:.5f}'.
                  format(epoch, loss, train_acc, test_acc))

    if plot:
        plt.plot(train_accuracies, label="Train accuracy")
        plt.plot(test_accuracies, label="Validation accuracy")
        plt.xlabel("# Epoch")
        plt.ylabel("Accuracy")
        plt.legend(loc='upper right')
        plt.show()


if __name__ == "__main__":
    # dataset = Planetoid(root='/tmp/Cora', name='Cora')
    dataset = Amazon(root='/23F/228/FinalProj/Datasets/Amazon', name='Computers')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    source_features = dataset[0].x.numpy()
    n_in = 256
    pca_source = PCA(n_components=n_in)
    source_features_reduced = torch.from_numpy(pca_source.fit_transform(source_features))
    source_features_reduced = source_features_reduced.to(device)
    model = Net(dataset, n_in).to(device)
    data = dataset[0].to(device)
    
    
    #train_mask
    train_mask = torch.zeros(13752, dtype=torch.bool)
    test_mask = torch.zeros(13752, dtype=torch.bool)
    train_mask[0:10000] = True
    test_mask[10001:] = True
    
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    train(data, source_features_reduced, epochs=150, plot=False)

Test accuracy 0.32471340975739804
Epoch: 000, Loss: 2.35122, Train Acc: 0.33050, Test Acc: 0.32471
Test accuracy 0.6001066382298054
Epoch: 001, Loss: 2.21840, Train Acc: 0.60290, Test Acc: 0.60011
Test accuracy 0.6419621434284191
Epoch: 002, Loss: 2.08420, Train Acc: 0.65110, Test Acc: 0.64196
Epoch: 003, Loss: 1.91961, Train Acc: 0.64400, Test Acc: 0.63583
Epoch: 004, Loss: 1.73305, Train Acc: 0.64920, Test Acc: 0.64116
Test accuracy 0.6598240469208211
Epoch: 005, Loss: 1.53655, Train Acc: 0.66690, Test Acc: 0.65982
Test accuracy 0.7030125299920021
Epoch: 006, Loss: 1.35265, Train Acc: 0.70930, Test Acc: 0.70301
Test accuracy 0.7280725139962677
Epoch: 007, Loss: 1.18455, Train Acc: 0.73510, Test Acc: 0.72807
Test accuracy 0.7422020794454812
Epoch: 008, Loss: 1.04558, Train Acc: 0.75300, Test Acc: 0.74220
Test accuracy 0.7515329245534524
Epoch: 009, Loss: 0.93804, Train Acc: 0.76180, Test Acc: 0.75153
Test accuracy 0.7632631298320448
Epoch: 010, Loss: 0.84329, Train Acc: 0.77340, Test 

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz
Processing...
Done!
