### Implementing a simple 2-layered GCN from scratch in Pytorch
Associated Paper: https://arxiv.org/pdf/1609.02907.pdf
Example Taken: 2-layered GCN on CORA Dataset

_TODO: Modularize GCN class_

In [5]:
import torch
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
import pandas as pd
import os

In [6]:
data_dir = "data/cora/"
edgelist = pd.read_csv(os.path.join(data_dir, "cora.cites"), sep='\t', header=None, names=["target", "source"])

In [7]:
column_names = ["paper_id"] + [f"term_{idx}" for idx in range(1433)] + ["subject"]
papers = pd.read_csv(
    os.path.join(data_dir, "cora.content"), sep="\t", header=None, names=column_names,
)
print("Papers shape:", papers.shape)

Papers shape: (2708, 1435)


In [8]:
# Paper Index -> Id & Vice Verca
paper_idx_to_id = {idx_: id_ for idx_, id_ in enumerate(list(set(papers["paper_id"])))}

# Paper Id -> Index
paper_id_to_idx = {id_: idx_ for idx_, id_ in paper_idx_to_id.items()}

In [9]:
# Calculate Normalized Adjacency Matrix -> A_HAT

# Create an empty Adjacency matrix
A = torch.zeros(papers.shape[0], papers.shape[0])

# Fill the adjacency matrix wherever there is an edge
for pair in edgelist.values:
    A[paper_id_to_idx[pair[0]], paper_id_to_idx[pair[1]]] = 1

# Create an Identity Matrix for A
I = torch.eye(A.shape[0])

# A_TILDA = A + I
A_TILDA = A + I

# Create the Inverse Squared Diagonal Matrix of A_TILDA
D_TILDA_INVERSE_SQUARED = torch.zeros_like(A_TILDA)

for i in range(len(A_TILDA)):
    D_TILDA_INVERSE_SQUARED[i, i] = A_TILDA[i].sum().pow(-0.5)

# Finally A_HAT = D_TILDA_INVERSE_SQUARED @ A_TILDA @ D_TILDA_INVERSE_SQUARED
A_HAT = D_TILDA_INVERSE_SQUARED @ A_TILDA @ D_TILDA_INVERSE_SQUARED

In [10]:
# Create a two layered GCN model
A_HAT.shape

torch.Size([2708, 2708])

In [11]:
# CREATE X & Y: Preparing Dataset
dataset = papers.values
pairwise_indices = [x[0] for x in dataset]
X_PRE = []
Y_PRE = []
for i in range(len(dataset)):
    idx_ = pairwise_indices.index(paper_idx_to_id[i])    
    X_PRE.append(list(dataset[idx_][1:-1]))
    Y_PRE.append(dataset[idx_][-1])

X = torch.tensor(X_PRE, dtype=torch.float32)

# Convert Y to Onehot

y_idx_to_label = {idx_: label_ for idx_, label_ in enumerate(set(Y_PRE))}
y_label_to_idx = {label_: idx_ for idx_, label_ in y_idx_to_label.items()}

Y_ONE_HOT = torch.zeros(len(Y_PRE), len(set(Y_PRE)))
for idx_ in range(len(Y_PRE)):
    row = torch.zeros(7)
    row[y_label_to_idx[Y_PRE[idx_]]] = 1
    Y_ONE_HOT[idx_] = row

In [12]:
# Define Model Weights
feature_vector_size = X.shape[1]
hidden_layer_size = 100
output_size = len(set(Y_PRE))

# Weights at layer 0, 1
W_0 = torch.randn(feature_vector_size, hidden_layer_size, dtype=torch.float32, requires_grad=True)
W_1 = torch.randn(hidden_layer_size, output_size, dtype=torch.float32, requires_grad=True)

In [13]:
epochs = 1000
lr = 0.01

for epoch in range(epochs):
    # DO FORWARD PASS
    # Calculate first hidden layer outputs
    H_1 = ((A_HAT @ X) @ W_0).relu()  # H_1 reduces feature vector space from 1433 -> 100

    # Calculate Output from our 2-layer GCN
    O = ((A_HAT @ H_1) @ W_1).relu()

    # Calculate loss
    loss = CrossEntropyLoss()
    model_loss = loss(O, Y_ONE_HOT)

    print(f"Epoch: {epoch}; Model Loss: {model_loss.item()}")

    W_0.grad = None
    W_1.grad = None

    # DO BACKWARD PASS
    model_loss.backward()
    W_0.data -= lr * W_0.grad
    W_1.data -= lr * W_1.grad

Epoch: 0; Model Loss: 36.43867874145508
Epoch: 1; Model Loss: 34.91018295288086
Epoch: 2; Model Loss: 33.516510009765625
Epoch: 3; Model Loss: 32.23931884765625
Epoch: 4; Model Loss: 31.07048797607422
Epoch: 5; Model Loss: 29.99709701538086
Epoch: 6; Model Loss: 29.00690460205078
Epoch: 7; Model Loss: 28.082691192626953
Epoch: 8; Model Loss: 27.219947814941406
Epoch: 9; Model Loss: 26.417091369628906
Epoch: 10; Model Loss: 25.66851043701172
Epoch: 11; Model Loss: 24.967445373535156
Epoch: 12; Model Loss: 24.306983947753906
Epoch: 13; Model Loss: 23.686738967895508
Epoch: 14; Model Loss: 23.102771759033203
Epoch: 15; Model Loss: 22.555030822753906
Epoch: 16; Model Loss: 22.033605575561523
Epoch: 17; Model Loss: 21.530256271362305
Epoch: 18; Model Loss: 21.041397094726562
Epoch: 19; Model Loss: 20.568418502807617
Epoch: 20; Model Loss: 20.11102867126465
Epoch: 21; Model Loss: 19.671627044677734
Epoch: 22; Model Loss: 19.24673080444336
Epoch: 23; Model Loss: 18.8330135345459
Epoch: 24; Mo

In [1]:
import torch
import numpy as np

In [5]:
n_hidden_neurons = 100
vocab_size = 25

In [3]:
np.random.seed(0)
g = torch.Generator().manual_seed(2147483647)

<torch._C.Generator at 0x113173090>

In [18]:
torch_a = torch.randn(
    n_hidden_neurons,
    vocab_size,

)

In [19]:
torch_a

tensor([[-0.6007, -0.0498, -0.0094,  ..., -1.3090,  0.1370, -0.4259],
        [-1.6263,  0.3625, -0.0152,  ...,  0.0912,  0.0984,  0.6065],
        [ 0.5099,  1.9434,  1.4643,  ..., -0.5361,  0.7526, -0.9205],
        ...,
        [ 0.4078,  1.2127, -1.1502,  ..., -0.4776, -0.6093, -1.1972],
        [ 1.2646,  1.0890, -0.8633,  ..., -0.0072, -0.8836, -0.0283],
        [-0.2326,  0.5926, -0.1806,  ...,  0.0436, -1.6337, -0.1688]])

In [8]:
np_a = np.random.randn(n_hidden_neurons, vocab_size)

In [9]:
np_a

array([[ 1.76405235,  0.40015721,  0.97873798, ...,  0.8644362 ,
        -0.74216502,  2.26975462],
       [-1.45436567,  0.04575852, -0.18718385, ...,  0.77749036,
        -1.61389785, -0.21274028],
       [-0.89546656,  0.3869025 , -0.51080514, ...,  1.13940068,
        -1.23482582,  0.40234164],
       ...,
       [-0.54735557, -0.55079943,  0.7920415 , ..., -0.47360406,
         0.30365647,  1.03395699],
       [ 1.90934263,  1.66387312,  0.90082276, ..., -2.31377311,
        -0.8425717 , -1.54292145],
       [-0.40176374, -0.4152314 , -0.67366417, ...,  1.13638808,
         0.67161657, -0.97416744]])