In [None]:
# !clear
# !pip install pytorch-lightning
# !pip install torch-geometric

In [1]:
import time
import argparse
import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim

from pygcn.utils import load_data, accuracy
from pygcn.models import GCN

import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

from torch_geometric.datasets import Planetoid
from torch_geometric.utils import k_hop_subgraph
from tqdm.notebook import tqdm

In [2]:
# Single seed shared by every random number generator used in the notebook.
seed = 42

# Seed NumPy's legacy global generator.
np.random.seed(seed)
# Seed PyTorch's default generator.
torch.manual_seed(seed)
# Seed the MPS backend generator explicitly as well.
torch.mps.manual_seed(seed)

In [3]:
class CORA_Dataset(torch.utils.data.Dataset):
    """Single-sample dataset that serves one whole Planetoid graph.

    The dataset always has length 1; its only item is the full graph as a
    tuple of tensors (see ``__getitem__``).

    Args:
        root: Directory where ``Planetoid`` stores / looks up the dataset.
        name: Planetoid dataset name.
    """

    def __init__(self, root='datasets/cora/', name='cora'):
        # ``super(CORA_Dataset).__init__()`` built an *unbound* super object and
        # never ran the base-class initialiser; the zero-argument form does.
        super().__init__()
        # Planetoid is itself a dataset of graphs; index 0 is its graph object.
        self.data = Planetoid(root=root, name=name)[0]

    def __len__(self):
        """Return 1: the whole graph is the only sample."""
        return 1

    def __getitem__(self, idx):
        """Return ``(x, edge_index, y, fit_mask, held_out_mask)`` for the graph.

        ``fit_mask`` is the union of the train and validation masks and
        ``held_out_mask`` is the test mask.

        Raises:
            IndexError: if ``idx`` does not address the single sample. Without
                this, plain iteration over the dataset would never terminate.
        """
        if idx not in (0, -1):
            raise IndexError(f"CORA_Dataset holds a single graph; got index {idx}")
        return self.data.x, self.data.edge_index, self.data.y,\
               self.data.train_mask | self.data.val_mask, self.data.test_mask
    
dataset = CORA_Dataset()
# The graph is a single sample, so each batch is the full graph with a leading
# batch dimension of 1 (GCN_module.training_step strips it again with `[0]`).
# This loader is required by the `trainer.fit(...)` cell further down.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

In [4]:
# Display the summary of the loaded graph object (tensor names and shapes).
dataset.data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [5]:
# Record the PyTorch version this notebook was run with.
torch.__version__

'2.2.0'

In [6]:
from torch_geometric.loader import NeighborLoader

In [7]:
# Neighbourhood-sampling loader seeded from the training nodes:
#   - two sampling hops, up to 30 neighbours drawn per node at each hop
#   - 128 seed (training) nodes per mini-batch
loader = NeighborLoader(
    dataset.data,
    num_neighbors=[30, 30],
    batch_size=128,
    input_nodes=dataset.data.train_mask,
)

In [8]:
# Print only the first sampled mini-batch to inspect its structure.
for batch in loader:
    print(batch)
    break

Data(x=[1504, 1433], edge_index=[2, 3324], y=[1504], train_mask=[1504], val_mask=[1504], test_mask=[1504], n_id=[1504], e_id=[3324], num_sampled_nodes=[3], num_sampled_edges=[2], input_id=[128], batch_size=128)


In [None]:
from torch_geometric.utils import degree
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

In [None]:
# Per-node degree: how often each node id occurs in the target row of
# edge_index, with the output sized to the number of rows of x.
deg = degree(dataset.data.edge_index[1], num_nodes=dataset.data.x.size(0))

In [None]:
from collections import Counter

# Number of distinct degree values to show (the most frequent ones).
num = 15
# Rows are (degree, number of nodes with that degree), most frequent first.
cnt = np.array(Counter(deg.numpy().astype(int)).most_common(num))
plt.bar(cnt[:, 0], cnt[:, 1])
# Label the figure so it can be read on its own when the notebook is skimmed.
plt.xlabel("node degree")
plt.ylabel("number of nodes")
plt.title(f"{num} most frequent node degrees");

In [None]:
# def kNN(x, i, local_nbhood, k):
#     dists = cdist([x[i].numpy()], x)[0]
#     dists[local_nbhood] = np.inf
#     ind = dists.argsort()[:k]
#     return torch.tensor(ind)

In [None]:
def _select_frame_nodes(i, local_nbhood, dists, d):
    """Return exactly ``d`` node indices used to estimate the frame of node ``i``.

    Args:
        i: Index of the centre node.
        local_nbhood: 1-D integer tensor with the 1-hop neighbourhood of ``i``.
        dists: ``(N, N)`` NumPy array of pairwise feature distances.
        d: Number of nodes to return.

    Returns:
        1-D integer tensor of node indices:
        * neighbourhood of size ``d``  -> returned unchanged;
        * smaller neighbourhood        -> padded with the nodes closest to
          ``i`` in feature space that are not already in it;
        * larger neighbourhood         -> trimmed to its ``d`` members closest
          to ``i`` in feature space.
    """
    size = local_nbhood.size(0)
    if size == d:
        return local_nbhood

    nbhood_np = local_nbhood.numpy()
    if size < d:
        dists_i = dists[i].copy()
        # Exclude nodes that are already selected from the padding candidates.
        dists_i[nbhood_np] = np.inf
        extra = dists_i.argsort()[: d - size]
        return torch.concat([local_nbhood, torch.tensor(extra)])

    # size > d: rank only the neighbourhood members. The previous code used
    # ``dists_i[~local_nbhood] = np.inf``; ``~`` on an integer index tensor is a
    # bitwise NOT (-k - 1), i.e. it masked unrelated positions counted from the
    # end instead of the complement of the neighbourhood.
    order = dists[i, nbhood_np].argsort()[:d]
    return local_nbhood[torch.tensor(order)]


def build_sheaf_laplacian(dataset, d, is_self_loops=True):
    """Build one orthogonal block per edge from local SVD frames.

    For every node, ``d`` nodes are selected around it (see
    ``_select_frame_nodes``) and the first ``d`` left singular vectors of their
    transposed feature matrix form an ``n x d`` frame ``O_i``. For every edge
    ``(i, j)`` the block is ``U @ V^T`` from the SVD of ``O_i @ O_j^T``.

    Args:
        dataset: Indexable object whose item 0 is a 5-tuple starting with the
            feature matrix ``x`` (``N x n``) and ``edge_index`` (``2 x E``).
        d: Number of nodes per local frame / number of frame columns.
        is_self_loops: If True, append one ``n x n`` identity block per node
            after the edge blocks.

    Returns:
        Tensor of shape ``(E, n, n)``, or ``(E + N, n, n)`` with self loops.
    """
    x, edge_index, _, _, _ = dataset[0]

    num_nodes, n = x.size(0), x.size(1)

    O_matrices = torch.empty(num_nodes, n, d)

    # Pairwise feature-space distances between all nodes (N x N).
    x_np = x.numpy()
    dists = cdist(x_np, x_np)

    for i in tqdm(range(num_nodes)):
        # Element 0 of the k_hop_subgraph result is the set of node indices.
        local_nbhood = k_hop_subgraph(i, 1, edge_index, relabel_nodes=False)[0]
        local_nbhood = _select_frame_nodes(i, local_nbhood, dists, d)

        U, _, _ = np.linalg.svd(x[local_nbhood].T.numpy())
        O_matrices[i] = torch.from_numpy(U[:, :d])  # n x d

    # NOTE(review): every block is n x n with n = number of input features, so
    # this tensor grows as E * n^2. If the model expects d x d maps built from
    # O_i^T @ O_j instead, this needs changing - confirm against the GCN code.
    sheaf_laplacian = torch.empty(edge_index.size(1), n, n)

    for k in tqdm(range(edge_index.size(1))):
        i, j = edge_index[:, k]
        mul = torch.matmul(O_matrices[i], O_matrices[j].T)  # n x n
        U, _, V_T = np.linalg.svd(mul.numpy())
        # U @ V^T is the orthogonal factor of the SVD of ``mul``.
        sheaf_laplacian[k] = torch.from_numpy(np.dot(U, V_T))

    if is_self_loops:
        # One identity block per node, appended after the edge blocks.
        self_laplac = torch.eye(n).repeat(num_nodes, 1, 1)
        sheaf_laplacian = torch.concat([sheaf_laplacian, self_laplac], dim=0)

    return sheaf_laplacian
    
        
        

In [None]:
# Model / optimiser hyper-parameters.
hidden = 16
dropout = 0.5
weight_decay = 5e-4

# Problem sizes read off the loaded graph: feature width and label count
# (largest label id + 1).
nfeat = dataset.data.x.size(1)
nclass = dataset.data.y.max().item() + 1

# Build the per-edge blocks once, using `hidden` as the frame size, and keep
# them in float32.
sheaf_laplacian = build_sheaf_laplacian(dataset, hidden).to(torch.float32)

In [None]:
import os

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing the cached tensor.
os.makedirs('weights', exist_ok=True)
torch.save(sheaf_laplacian, f'weights/sheaf_laplacian_cora_{hidden}.pt')

In [None]:
# sheaf_laplacian = torch.load('sheaf_laplacian.pt')

In [None]:
# Device string shared by the model, the pre-computed tensor and the Trainer's
# `accelerator` argument in the cells below.
device = "cpu"

In [None]:
class GCN_module(pl.LightningModule):
    """Lightning wrapper around the sheaf-aware ``GCN`` model.

    The model is configured from the notebook-level globals ``nfeat``,
    ``hidden``, ``nclass``, ``dropout``, ``sheaf_laplacian`` and ``device``;
    the optimiser additionally reads ``weight_decay``.

    Each batch is the 5-tuple produced by ``CORA_Dataset`` wrapped by a
    ``DataLoader`` with ``batch_size=1``:
    ``(x, edge_index, y, train_mask, val_mask)``, every entry carrying a
    leading batch dimension of 1. Note that ``CORA_Dataset`` fills the fourth
    slot with train|val and the fifth with the test mask.

    Args:
        learning_rate: Learning rate passed to Adam.
    """

    def __init__(self, learning_rate=0.01):
        super().__init__()
        self.model = GCN(nfeat=nfeat,
                         nhid=hidden,
                         nclass=nclass,
                         dropout=dropout,
                         sheaf_laplacian=sheaf_laplacian.to(device))
        self.model.to(device)

        self.learning_rate = learning_rate
        self.loss_fn = F.nll_loss

    def configure_optimizers(self):
        """Return an Adam optimiser over all parameters with weight decay."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate,
                                weight_decay=weight_decay)

    def forward(self, features, adj):
        """Delegate to the wrapped model."""
        return self.model(features, adj)

    def _masked_metrics(self, batch, use_val_mask):
        """Run the model on the full graph; return (loss, accuracy) on one mask.

        ``use_val_mask`` selects the fifth batch entry instead of the fourth.
        """
        x, edge_index, y, train_mask, val_mask = batch
        # Drop the leading batch dimension added by the DataLoader.
        x, edge_index, y = x[0], edge_index[0], y[0]
        mask = (val_mask if use_val_mask else train_mask)[0]

        output = self.model(x, edge_index)
        return self.loss_fn(output[mask], y[mask]), accuracy(output[mask], y[mask])

    def training_step(self, train_batch, batch_idx):
        """Compute, log and return the loss on the training mask."""
        loss_train, acc_train = self._masked_metrics(train_batch, use_val_mask=False)

        self.log("loss_train", loss_train, prog_bar=True)
        self.log("acc_train", acc_train, prog_bar=True)

        return loss_train

    def validation_step(self, val_batch, batch_idx):
        """Log loss and accuracy on the validation mask.

        These metrics used to be computed inside ``training_step``, i.e. on the
        training forward pass with dropout enabled, while this hook was a no-op.
        Computing them here lets Lightning run the pass in evaluation mode.
        Metric names are unchanged (``loss_val`` / ``acc_val``).
        """
        loss_val, acc_val = self._masked_metrics(val_batch, use_val_mask=True)

        self.log("loss_val", loss_val, prog_bar=True)
        self.log("acc_val", acc_val, prog_bar=True)
        

In [None]:
# !rm -r lightning_logs

In [None]:
# Optional: keep the checkpoint with the best validation accuracy.
# checkpoint_callback = pl.callbacks.ModelCheckpoint(monitor="acc_val", mode="max")

# Train for 400 epochs on the device chosen above.
trainer = pl.Trainer(
    max_epochs=400,
    accelerator=device,
    # callbacks=checkpoint_callback,
)

In [None]:
# Instantiate the Lightning module and place it on the chosen device.
module = GCN_module()
module = module.to(device)

# The same single-graph loader feeds both the training and validation loops.
trainer.fit(
    module,
    train_dataloaders=dataloader,
    val_dataloaders=dataloader,
)

## Baseline: plain GCN with a manual training loop

In [None]:
# Delete the `runs` directory left over from earlier sessions (the log
# directory the TensorBoard command near the end of the notebook points at).
!rm -r runs

In [None]:
# SummaryWriter was never imported anywhere in the notebook, so this cell
# raised NameError on a fresh kernel.
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()

In [None]:
# Load Cora through the pygcn helper. Presumably this returns the adjacency
# matrix, node features, labels and the train/val/test index tensors in that
# order (names taken from the unpacking below) - verify against pygcn.utils.
adj, features, labels, idx_train, idx_val, idx_test = load_data('pygcn/data/cora/')

In [None]:
# Hyper-parameters of the baseline run.
hidden = 16
dropout = 0.5
lr = 0.01
weight_decay = 5e-4
device = "cpu"

# Model sized from the loaded data: input width = feature columns,
# output width = largest label id + 1.
model = GCN(
    nfeat=features.shape[1],
    nhid=hidden,
    nclass=labels.max().item() + 1,
    dropout=dropout,
)

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Move the model and every input tensor to the selected device.
model.to(device)
features, adj, labels, idx_train, idx_val, idx_test = (
    tensor.to(device)
    for tensor in (features, adj, labels, idx_train, idx_val, idx_test)
)


In [None]:
def train(epoch, fastmode=False):
    """Run one full-batch optimisation step and print train/val metrics.

    Uses the notebook-level ``model``, ``optimizer``, ``features``, ``adj``,
    ``labels``, ``idx_train`` and ``idx_val``.

    Args:
        epoch: Zero-based epoch index (printed one-based).
        fastmode: If True, skip the extra evaluation pass and report validation
            metrics from the training-mode output (dropout active). By default
            validation is measured on a separate forward pass in eval mode.
    """
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    # log
    # writer.add_scalar("Loss/train", loss_train, epoch)
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    if not fastmode:
        # Evaluate validation performance on a separate pass so that dropout is
        # deactivated; no gradients are needed for reporting.
        model.eval()
        with torch.no_grad():
            output = model(features, adj)

    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))
    
    
    
    

def test():
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))


In [None]:
# Number of full-batch optimisation steps for the baseline model.
epochs = 200

# Wall-clock start of the whole run (each train() call also times itself).
t_total = time.time()
for epoch in range(epochs):
    train(epoch)
    
# Flush the TensorBoard writer. NOTE(review): the add_scalar call inside
# train() is commented out, so nothing appears to be written to it.
writer.flush()


print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

In [None]:
# Close the TensorBoard writer now that training has finished.
writer.close()

In [None]:
# tensorboard --logdir=runs

In [None]:
# Testing: print loss and accuracy of the trained baseline model on idx_test.
test()