In [1]:
import os
import time
import math
import copy
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import scipy
import scipy.stats as st
from numpy.linalg import eig, eigh
from sklearn.metrics import roc_auc_score, mean_absolute_error, accuracy_score

import networkx as nx
import torch_geometric
from torch_geometric.datasets import Planetoid, Coauthor, Flickr, PPI, GitHub, WikiCS, FacebookPagePage
from torch_geometric.utils import to_scipy_sparse_matrix, to_undirected, degree
from ogb.nodeproppred.dataset_pyg import PygNodePropPredDataset
from ogb.linkproppred.dataset_pyg import PygLinkPropPredDataset
from torch_geometric.utils import get_laplacian
from torch_geometric.transforms import ToUndirected

import time
from scipy.io import loadmat
from collections import Counter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def check_symmetric(a, rtol=1e-05, atol=1e-05):
    return np.allclose(a, a.T, rtol=rtol, atol=atol)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def init_params(module):
    if isinstance(module, nn.Linear):
        module.weight.data.normal_(mean=0.0, std=0.01)
        if module.bias is not None:
            module.bias.data.zero_()

def mean_confidence_interval(data, confidence=0.95):
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), st.sem(a)
    h = se * st.t.ppf((1 + confidence) / 2., n-1)
    return m, h

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    
def connected_components(sparse_adj):
    G = nx.from_scipy_sparse_matrix(sparse_adj)
    cc = nx.connected_components(G)
    
    components = []
    lens = []

    for c in cc:
        c = list(c)
        components.append(c)
        lens.append(len(c))

    return lens, components

def normalize_sparse_adj(A):
    deg = np.array(A.sum(axis=0)).flatten()
    D_ = scipy.sparse.diags(deg ** -0.5)
    A_ = D_.dot(A.dot(D_))
    L_ = scipy.sparse.eye(adj.shape[0]) - A_
    return L_

def normalize_adj(adj):
    """ Symmetrically normalize adjacency matrix."""
    """ Copy from https://github.com/tkipf/gcn """
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()


def Eigen(adj, d):
    
    adj = normalize_adj(adj)
    lamb, X = sp.linalg.eigs(adj, d)
    lamb, X = lamb.real, X.real
    X = X[:, np.argsort(lamb)]

    return X


def Eigen_multi(adj, d):
    """
    Handle if the graph has multiple connected components
    Arguments are the same as Eigen
    """
    G = nx.from_scipy_sparse_matrix(adj)
    comp = list(nx.connected_components(G))
    X = np.zeros((adj.shape[0],d))
    for i in range(len(comp)):
        node_index = np.array(list(comp[i]))
        d_temp = min(len(node_index) - 2, d)
        if d_temp < 1:
            continue
        adj_temp = adj[node_index,:][:,node_index].asfptype()
        X[node_index,:d_temp] = Eigen(adj_temp, d_temp)
    return X


def arxiv_split_to_mask(data, split):    
    train_mask = torch.LongTensor([0]*data.num_nodes)
    train_mask[split['train']] = 1
    train_mask = train_mask.bool()
    
    val_mask = torch.LongTensor([0]*data.num_nodes)
    val_mask[split['valid']] = 1
    val_mask = val_mask.bool()
    
    test_mask = torch.LongTensor([0]*data.num_nodes)
    test_mask[split['test']] = 1
    test_mask = test_mask.bool()
    
    data.train_mask = train_mask
    data.val_mask = val_mask
    data.test_mask = test_mask
    
    data.y = data.y.view(-1)
    
    return data

In [15]:
data = loadmat('data/Harvard1.mat')
# Counter(data['local_info'][:, 5])

In [3]:
dataset = Planetoid('data/plane', 'Pubmed')

# dataset = Coauthor('data/coauthor', 'Physics')
# dataset = Coauthor('data/coauthor', 'CS')

# dataset = FacebookPagePage('data/facebook')
# dataset = GitHub('data/github')

# dataset = Flickr('data/flickr')
# dataset = PygNodePropPredDataset('ogbn-arxiv', root='data', transform=ToUndirected())
# dataset = PPI('data/ppi')

# dataset = Amazon('data/computer', 'Computers')
# dataset = Amazon('data/photo', 'Photo')
# dataset = WikiCS('data/wiki', is_undirected=True)

# dataset = TUDataset('data/MUTAG', 'MUTAG')

# dataset = Airports('data/air', 'USA')
# dataset = Airports('data/air', 'Brazil')
# dataset = Airports('data/air', 'Europe')

In [4]:
data = dataset[0]

In [5]:
data

Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717])

In [6]:
data.is_directed()

False

In [17]:
# Part EVD

index, attr = get_laplacian(data.edge_index, normalization='sym')
L = to_scipy_sparse_matrix(index, attr)
e, u = scipy.sparse.linalg.eigsh(L, k=100, which='SM', tol=1e-3)

In [None]:
# Full EVD

L = torch.FloatTensor(L.todense())
e, u = torch.linalg.eigh(L)

In [8]:
# detecting connected components

adj = to_scipy_sparse_matrix(data.edge_index)
lens, components = connected_components(adj)
print(lens)

AttributeError: module 'networkx' has no attribute 'from_scipy_sparse_matrix'

In [18]:
data.e = torch.FloatTensor(e)

In [19]:
data.u = torch.FloatTensor(u)

In [20]:
data

Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], e=[100], u=[19717, 100])

In [21]:
torch.save(data, 'data/pubmed-3.pt')

In [None]:
# prime_index = components[0]

# train_index = torch.where(data.train_mask)[0]
# valid_index = torch.where(data.val_mask)[0]
# test_index = torch.where(data.test_mask)[0]

# prime_test_index = torch.LongTensor(np.intersect1d(prime_index, test_index))

# prime_test_edge_index = torch_geometric.utils.subgraph(prime_test_index, data.edge_index, relabel_nodes=True)[0]
# prime_test_adj = to_scipy_sparse_matrix(prime_test_edge_index)
# lens, components = connected_components(prime_test_adj)