# Test Dataset to check

In [1]:
import multiprocessing

# CPU count as reported by multiprocessing.cpu_count().
NUM_PROCESSORS = multiprocessing.cpu_count()
# print("Cpu count: ", NUM_PROCESSORS)

In [2]:
# Interactive shells (like Jupyter) cannot handle CPU multiprocessing well, so check which environment the code is running in.
# We write the code in Jupyter for understanding purposes, but the final execution will be in a shell.
from ipynb.fs.full.Utils import isnotebook
from ipynb.fs.full.Dataset import get_data, generate_synthetic
import networkx as nx
from torch_geometric.utils import to_networkx, from_networkx
import torch_geometric.utils.homophily as homophily
import copy
import ipynb.fs.full.utils.MoonGraph as MoonGraph

In [3]:
import torch
import torch.nn as nn
from torch_sparse import SparseTensor
from tqdm import tqdm
import math
import time
import torch.nn.functional as F

import random
import numpy as np

# Pin the global Python and NumPy random generators to one fixed seed.
random.seed(12345)
np.random.seed(12345)

In [4]:
import sklearn
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from multiprocessing.pool import ThreadPool, Pool

## Random Sparse

In [5]:
class RandomSparse():
    """Random edge sparsifier that keeps at most ``K`` edges per source node.

    Parameters
    ----------
    data : graph data object
        Must expose ``num_nodes``, ``num_edges`` and ``edge_index``
        (presumably a PyG ``Data`` whose ``edge_index`` is ``[2, E]`` --
        confirm against the caller).
    K : int
        Maximum number of edges retained for each node.
    log : bool, optional
        When true, a tqdm progress bar over the nodes is displayed.
    """

    def __init__(self, data, K, log = False):

        self.N = N = data.num_nodes
        self.E = E = data.num_edges
        self.data = data
        self.log = log
        self.K = K

        # Each stored value is the column position of that edge inside
        # ``data.edge_index``; slicing a row of ``adj`` therefore yields the
        # ids of the edges whose first endpoint is that node.
        self.adj = SparseTensor(
            row=data.edge_index[0], col=data.edge_index[1],
            value=torch.arange(E, device=data.edge_index.device),
            sparse_sizes=(N, N))

    def sparse(self):
        """Sample up to ``K`` distinct edges for every node.

        Nodes with ``K`` or fewer edges keep all of them (in their stored
        order); nodes with more edges keep ``K`` of them drawn uniformly
        *without replacement* from NumPy's global RNG, so no edge is returned
        twice. Nodes without edges contribute nothing.

        Returns
        -------
        The columns of ``data.edge_index`` that were selected, concatenated
        node by node.
        """
        nodes = range(self.N)
        if self.log:
            # Iterating the wrapper advances and closes the bar automatically.
            nodes = tqdm(nodes, desc='Nodes')

        kept = []
        for u in nodes:
            _, _, edge_ids = self.adj[u, :].coo()
            degree = len(edge_ids)

            if degree > self.K:
                # replace=False guarantees K distinct positions; the previous
                # randint-based draw could pick the same edge repeatedly.
                chosen = np.random.choice(degree, size=self.K, replace=False)
                chosen = torch.as_tensor(chosen, dtype=torch.long,
                                         device=edge_ids.device)
                edge_ids = edge_ids[chosen]

            kept.append(edge_ids)

        if kept:
            kept = torch.cat(kept)
        else:
            # Graph without nodes: select no columns at all.
            kept = torch.empty(0, dtype=torch.long,
                               device=self.data.edge_index.device)

        return self.data.edge_index[:, kept]

# Main

In [6]:
if __name__ == '__main__':

    # Caption / ``method`` pairs for the three homophily figures printed
    # before and after sparsification.
    homophily_reports = (
        ("Node Homophily:", 'node'),
        ("Edge Homophily:", 'edge'),
        ("Edge_insensitive Homophily:", 'edge_insensitive'),
    )

    # Load Cora and replace its edges with the synthetic graph.
    data, dataset = get_data('Cora')
    data = generate_synthetic(data, d=100, h=0.25, train=0.1, random_state=None, log=False)
    print(data)

    # Statistics of the graph before sparsification.
    for caption, method in homophily_reports:
        print(caption, homophily(data.edge_index, data.y, method=method))
    print('Degree: ', data.num_edges / data.num_nodes)

    # Time the random sparsification with K = 3 edges per node.
    rand_sparse = RandomSparse(data, K = 3, log = True)
    started = time.time()
    edge_index = rand_sparse.sparse()
    elapsed = time.time() - started
    print("Execution time: ", elapsed)

    # Same statistics on the sparsified edge set.
    for caption, method in homophily_reports:
        print(caption, homophily(edge_index, data.y, method=method))
    print('Sparse Degree: ', edge_index.shape[1] / data.num_nodes)

Data directory:  /scratch/gilbreth/das90/Dataset/
Result directory: /scratch/gilbreth/das90/Dataset/RESULTS/

Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True
Data(x=[2708, 1433], edge_index=[2, 270800], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Node Homophily: 0.2527381181716919
Edge Homophily: 0.25
Edge_insensitive Homophily: 0.11672463268041611
Degree:  100.0


Nodes: 100%|██████████| 2708/2708 [00:00<00:00, 5065.78it/s]

Execution time:  0.5447683334350586
Node Homophily: 0.24445265531539917
Edge Homophily: 0.25886261463165283
Edge_insensitive Homophily: 0.12290727347135544
Sparse Degree:  3.0





In [7]:
# data, dataset = get_data('Cora', log= False)
# data = generate_synthetic(data, d=5, h=0.25, train=0.1, random_state=1, log=True)
# print(data)

# print("Node Homophily:", homophily(data.edge_index, data.y, method='node'))
# print("Edge Homophily:", homophily(data.edge_index, data.y, method='edge'))
# print("Edge_insensitive Homophily:", homophily(data.edge_index, data.y, method='edge_insensitive'))    

In [8]:
# submodular_weight = KNNWeight(data, 'euclidean')
# S_G, S_edge = submodular_weight.lazy_greedy_weight(0); print(S_G); print(S_edge);
# print(data.edge_index[:,S_edge])    
# neighbors = data.edge_index[:,S_edge][1]
# print(neighbors)
# print(data.y[0])
# print(data.y[neighbors])