In [1]:
import random
import multiprocessing
import pandas as pd
import os
import torch

NUM_GPUS=0

# Select the compute device once; later cells read the module-level `device`.
try:
    if torch.cuda.is_available():
        device = torch.device("cuda")
        NUM_GPUS=torch.cuda.device_count()
        print('There are %d GPU(s) available.' % NUM_GPUS)
        print('We will use the GPU:', torch.cuda.get_device_name())
    else:
        print('No GPU available, using the CPU instead.')
        device = torch.device("cpu")
except Exception as cuda_error:
    # Only ordinary errors trigger the CPU fallback; KeyboardInterrupt and
    # SystemExit are no longer swallowed as they were by a bare `except:`.
    print('Cuda error using CPU instead.')
    print('Reason:', cuda_error)
    device = torch.device("cpu")
    # Keep the GPU count consistent with the device that is actually used
    # (the failure may have happened after device_count() succeeded).
    NUM_GPUS=0

print(device)

# To force CPU execution regardless of the probe above, uncomment:
# device = torch.device("cpu")
# print(device)

# Number of logical CPUs reported by the OS.
NUM_PROCESSORS=multiprocessing.cpu_count()
print("Cpu count: ",NUM_PROCESSORS)

There are 2 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB
cuda
Cpu count:  20


In [2]:
from pathlib import Path

import platform

# Network name of this machine. platform.node() is available on every OS,
# whereas os.uname() does not exist on Windows.
HOSTNAME = platform.node()

# Choose the dataset root from the hostname prefix; unknown machines fall
# back to a directory relative to the notebook.
if HOSTNAME.startswith('gilbreth'):
    DIR='/scratch/gilbreth/das90/Dataset/'
elif HOSTNAME.startswith('unimodular'):
    DIR='/scratch2/das90/Dataset/'
elif HOSTNAME.startswith('Siddharthas'):
    DIR='/Users/siddharthashankardas/Purdue/Dataset/'
else:
    DIR='./Dataset/'

Path(DIR).mkdir(parents=True, exist_ok=True)

# Results live in a sub-directory of the dataset root.
RESULTS_DIR=DIR+'RESULTS/'
Path(RESULTS_DIR).mkdir(parents=True, exist_ok=True)

print("Data directory: ", DIR)
print("Result directory:", RESULTS_DIR)

Data directory:  /scratch/gilbreth/das90/Dataset/
Result directory: /scratch/gilbreth/das90/Dataset/RESULTS/


## Packages

In [3]:
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.nn import init
from random import shuffle, randint
import torch.nn.functional as F
from torch_geometric.datasets import Reddit, PPI, Planetoid
from itertools import combinations, combinations_with_replacement
from sklearn.metrics import f1_score, accuracy_score
from sklearn.decomposition import TruncatedSVD
import matplotlib.pyplot as plt
import sys
from torch_geometric.data import Data
import logging
import time


## Dataset

In [4]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.datasets import Reddit, Reddit2, Flickr, Yelp, AmazonProducts, PPI,  OGB_MAG,  FakeDataset


In [5]:
# Dataset names handled by get_data: "Cora", "CiteSeer", "PubMed", "Reddit",
# "Reddit2", "Flickr", "Yelp", "AmazonProducts".
# Imported above but not handled by get_data: "OGB_MAG", "Fake" (FakeDataset).

In [6]:
# Name of the dataset loaded by the main cell; passed to get_data, which raises for names it does not handle.
DATASET_NAME='Cora' 

def get_data(DATASET_NAME='Cora'):
    """Load a dataset by name, print summary statistics and return its first graph.

    Every dataset is stored under ``DIR`` and loaded with the
    ``NormalizeFeatures`` transform.

    Args:
        DATASET_NAME: one of "Cora", "CiteSeer", "PubMed", "Reddit2",
            "Reddit", "Flickr", "Yelp" or "AmazonProducts".

    Returns:
        Tuple ``(data, dataset)`` where ``data`` is ``dataset[0]`` with ``y``
        as a 2-D per-node label matrix.

    Raises:
        ValueError: if ``DATASET_NAME`` is not one of the names above.
    """

    if DATASET_NAME in ["Cora", "CiteSeer", "PubMed"]:
        dataset = Planetoid(root=DIR+'Planetoid', name=DATASET_NAME, transform=NormalizeFeatures())

    elif DATASET_NAME == "Reddit2":
        dataset = Reddit2(root=DIR+'Reddit2', transform=NormalizeFeatures())

    elif DATASET_NAME == "Reddit":
        dataset = Reddit(root=DIR+'Reddit', transform=NormalizeFeatures())

    elif DATASET_NAME == "Flickr":
        dataset = Flickr(root=DIR+'Flickr', transform=NormalizeFeatures())

    elif DATASET_NAME == "Yelp":
        dataset = Yelp(root=DIR+'Yelp', transform=NormalizeFeatures())

    elif DATASET_NAME == "AmazonProducts":
        dataset = AmazonProducts(root=DIR+'AmazonProducts', transform=NormalizeFeatures())

    else:
        # ValueError is a subclass of Exception, so existing handlers still match.
        raise ValueError('dataset not found')

    print()
    print(f'Dataset: {dataset}:')
    print('======================')
    print(f'Number of graphs: {len(dataset)}')
    print(f'Number of features: {dataset.num_features}')
    print(f'Number of classes: {dataset.num_classes}')

    data = dataset[0]  # Get the first graph object.

    # Expand class indices to one-hot rows only when y holds one label per
    # node. A y that is already 2-D (presumably the multi-label datasets such
    # as Yelp / AmazonProducts -- verify per dataset) is left untouched:
    # one-hot encoding it again would add a spurious third dimension.
    if data.y.dim() == 1:
        data.y = F.one_hot(data.y, num_classes=dataset.num_classes)

    print()
    print(data)
    print('===========================================================================================================')

    # Gather some statistics about the graph.
    print(f'Number of nodes: {data.num_nodes}')
    print(f'Number of edges: {data.num_edges}')
    print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
    print(f'Number of training nodes: {data.train_mask.sum()}')
    print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
    print(f'Has isolated nodes: {data.has_isolated_nodes()}')
    print(f'Has self-loops: {data.has_self_loops()}')
    print(f'Is undirected: {data.is_undirected()}')

    return data, dataset

#data, dataset = get_data(DATASET_NAME)

### GraphSAGE model

In [7]:
import os
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.nn.functional as F
from torch.nn.parallel import DistributedDataParallel
from tqdm import tqdm

from torch_geometric.datasets import Reddit
from torch_geometric.loader import NeighborSampler
from torch_geometric.nn import SAGEConv
from torch_geometric.nn import GCNConv

In [8]:
from sklearn.metrics import accuracy_score, label_ranking_average_precision_score

In [9]:
#https://www.arangodb.com/2021/08/a-comprehensive-case-study-of-graphsage-using-pytorchgeometric/

class SAGE(torch.nn.Module):
    """GraphSAGE node classifier: a stack of ``num_layers`` ``SAGEConv`` layers.

    Args:
        in_channels: size of the input node features.
        out_channels: size of the output (one score per class).
        hidden_channels: width of every intermediate layer.
        num_layers: number of ``SAGEConv`` layers (>= 1). The stack is
            ``in -> hidden -> ... -> hidden -> out``; with a single layer it
            is ``in -> out``.

    Raises:
        ValueError: if ``num_layers`` is smaller than 1.
    """

    def __init__(self, in_channels, out_channels, hidden_channels, num_layers=2):
        super().__init__()
        if num_layers < 1:
            raise ValueError(f'num_layers must be >= 1, got {num_layers}')

        # Fixed seed so the layer initialisation is repeatable across runs.
        torch.manual_seed(1234567)
        self.num_layers = num_layers

        # Channel width at every layer boundary. Building the stack from this
        # list keeps len(self.convs) == num_layers for every valid value
        # (previously num_layers=1 still produced two convolutions).
        widths = [in_channels] + [hidden_channels] * (num_layers - 1) + [out_channels]
        self.convs = torch.nn.ModuleList(
            [SAGEConv(src, dst) for src, dst in zip(widths[:-1], widths[1:])]
        )

    def forward(self, x, adjs):
        """Run the sampled mini-batch through the stack.

        Args:
            x: features of all sampled nodes, target nodes first.
            adjs: one ``(edge_index, _, size)`` triple per layer; ``size[1]``
                is the number of target nodes for that layer.

        Returns:
            Log-softmax scores (over the last dimension) for the target nodes.
        """
        last_layer = self.num_layers - 1
        for i, (edge_index, _, size) in enumerate(adjs):
            x_target = x[:size[1]]  # Target nodes are always placed first.
            x = self.convs[i]((x, x_target), edge_index)
            if i != last_layer:
                x = F.relu(x)
                x = F.dropout(x, p=0.2, training=self.training)
        return x.log_softmax(dim=-1)

    @torch.no_grad()
    def inference(self, x_all, device, subgraph_loader):
        """Compute outputs for every node, one layer at a time.

        Each layer is applied to all batches of ``subgraph_loader`` before the
        next layer starts; the per-batch results are moved to CPU and
        concatenated to form the input of the next layer.

        Args:
            x_all: features for all nodes.
            device: device on which each batch is processed.
            subgraph_loader: iterable yielding ``(batch_size, n_id, adj)``.

        Returns:
            CPU tensor with the last layer's raw outputs (no log-softmax is
            applied here, unlike ``forward``).
        """
        # The context manager closes the progress bar even if a batch fails.
        with tqdm(total=x_all.size(0) * self.num_layers) as pbar:
            pbar.set_description('Evaluating')

            for i in range(self.num_layers):
                xs = []
                for batch_size, n_id, adj in subgraph_loader:
                    edge_index, _, size = adj.to(device)
                    x = x_all[n_id].to(device)
                    x_target = x[:size[1]]
                    x = self.convs[i]((x, x_target), edge_index)
                    if i != self.num_layers - 1:
                        x = F.relu(x)
                    xs.append(x.cpu())

                    pbar.update(batch_size)

                x_all = torch.cat(xs, dim=0)

        return x_all

In [10]:
def train(model, data, epochs=100, train_neighbors=[25,10]):
    """Train ``model`` on neighbour-sampled mini-batches and print metrics.

    After every epoch the mean training loss, the arg-max training accuracy
    and the label-ranking average precision (printed as "Multi-Accuracy") are
    reported. Every fifth epoch the model is evaluated on all nodes via
    ``model.inference`` and train/val/test figures are printed.

    Args:
        model: module called as ``model(x, adjs)`` during training and
            providing ``inference(x_all, device, subgraph_loader)``.
        data: graph object exposing ``x``, ``y``, ``edge_index``,
            ``train_mask``, ``val_mask`` and ``test_mask``. ``y`` is arg-maxed
            along its last dimension, so a 2-D label matrix is expected.
        epochs: number of passes over the training nodes.
        train_neighbors: neighbours sampled per layer for the training
            loader. The list is only read, never modified.

    Returns:
        The ``model`` that was passed in, trained in place.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    # NOTE(review): MultiLabelSoftMarginLoss applies a sigmoid to its input.
    # The SAGE model in this notebook returns log-softmax values from
    # forward(), which are never positive -- confirm this pairing is intended.
    criterion = nn.MultiLabelSoftMarginLoss()

    print("Train neighbors: ", train_neighbors)

    train_idx = data.train_mask.nonzero(as_tuple=False).view(-1)

    train_loader = NeighborSampler(data.edge_index, node_idx=train_idx,
                                   sizes=train_neighbors, batch_size=1024,
                                   shuffle=True, num_workers=0)

    # Full-neighbourhood, single-hop loader used for layer-wise evaluation.
    subgraph_loader = NeighborSampler(data.edge_index, node_idx=None,
                                      sizes=[-1], batch_size=2048,
                                      shuffle=False, num_workers=6)

    x, y = data.x.to(device), data.y.to(device)

    # Evaluation metrics are computed on CPU. Labels and masks do not change
    # during training, so convert them once instead of at every evaluation.
    # (The former `data.*_mask.to(device)` statements discarded their result
    # and therefore had no effect; they are gone.)
    labels_cpu = data.y.detach().cpu()
    labels_np = labels_cpu.numpy()
    true_class = labels_cpu.argmax(dim=-1)
    eval_masks = {
        'train': data.train_mask.cpu(),
        'val': data.val_mask.cpu(),
        'test': data.test_mask.cpu(),
    }

    # Maxima over the periodic test-set evaluations. They are monitoring
    # figures only: no checkpoint is selected using validation data.
    best_acc=0
    best_sk=0
    for epoch in range(1,epochs+1):

        total_loss = total_correct = 0
        total_sample=0
        total_acc = 0
        model.train()

        # The context manager closes the bar even if a batch raises.
        with tqdm(total=train_idx.size(0)) as pbar:
            pbar.set_description(f'Epoch {epoch:02d}')

            for batch_size, n_id, adjs in train_loader:
                # With a single entry in `sizes` the sampler yields one
                # adjacency instead of a list; normalise to a list so the
                # per-layer loop in the model works in both cases.
                if not isinstance(adjs, list):
                    adjs = [adjs]
                adjs = [adj.to(device) for adj in adjs]

                # The first `batch_size` sampled ids are the target nodes.
                batch_y = y[n_id[:batch_size]]

                optimizer.zero_grad()
                out = model(x[n_id], adjs)
                loss = criterion(out, batch_y)
                loss.backward()
                optimizer.step()

                total_loss += float(loss)

                batch_scores = torch.sigmoid(out)
                total_acc+=label_ranking_average_precision_score(batch_y.detach().cpu().numpy(),batch_scores.detach().cpu().numpy())*batch_size

                total_sample+=batch_size

                total_correct += int(out.argmax(dim=-1).eq(batch_y.argmax(dim=-1)).sum())

                pbar.update(batch_size)

        loss = total_loss / len(train_loader)
        approx_acc = total_correct / train_idx.size(0)

        print("Multi-Accuracy: ", total_acc/total_sample)
        print(f'Epoch: {epoch:03d}, Training Loss: {loss:.4f}, Training Accuracy: {approx_acc:.4f}')

        ####EVALUATION
        if epoch % 5 == 0:
            model.eval()
            with torch.no_grad():
                out = model.inference(x, device, subgraph_loader)
            out = out.detach().cpu()

            # Single-label view: arg-max prediction against arg-max label.
            res = out.argmax(dim=-1) == true_class
            # Ranking view: label-ranking average precision on sigmoid scores.
            scores_np = torch.sigmoid(out).numpy()

            acc = {}
            lrap = {}
            for split, mask in eval_masks.items():
                acc[split] = int(res[mask].sum()) / int(mask.sum())
            for split, mask in eval_masks.items():
                mask_np = mask.numpy()
                lrap[split] = label_ranking_average_precision_score(labels_np[mask_np], scores_np[mask_np])

            train_acc, val_acc, test_acc = acc['train'], acc['val'], acc['test']
            sk_train, sk_val, sk_test = lrap['train'], lrap['val'], lrap['test']

            print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}')

            print(f'Epoch: {epoch:03d}, Mult-Train: {sk_train:.4f}, Mult-Val: {sk_val:.4f}, Mult-Test: {sk_test:.4f}')

            if test_acc>best_acc:
                best_acc=test_acc

            if sk_test>best_sk:
                best_sk=sk_test

    print ("Best Test Accuracy, ",best_acc)
    print ("Best Test Sk Accuracy, ",best_sk)

    return model


In [11]:
def GSAGEperformance(data, dataset, epochs=20, train_neighbors=[25,10]):
    """Build a 256-hidden-unit SAGE model for ``dataset`` and train it on ``data``.

    The model is sized from ``dataset.num_features`` and
    ``dataset.num_classes``, moved to the module-level ``device``, printed,
    and handed to ``train``. Nothing is returned.
    """
    net = SAGE(
        in_channels=dataset.num_features,
        out_channels=dataset.num_classes,
        hidden_channels=256,
    )
    net = net.to(device)
    print(net)

    train(net, data, epochs=epochs, train_neighbors=train_neighbors)

    return None

### Main function

In [12]:
# data, dataset = get_data(DATASET_NAME)
# GSAGEperformance(data, dataset, epochs=10)

In [13]:
if __name__ == '__main__':
    # Load the configured dataset, then build and train the GraphSAGE model.
    data, dataset = get_data(DATASET_NAME)
    GSAGEperformance(data, dataset, epochs=20)
    


Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708, 7], train_mask=[2708], val_mask=[2708], test_mask=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True
SAGE(
  (convs): ModuleList(
    (0): SAGEConv(1433, 256)
    (1): SAGEConv(256, 7)
  )
)
Train neighbors:  [25, 10]


Epoch 01: 100%|██████████| 140/140 [00:00<00:00, 468.42it/s]


Multi-Accuracy:  0.3721938775510207
Epoch: 001, Training Loss: 0.4116, Training Accuracy: 0.1429


Epoch 02: 100%|██████████| 140/140 [00:00<00:00, 6369.90it/s]


Multi-Accuracy:  0.6002551020408166
Epoch: 002, Training Loss: 0.4089, Training Accuracy: 0.4286


Epoch 03: 100%|██████████| 140/140 [00:00<00:00, 6422.50it/s]


Multi-Accuracy:  0.831190476190476
Epoch: 003, Training Loss: 0.4060, Training Accuracy: 0.7143


Epoch 04: 100%|██████████| 140/140 [00:00<00:00, 6421.51it/s]


Multi-Accuracy:  0.9357142857142855
Epoch: 004, Training Loss: 0.4023, Training Accuracy: 0.8857


Epoch 05: 100%|██████████| 140/140 [00:00<00:00, 6609.89it/s]


Multi-Accuracy:  0.9299999999999999
Epoch: 005, Training Loss: 0.3971, Training Accuracy: 0.8786


Evaluating: 100%|██████████| 5416/5416 [00:00<00:00, 7740.68it/s]


Epoch: 005, Loss: 0.3971, Train: 0.9500, Val: 0.7100, Test: 0.6930
Epoch: 005, Mult-Train: 0.9750, Mult-Val: 0.8208, Mult-Test: 0.8141


Epoch 06: 100%|██████████| 140/140 [00:00<00:00, 5874.96it/s]


Multi-Accuracy:  0.9505952380952379
Epoch: 006, Training Loss: 0.3920, Training Accuracy: 0.9143


Epoch 07: 100%|██████████| 140/140 [00:00<00:00, 6161.95it/s]


Multi-Accuracy:  0.9678571428571429
Epoch: 007, Training Loss: 0.3842, Training Accuracy: 0.9429


Epoch 08: 100%|██████████| 140/140 [00:00<00:00, 6363.41it/s]


Multi-Accuracy:  0.9714285714285714
Epoch: 008, Training Loss: 0.3761, Training Accuracy: 0.9429


Epoch 09: 100%|██████████| 140/140 [00:00<00:00, 6585.72it/s]


Multi-Accuracy:  0.9726190476190477
Epoch: 009, Training Loss: 0.3673, Training Accuracy: 0.9500


Epoch 10: 100%|██████████| 140/140 [00:00<00:00, 6572.08it/s]


Multi-Accuracy:  0.9559523809523808
Epoch: 010, Training Loss: 0.3584, Training Accuracy: 0.9214


Evaluating: 100%|██████████| 5416/5416 [00:00<00:00, 7991.99it/s]


Epoch: 010, Loss: 0.3584, Train: 0.9929, Val: 0.7440, Test: 0.7590
Epoch: 010, Mult-Train: 0.9964, Mult-Val: 0.8439, Mult-Test: 0.8559


Epoch 11: 100%|██████████| 140/140 [00:00<00:00, 6361.55it/s]


Multi-Accuracy:  0.9738095238095237
Epoch: 011, Training Loss: 0.3451, Training Accuracy: 0.9500


Epoch 12: 100%|██████████| 140/140 [00:00<00:00, 6707.05it/s]


Multi-Accuracy:  0.975
Epoch: 012, Training Loss: 0.3341, Training Accuracy: 0.9500


Epoch 13: 100%|██████████| 140/140 [00:00<00:00, 6724.57it/s]


Multi-Accuracy:  0.9595238095238096
Epoch: 013, Training Loss: 0.3218, Training Accuracy: 0.9214


Epoch 14: 100%|██████████| 140/140 [00:00<00:00, 6673.29it/s]


Multi-Accuracy:  0.9845238095238094
Epoch: 014, Training Loss: 0.3063, Training Accuracy: 0.9714


Epoch 15: 100%|██████████| 140/140 [00:00<00:00, 6650.61it/s]


Multi-Accuracy:  0.9791666666666665
Epoch: 015, Training Loss: 0.2945, Training Accuracy: 0.9643


Evaluating: 100%|██████████| 5416/5416 [00:00<00:00, 7960.98it/s]


Epoch: 015, Loss: 0.2945, Train: 0.9929, Val: 0.7620, Test: 0.7740
Epoch: 015, Mult-Train: 0.9964, Mult-Val: 0.8537, Mult-Test: 0.8648


Epoch 16: 100%|██████████| 140/140 [00:00<00:00, 6251.82it/s]


Multi-Accuracy:  0.9797619047619047
Epoch: 016, Training Loss: 0.2791, Training Accuracy: 0.9643


Epoch 17: 100%|██████████| 140/140 [00:00<00:00, 6606.76it/s]


Multi-Accuracy:  0.9964285714285714
Epoch: 017, Training Loss: 0.2653, Training Accuracy: 0.9929


Epoch 18: 100%|██████████| 140/140 [00:00<00:00, 6642.11it/s]


Multi-Accuracy:  0.9809523809523808
Epoch: 018, Training Loss: 0.2546, Training Accuracy: 0.9643


Epoch 19: 100%|██████████| 140/140 [00:00<00:00, 6521.57it/s]


Multi-Accuracy:  0.9857142857142858
Epoch: 019, Training Loss: 0.2411, Training Accuracy: 0.9714


Epoch 20: 100%|██████████| 140/140 [00:00<00:00, 6634.53it/s]


Multi-Accuracy:  0.9892857142857143
Epoch: 020, Training Loss: 0.2279, Training Accuracy: 0.9786


Evaluating: 100%|██████████| 5416/5416 [00:00<00:00, 7554.94it/s]


Epoch: 020, Loss: 0.2279, Train: 0.9929, Val: 0.7700, Test: 0.7820
Epoch: 020, Mult-Train: 0.9964, Mult-Val: 0.8608, Mult-Test: 0.8700
Best Test Accuracy,  0.782
Best Test Sk Accuracy,  0.8700357142857149


In [14]:
# out = torch.empty(3, 5).uniform_(0, 1) 
# y=torch.bernoulli(out)

# print(out)
# print(y)

# sort_out = torch.sort(out,dim=1, descending=True)
# sort_out = sort_out.values
# print(sort_out)
# result = sort_out[:, :-1] - sort_out[:, 1:]
# print(result)

# torch.max(result, dim=1)

# tolerance=0.01
# threshold = torch.max(out,dim=1).values.view(-1,1)-tolerance
# print(threshold)

# (out>=threshold).type(torch.long)