In [1]:
import os
import sys
# Make the sibling ``Submodular`` directory importable when the notebook is
# started from somewhere else. The membership test keeps the cell idempotent:
# re-running it no longer piles up duplicate entries on sys.path.
if not os.getcwd().endswith("Submodular") and '../Submodular' not in sys.path:
    sys.path.append('../Submodular')

In [2]:
import DeviceDir

# Resolve the working directories and the compute device through the
# project-local DeviceDir helper. DIR is later used as the path prefix for the
# cached cluster partitions; `device` is where the model and batches are moved.
DIR, RESULTS_DIR = DeviceDir.get_directory()
# NOTE(review): NUM_PROCESSORS presumably reports the available CPU count —
# confirm against DeviceDir.get_device().
device, NUM_PROCESSORS = DeviceDir.get_device()

## Dataset

In [3]:
from ipynb.fs.full.Dataset import get_data
from ipynb.fs.full.Dataset import datasets as available_datasets
from ipynb.fs.full.Utils import save_plot

In [4]:
import argparse
from argparse import ArgumentParser

#set default arguments here
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` is a trap with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy, so such a flag could never be
    switched off from the command line.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays falsy, matching what bool('') used to give.
    if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


def get_configuration():
    """Build the experiment configuration from the command line.

    Defaults are set here. Boolean options accept explicit values such as
    ``--log_info False``.

    Returns:
        tuple: ``(args, dict_args)`` — the parsed ``argparse.Namespace`` and
        the same values exposed as a dict (``vars(args)``).
    """
    parser = ArgumentParser()
    parser.add_argument('--epochs', type=int, default=1)
    parser.add_argument('--log_info', type=_str2bool, default=True)
    parser.add_argument('--pbar', type=_str2bool, default=False)
    parser.add_argument('--batch_size', type=int, default=2048)
    parser.add_argument('--learning_rate', type=float, default=0.01)
    parser.add_argument('--num_gpus', type=int, default=-1)
    parser.add_argument('--parallel_mode', type=str, default="dp", choices=['dp', 'ddp', 'ddp2'])
    parser.add_argument('--dataset', type=str, default="Cora", choices=available_datasets)
    #parser.add_argument('--use_normalization', action='store_false', default=True)
    parser.add_argument('--use_normalization', action='store_true')
    # Dummy option: absorbs the ``-f <kernel.json>`` argument that Jupyter
    # passes to the kernel process.
    parser.add_argument('-f')

    args = parser.parse_args()

    dict_args = vars(args)

    return args, dict_args

# Parse the configuration once; `args` is read as a module-level global by the
# model and training code (e.g. `args.log_info` toggles logging/progress bars).
args, dict_args = get_configuration()


## Packages

In [5]:
import torch
import torch.nn.functional as F
from torch.nn import ModuleList
from tqdm import tqdm

from torch_geometric.loader import ClusterData, ClusterLoader, NeighborSampler
from torch_geometric.nn import SAGEConv
import time
import numpy as np

### ClusterGCN model

In [6]:
class Net(torch.nn.Module):
    """Two-layer SAGEConv network used for the ClusterGCN experiments.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of the output layer (number of classes).
        hidden_channels: Width of the hidden layer. Defaults to 256, the
            value that used to be hard-coded.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=256):
        super().__init__()
        self.convs = ModuleList(
            [SAGEConv(in_channels, hidden_channels),
             SAGEConv(hidden_channels, out_channels)])

    def forward(self, x, edge_index):
        """Run all layers on one (sub)graph and return log-probabilities.

        ReLU and dropout (p=0.5, active only in training mode) are applied
        after every layer except the last.
        """
        last = len(self.convs) - 1
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if i != last:
                x = F.relu(x)
                x = F.dropout(x, p=0.5, training=self.training)
        return F.log_softmax(x, dim=-1)

    def inference(self, x_all, subgraph_loader):
        """Compute outputs for every node, one layer at a time.

        Args:
            x_all: Feature tensor for all nodes (indexed by node id).
            subgraph_loader: Loader yielding ``(batch_size, n_id, adj)``
                triples, where ``adj.to(device)`` unpacks to
                ``(edge_index, _, size)``.

        Returns:
            CPU tensor with the last layer's raw outputs (no log-softmax),
            concatenated in loader order.
        """
        last = len(self.convs) - 1

        pbar = None
        if args.log_info:
            pbar = tqdm(total=x_all.size(0) * len(self.convs))
            pbar.set_description('Evaluating')

        try:
            # Evaluation only: without no_grad every batch would build and
            # retain an autograd graph, wasting memory for nothing.
            with torch.no_grad():
                # Compute representations of nodes layer by layer, using *all*
                # available edges. This leads to faster computation in contrast
                # to immediately computing the final representations of each
                # batch.
                for i, conv in enumerate(self.convs):
                    xs = []
                    for batch_size, n_id, adj in subgraph_loader:
                        edge_index, _, size = adj.to(device)
                        x = x_all[n_id].to(device)
                        # The first size[1] rows are the target nodes of this batch.
                        x_target = x[:size[1]]
                        x = conv((x, x_target), edge_index)
                        if i != last:
                            x = F.relu(x)
                        xs.append(x.cpu())

                        if pbar is not None:
                            pbar.update(batch_size)

                    # Output of this layer becomes the input of the next one.
                    x_all = torch.cat(xs, dim=0)
        finally:
            # Always release the progress bar, even if a batch raised.
            if pbar is not None:
                pbar.close()

        return x_all

In [7]:
def train(DATASET_NAME, model, data, epochs=100):
    """Train ``model`` on ClusterGCN mini-batches, evaluating after every epoch.

    The graph is partitioned with ``ClusterData`` (partitions are cached under
    ``DIR + 'ClusterGCN/' + DATASET_NAME``), trained with Adam (lr=0.005) and
    evaluated on all nodes through ``model.inference``. Training stops early
    once the standard deviation of the last five training losses drops to
    1e-4 or below (checked from epoch 5 on). Per-run statistics are appended
    to ``Runtime/ClusterGCN.txt``.

    Args:
        DATASET_NAME: Dataset name; only used to name the partition cache dir.
        model: Network exposing ``forward(x, edge_index)`` and
            ``inference(x_all, subgraph_loader)``.
        data: Graph object providing ``num_nodes``, ``x``, ``y``,
            ``edge_index`` and ``train_mask`` / ``val_mask`` / ``test_mask``.
        epochs: Maximum number of training epochs.

    Returns:
        tuple: ``(best_acc, num_iteration)`` — the highest test accuracy seen
        in any epoch and the epoch at which training stopped.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    #optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

    sampler_dir = DIR+'ClusterGCN/'+DATASET_NAME
    os.makedirs(sampler_dir, exist_ok=True)

    # Roughly ten nodes per partition.
    num_parts = int(np.ceil(data.num_nodes/10)) #1500

    # Only spin up loader worker processes for large graphs.
    worker = 8 if data.num_nodes > 100000 else 0

    start_time = time.time()
    cluster_data = ClusterData(data, num_parts=num_parts, recursive=False,save_dir=sampler_dir)
    train_loader = ClusterLoader(cluster_data, batch_size=16, shuffle=True,num_workers=worker)
    subgraph_loader = NeighborSampler(data.edge_index, sizes=[25], batch_size=1024,shuffle=False, num_workers=worker)
    end_time = time.time()

    cluster_time = end_time-start_time

    if args.log_info:
        print("Cluster Time: ",cluster_time)
        print(cluster_data)

    best_acc = 0
    num_iteration = epochs
    train_losses = []

    val_accuracies = []
    train_accuracies = []
    test_accuracies = []
    training_times = []

    for epoch in range(1, epochs+1):

        ##training
        pbar = None
        if args.log_info:
            pbar = tqdm(total=len(train_loader))
            pbar.set_description(f'Epoch {epoch:02d}')

        epoch_start = time.time()
        model.train()
        total_loss = total_nodes = 0
        for batch in train_loader:
            batch = batch.to(device)
            nodes = batch.train_mask.sum().item()

            # A batch without training nodes yields a NaN mean loss, and
            # NaN * 0 would poison the running total (and with it the
            # convergence check), so such batches are skipped entirely.
            if nodes > 0:
                optimizer.zero_grad()
                out = model(batch.x, batch.edge_index)
                loss = F.nll_loss(out[batch.train_mask], batch.y[batch.train_mask])
                loss.backward()
                optimizer.step()

                total_loss += loss.item() * nodes
                total_nodes += nodes

            if pbar is not None:
                pbar.update(1)

        if pbar is not None:
            pbar.close()

        # Node-weighted mean loss; NaN when no training node was seen at all
        # (instead of a ZeroDivisionError).
        loss = total_loss / total_nodes if total_nodes > 0 else float('nan')
        train_losses.append(loss)

        epoch_end = time.time()
        training_times.append(epoch_end-epoch_start)

        ### evaluation
        model.eval()
        with torch.no_grad():  # no autograd bookkeeping needed for evaluation
            out = model.inference(data.x, subgraph_loader)
        y_pred = out.argmax(dim=-1)

        accs = []
        for mask in [data.train_mask, data.val_mask, data.test_mask]:
            count = mask.sum().item()
            correct = y_pred[mask].eq(data.y[mask]).sum().item()
            # An empty split reports 0.0 rather than dividing by zero.
            accs.append(correct / count if count > 0 else 0.0)

        train_acc, val_acc, test_acc = accs

        if test_acc > best_acc:
            best_acc = test_acc

        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)
        test_accuracies.append(test_acc)

        # Spread of the most recent (up to five) training losses.
        std_dev = np.std(train_losses[-5:])

        if args.log_info:
            print(f'Epoch: {epoch:03d}, Train Loss: {loss:.4f}, Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}, Std dev: {std_dev:.4f}')

        if epoch>=5 and std_dev<=1e-4:
            num_iteration = epoch

            if args.log_info:
                print("Iteration for convergence: ", epoch)
            break

    # Append the run statistics; the context manager closes the handle even
    # if one of the writes fails, and the directory is created on first use.
    os.makedirs("Runtime", exist_ok=True)
    avg_epoch_time = np.mean(training_times)
    with open("Runtime/ClusterGCN.txt",'a+') as acc_file:
        acc_file.write(f'\nparts {num_parts:1d} cluster time: {cluster_time:0.4f}\n')
        acc_file.write(str(train_losses))
        acc_file.write(str(train_accuracies))
        acc_file.write(str(val_accuracies))
        acc_file.write(str(test_accuracies))
        acc_file.write(str(training_times))
        acc_file.write(str(avg_epoch_time))
        acc_file.write(f'\nworker {worker:1d} avg epoch runtime {avg_epoch_time:0.8f}')

    return best_acc, num_iteration


In [8]:
def ClusterGCNperformanceSampler(DATASET_NAME, data, dataset, num_classes, epochs=1, train_neighbors=[-1,-1], test_neighbors=[-1,-1]):
    """Build a fresh ``Net`` for the dataset, train it, and report the outcome.

    ``train_neighbors`` and ``test_neighbors`` are accepted but not used by
    this function.

    Returns:
        tuple: ``(best_acc, num_iteration, model)`` — the two values returned
        by ``train`` followed by the trained model.
    """
    network = Net(dataset.num_features, num_classes)
    model = network.to(device)

    # Show the architecture only when verbose logging is on.
    if args.log_info:
        print(model)

    outcome = train(DATASET_NAME, model, data, epochs)
    best_acc, num_iteration = outcome
    return best_acc, num_iteration, model


In [9]:
# args.log_info = True
# DATASET_NAME = 'Cora'
# data, dataset = get_data(DATASET_NAME, DIR=None, log=False, h_score=True, split_no=0); print("")
# print(data)
# best_acc, num_iteration, _ = ClusterGCNperformanceSampler(DATASET_NAME, data, dataset, dataset.num_classes, epochs=10, train_neighbors=[-1,-1], test_neighbors=[-1,-1])
# print(best_acc, num_iteration)

In [10]:
# args.log_info = True
# DATASET_NAME = 'Yelp'
# data, dataset = get_data(DATASET_NAME, DIR=None, log=False, h_score=True, split_no=0); print("")
# print(data)

In [11]:
# sampler_dir = DIR+'ClusterGCNruntime/'+DATASET_NAME
# if not os.path.exists(sampler_dir):
#     os.makedirs(sampler_dir)


# start_time = time.time()
# cluster_data = ClusterData(data, num_parts=2000, recursive=False,save_dir=sampler_dir)
# train_loader = ClusterLoader(cluster_data, batch_size=16, shuffle=True,num_workers=8)
# subgraph_loader = NeighborSampler(data.edge_index, sizes=[25], batch_size=1024,shuffle=False, num_workers=8)
# end_time = time.time()

# cluster_time = end_time-start_time

# if args.log_info:
#     print("Cluster Time: ",cluster_time)
#     print(cluster_data)


# Batch Experiments

In [None]:
def batch_experiments(num_run=1, datasets=None, skip_datasets=('cornell5', 'Squirrel')):
    """Run the ClusterGCN benchmark over a list of datasets.

    For each dataset the model is trained ``num_run`` times (run ``i`` uses
    ``split_no=i``). The mean/std of the best test accuracy and of the
    stopping epoch are printed and appended to ``Results/ClusterGCN.txt``;
    the dataset name is also appended to ``Runtime/ClusterGCN.txt``.

    Side effect: sets the global ``args.log_info`` to ``False``.

    Args:
        num_run: Number of runs per dataset (must be >= 1).
        datasets: Dataset names to evaluate; ``None`` selects the built-in
            benchmark list.
        skip_datasets: Names that are silently skipped.

    Raises:
        ValueError: if ``num_run`` is smaller than 1.
    """
    if num_run < 1:
        # Averaging zero runs would otherwise fail late with an obscure
        # "cannot convert float NaN to integer" ValueError.
        raise ValueError("num_run must be at least 1")

    if datasets is None:
        datasets = [
            "Cornell",
            "Texas",
            "Wisconsin",
            "reed98",
            "amherst41",
            "penn94",
            "Roman-empire",
            "cornell5",
            "Squirrel",
            "johnshopkins55",
            "Actor",
            "Minesweeper",
            "Questions",
            "Chameleon",
            "Tolokers",
            "Flickr",
            "Amazon-ratings",
        ]

#     datasets = ["karate"]

    args.log_info = False
    runtime_filename = "Runtime/ClusterGCN.txt"

    # Create the output directories up front so that appending never fails
    # with FileNotFoundError on a fresh checkout.
    os.makedirs("Results", exist_ok=True)
    os.makedirs("Runtime", exist_ok=True)

    for DATASET_NAME in datasets:
        # Skip before printing so skipped names do not end up as dangling
        # prefixes in front of the next dataset's result line.
        if DATASET_NAME in skip_datasets:
            continue

        print(DATASET_NAME, end=' ')

        with open(runtime_filename,'a+') as acc_file:
            acc_file.write(f'{DATASET_NAME}\n')

        accs = []
        itrs = []

        for i in range(num_run):
            data, dataset = get_data(DATASET_NAME, DIR=None, log=False, h_score=False, split_no=i)

            # Multi-column labels are collapsed to the index of the largest entry.
            if len(data.y.shape) > 1:
                data.y = data.y.argmax(dim=1)

            # The class count always follows the labels actually present
            # (largest label + 1), which is what the previous two-step
            # reconciliation with dataset.num_classes boiled down to.
            num_classes = torch.max(data.y).item()+1

            # Large graphs get a much smaller epoch budget.
            if data.num_nodes<100000:
                max_epochs = 150
            else:
                max_epochs = 20

            accuracy, itr, _ = ClusterGCNperformanceSampler(DATASET_NAME, data, dataset, num_classes, epochs=max_epochs, train_neighbors=[8,4], test_neighbors=[8,4])

            accs.append(accuracy)
            itrs.append(itr)

        print(f'acc {np.mean(accs):0.4f} sd {np.std(accs):0.4f}, itr {int(np.mean(itrs)):d} sd {int(np.std(itrs)):d}')

        # Write the complete result line in one go: the handle cannot leak and
        # a failed run no longer leaves a half-written line behind.
        with open("Results/ClusterGCN.txt",'a+') as result_file:
            result_file.write(f'{DATASET_NAME} ')
            result_file.write(f'acc {np.mean(accs):0.4f} sd {np.std(accs):0.4f} itr {int(np.mean(itrs)):d} sd {int(np.std(itrs)):d}\n')
                

# Launch the benchmark sweep with a single run per dataset (split_no=0 only).
batch_experiments(num_run=1)

Actor 

Computing METIS partitioning...
*** Error in `/home/das90/.conda/envs/cent7/2020.11-py38/py38cu11/bin/python': corrupted size vs. prev_size: 0x0000560bbbe5296f ***
/usr/lib64/libc.so.6(+0x7f474)[0x2b5759f24474]
/usr/lib64/libc.so.6(+0x816a4)[0x2b5759f266a4]
/home/das90/.conda/envs/cent7/2020.11-py38/py38cu11/lib/python3.8/site-packages/torch_sparse/_metis_cuda.so(gk_free+0xb7)[0x2b58a79b61c7]
/home/das90/.conda/envs/cent7/2020.11-py38/py38cu11/lib/python3.8/site-packages/torch_sparse/_metis_cuda.so(libmetis__FreeRData+0x70)[0x2b58a79c6b10]
/home/das90/.conda/envs/cent7/2020.11-py38/py38cu11/lib/python3.8/site-packages/torch_sparse/_metis_cuda.so(libmetis__MultilevelBisect+0xce)[0x2b58a79b22ee]
/home/das90/.conda/envs/cent7/2020.11-py38/py38cu11/lib/python3.8/site-packages/torch_sparse/_metis_cuda.so(libmetis__MlevelRecursiveBisection+0x111)[0x2b58a79b2dd1]
/home/das90/.conda/envs/cent7/2020.11-py38/py38cu11/lib/python3.8/site-packages/torch_sparse/_metis_cuda.so(libmetis__MlevelRecursi