In [8]:
import numpy as np
import networkx as nx
import os
import pandas as pd
import torch
import time
import math
import dgl as dgl
from dgl.nn import pytorch
import numpy as np
import torch as th
import dgl.function as fn
import torch.nn.functional as F
from dgl.data import CoraGraphDataset
from GraphSAGE.losses import compute_loss_multiclass
from utils import *
from model import *
from loss import *
import matplotlib.cm as cm
import matplotlib.pyplot as plt



def train(g, features, n_classes, in_feats, n_edges, labels, mask, Q, cuda, nn_model,loss_direction):
    """Train a graph model against a modularity-based loss and return the result.

    The model class is looked up by name (``nn_model``), optimised with SGD and
    a StepLR learning-rate schedule, and scored with ``evaluate_M``.

    Args:
        g: DGL graph; self-loops are added before training.
        features: node feature tensor; ``features.shape[1]`` sets the hidden size.
        n_classes: output size of the model, also passed to ``ModularityScore``.
        in_feats: input feature size passed to the model constructor.
        n_edges: edge count, used only for the throughput figure in the log line.
        labels: label tensor; moved to the GPU when ``cuda`` is set, otherwise
            unused in this function.
        mask: index/mask applied to the model output before computing the loss.
        Q: second argument of the loss and of ``evaluate_M``
            (presumably the modularity matrix -- TODO confirm against caller).
        cuda: when truthy, tensors, graph and model are moved to GPU ``gpu``.
        nn_model: name of the model class; ``'GCN'`` selects the positional
            constructor, any other name uses the keyword constructor below.
        loss_direction: forwarded unchanged to ``ModularityScore``.

    Returns:
        Tuple ``(modularity_score, C_hat, model_str, features)``: the final score
        as a numpy value, the first output of ``evaluate_M`` on CPU, ``str(model)``
        and the input features on CPU.
    """
    # --- hyperparameters -------------------------------------------------
    dropout = 0.0
    gpu = 0
    lr = 0.0005
    early_stop_rate = 0.000005  # relative loss change below which training stops
    n_epochs = 1000
    n_hidden = features.shape[1]  # number of hidden nodes
    n_layers = 1  # number of hidden layers
    weight_decay_gamma = 0.65  # multiplicative LR decay applied by StepLR
    self_loop = True
    early_stop = False
    visualize_model = False
    # Log / decay every 1% of the run; never 0, which would break both
    # StepLR and the ``epoch % step_size`` check for short runs.
    step_size = max(1, n_epochs // 100)

    if self_loop:
        g = dgl.add_self_loop(g)

    # deg^-0.5 per node, with infinities (zero in-degree) replaced by 0.
    # Currently not attached to the graph (see the commented line below).
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0

    if cuda:
        torch.cuda.set_device(gpu)
        features = features.cuda()
        labels = labels.cuda()
        g = g.to('cuda:{}'.format(gpu))
        mask = mask.cuda()
        norm = norm.cuda()
    # g.ndata['norm'] = norm.unsqueeze(1)

    # NOTE(security): eval() turns ``nn_model`` into a class object. This is only
    # safe while the name comes from trusted code; never pass user input here.
    model_cls = eval(nn_model)
    if nn_model == 'GCN':
        model = model_cls(g,
                          in_feats,
                          n_hidden,
                          n_classes,
                          n_layers,
                          F.relu,
                          dropout)
    else:
        model = model_cls(in_feat=in_feats,
                          out_feat=n_classes,
                          num_rels=3,
                          regularizer='basis',
                          num_bases=None,
                          bias=True,
                          activation=None,
                          self_loop=True,
                          low_mem=False,
                          dropout=0.0,
                          layer_norm=False
                          )
    if cuda:
        model.cuda()

    loss_fcn = ModularityScore(n_classes, cuda,loss_direction)

    if visualize_model:
        print_parameter(model)
        print_parameter(loss_fcn)

    # optimizer = torch.optim.Adam(model.parameters(),lr=lr)
    # SGD with a step-wise learning-rate decay.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    StepLR = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=weight_decay_gamma)

    dur = []  # wall-clock seconds of each optimisation step

    def _report(epoch, eval_loss, loss):
        # NOTE(review): the "Train_" label is attached to the evaluate_M score and
        # the "True_" label to |training loss| -- confirm the labels are not swapped.
        print("Epoch {} | Time(s) {}  Train_Modularity {} | True_Modularity {} | "
              "ETputs(KTEPS) {}".format(epoch, np.mean(dur), eval_loss, abs(loss),
                                         n_edges / np.mean(dur) / 1000))

    # No previous epoch yet: the early-stop comparison is skipped until these are set.
    last_score = None
    last_C_hat = None

    for epoch in range(n_epochs):
        model.train()
        t0 = time.time()
        C_hat = model(features)
        # only the masked rows contribute to the training loss
        loss = loss_fcn(C_hat[mask], Q)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        StepLR.step()

        dur.append(time.time() - t0)
        if epoch % step_size == 0:
            if visualize_model:
                print_parameter(model)
                print_parameter(loss_fcn)
            C_out, eval_loss = evaluate_M(C_hat, Q, cuda)
            _report(epoch, eval_loss, loss)

        if early_stop and last_score is not None:
            # Stop when the loss has plateaued or diverged to NaN, and fall back
            # to the previous epoch's (still valid) loss and output.
            if abs((loss - last_score) / last_score) < early_stop_rate or torch.isnan(loss).sum() > 0:
                loss = last_score
                C_hat = last_C_hat
                C_out, eval_loss = evaluate_M(C_hat, Q, cuda)
                _report(epoch, eval_loss, loss)
                break

        last_score = loss
        last_C_hat = C_hat

    # C_out = C_construction(model, features, mask)

    # Score of the raw input features, printed for comparison with the trained output.
    _, modularity_init = evaluate_M(features, Q, cuda)
    print('initial modularity is', modularity_init)
    C_hat, modularity_score = evaluate_M(C_hat, Q, cuda)
    if torch.isnan(modularity_score):
        # fall back to the last training loss when the evaluation is undefined
        modularity_score = loss
    print(C_hat)
    return modularity_score.cpu().detach().numpy(), C_hat.cpu(), model.__str__(),features.cpu()


def main(nx_g, nn_model,loss_direction):
    """Build the model inputs for a graph and train on them.

    Args:
        nx_g: input graph (a networkx graph, per the original note).
        nn_model: name of the model class, forwarded to ``train``.
        loss_direction: forwarded to ``train``.

    Returns:
        Tuple ``(train_result, modularity_classic)`` where ``train_result`` is the
        tuple returned by ``train`` and ``modularity_classic`` is the last value
        returned by ``generate_model_input``.
    """
    gpu = 0  # set to a negative value to force CPU
    # Use the GPU only when one is requested AND actually present, so the
    # pipeline still runs on CPU-only machines instead of raising in .cuda().
    cuda = gpu >= 0 and torch.cuda.is_available()

    # prepare training data
    (g, features, n_classes, in_feats, n_edges,
     labels, Q, mask, modularity_classic) = generate_model_input(nx_g, cuda)

    train_result = train(g, features, n_classes, in_feats, n_edges, labels, mask, Q,
                         cuda, nn_model, loss_direction)
    return train_result, modularity_classic





In [9]:
if __name__ == "__main__":
    # Run the GCN pipeline with loss_direction=1 and compare it with the Combo partition.
    test_number = 10  # not used in this cell
    work_dir = os.getcwd()
    nn_model = 'GCN'
    data_dir = os.path.join(work_dir, 'data/ComboSampleData/')
    # Temporary filter: only this dataset is processed; every other .mat file is skipped.
    only_file = 'karate_34.mat'

    # Per-dataset results, keyed by file name.
    modularity_scores_gcn = {}
    nmi_gcn = {}
    nmi = {}
    C_init = {}
    C_out = {}
    C_out_combo = {}
    graph_type = {}
    modularity_scores_combo = {}
    model_parameter = {}
    data_name = []
    modularity_scores_classic = {}

    for root, dirs, files in os.walk(data_dir):
        for file in files:
            if not file.endswith('.mat') or file != only_file:
                continue
            data_name.append(file)
            G = loadNetworkMat(file, data_dir)
            graph_type[file] = 'directed' if nx.classes.function.is_directed(G) else 'undirected'
            print(file, graph_type[file])

            # need to figure out it is weighted or not
            modularity_scores_combo[file], partition = getNewComboPartition(G)
            C_out_combo[file] = partition_to_binary_attachment(partition)

            # main() returns (train_result, classic_modularity); train_result is
            # (score, output, str(model), features).
            train_result, modularity_scores_classic[file] = main(G, nn_model, loss_direction=1)
            (modularity_scores_gcn[file], C_out[file],
             model_parameter[file], C_init[file]) = train_result

            nmi_gcn[file] = NMI(C_out[file], C_init[file])
            nmi[file] = NMI(C_out_combo[file], C_init[file])

    ##save log
    # NOTE(review): another cell in this notebook saves to the same data_dir --
    # confirm save_result does not overwrite the earlier log.
    save_result(data_name, graph_type, modularity_scores_gcn, modularity_scores_combo,
                modularity_scores_classic, nmi_gcn, nmi, model_parameter,
                data_dir)

    print('something')

karate_34.mat undirected
Epoch 0 | Time(s) 0.005000591278076172  Train_Modularity 0.4155982732772827 | True_Modularity 0.34612488746643066ETputs(KTEPS) 31.19631086106608
Epoch 10 | Time(s) 0.004637371410023083  Train_Modularity 0.4155982732772827 | True_Modularity 0.3219166398048401ETputs(KTEPS) 33.63974678905943
Epoch 20 | Time(s) 0.004715340478079659  Train_Modularity 0.4155982732772827 | True_Modularity 0.3049769699573517ETputs(KTEPS) 33.083507060667564
Epoch 30 | Time(s) 0.004646201287546466  Train_Modularity 0.4155982732772827 | True_Modularity 0.2947860360145569ETputs(KTEPS) 33.575816101239




Epoch 40 | Time(s) 0.00463521189805938  Train_Modularity 0.4155982732772827 | True_Modularity 0.2885139286518097ETputs(KTEPS) 33.65541930570907
Epoch 50 | Time(s) 0.004648138495052562  Train_Modularity 0.4155982732772827 | True_Modularity 0.28459587693214417ETputs(KTEPS) 33.5618226879524
Epoch 60 | Time(s) 0.00468948239185771  Train_Modularity 0.4155982732772827 | True_Modularity 0.2821216285228729ETputs(KTEPS) 33.26593149616274
Epoch 70 | Time(s) 0.004733438223180637  Train_Modularity 0.4155982732772827 | True_Modularity 0.2805461883544922ETputs(KTEPS) 32.95701615710022
Epoch 80 | Time(s) 0.004717017397468473  Train_Modularity 0.4155982732772827 | True_Modularity 0.27953681349754333ETputs(KTEPS) 33.07174573571046
Epoch 90 | Time(s) 0.004704202924455915  Train_Modularity 0.4155982732772827 | True_Modularity 0.27888718247413635ETputs(KTEPS) 33.16183474760347
Epoch 100 | Time(s) 0.004674184440386178  Train_Modularity 0.4155982732772827 | True_Modularity 0.27846774458885193ETputs(KTEPS) 3

In [10]:
if __name__ == "__main__":
    # Run the GCN pipeline with loss_direction=-1 and compare it with the Combo partition.
    test_number = 10  # not used in this cell
    work_dir = os.getcwd()
    nn_model = 'GCN'
    data_dir = os.path.join(work_dir, 'data/ComboSampleData/')
    # Temporary filter: only this dataset is processed; every other .mat file is skipped.
    only_file = 'karate_34.mat'

    # Per-dataset results, keyed by file name.
    modularity_scores_gcn = {}
    nmi_gcn = {}
    nmi = {}
    C_init = {}
    C_out = {}
    C_out_combo = {}
    graph_type = {}
    modularity_scores_combo = {}
    model_parameter = {}
    data_name = []
    modularity_scores_classic = {}

    for root, dirs, files in os.walk(data_dir):
        for file in files:
            if not file.endswith('.mat') or file != only_file:
                continue
            data_name.append(file)
            G = loadNetworkMat(file, data_dir)
            graph_type[file] = 'directed' if nx.classes.function.is_directed(G) else 'undirected'
            print(file, graph_type[file])

            # need to figure out it is weighted or not
            modularity_scores_combo[file], partition = getNewComboPartition(G)
            C_out_combo[file] = partition_to_binary_attachment(partition)

            # main() returns (train_result, classic_modularity); train_result is
            # (score, output, str(model), features).
            train_result, modularity_scores_classic[file] = main(G, nn_model, loss_direction=-1)
            (modularity_scores_gcn[file], C_out[file],
             model_parameter[file], C_init[file]) = train_result

            nmi_gcn[file] = NMI(C_out[file], C_init[file])
            nmi[file] = NMI(C_out_combo[file], C_init[file])

    ##save log
    # NOTE(review): another cell in this notebook saves to the same data_dir --
    # confirm save_result does not overwrite the earlier log.
    save_result(data_name, graph_type, modularity_scores_gcn, modularity_scores_combo,
                modularity_scores_classic, nmi_gcn, nmi, model_parameter,
                data_dir)

    print('something')


karate_34.mat undirected
Epoch 0 | Time(s) 0.006001710891723633  Train_Modularity 0.4151052236557007 | True_Modularity 0.3245973587036133ETputs(KTEPS) 25.992588249314743
Epoch 10 | Time(s) 0.0050921006636186076  Train_Modularity 0.4151052236557007 | True_Modularity 0.32956236600875854ETputs(KTEPS) 30.635686586985393
Epoch 20 | Time(s) 0.005144039789835612  Train_Modularity 0.4151052236557007 | True_Modularity 0.33197689056396484ETputs(KTEPS) 30.326359509941753
Epoch 30 | Time(s) 0.00500114502445344  Train_Modularity 0.4151052236557007 | True_Modularity 0.3331901431083679ETputs(KTEPS) 31.192856683265
Epoch 40 | Time(s) 0.004879154810091344  Train_Modularity 0.4151052236557007 | True_Modularity 0.33265459537506104ETputs(KTEPS) 31.972750624217124




Epoch 50 | Time(s) 0.004785444222244562  Train_Modularity 0.4151052236557007 | True_Modularity 0.3319858908653259ETputs(KTEPS) 32.59885451696475
Epoch 60 | Time(s) 0.004722513136316518  Train_Modularity 0.4151052236557007 | True_Modularity 0.3314651846885681ETputs(KTEPS) 33.033259092568116
Epoch 70 | Time(s) 0.004775920384366747  Train_Modularity 0.4151052236557007 | True_Modularity 0.3312300741672516ETputs(KTEPS) 32.66386108751779
Epoch 80 | Time(s) 0.004741980705732181  Train_Modularity 0.4151052236557007 | True_Modularity 0.3310204744338989ETputs(KTEPS) 32.89764545254787
Epoch 90 | Time(s) 0.004715670596112262  Train_Modularity 0.4151052236557007 | True_Modularity 0.330901175737381ETputs(KTEPS) 33.08119106720707
Epoch 100 | Time(s) 0.004704317243972627  Train_Modularity 0.4151052236557007 | True_Modularity 0.33082160353660583ETputs(KTEPS) 33.16102888253845
Epoch 110 | Time(s) 0.004667975881078222  Train_Modularity 0.4151052236557007 | True_Modularity 0.33076977729797363ETputs(KTEPS)