In [1]:
import numpy as np
import networkx as nx
import os
import pandas as pd
import torch
import time
import math
import dgl as dgl
from dgl.nn import pytorch
import numpy as np
import torch as th
import dgl.function as fn
import torch.nn.functional as F
from dgl.data import CoraGraphDataset
from GraphSAGE.losses import compute_loss_multiclass
from utils import *
from model import *
from loss import *
import matplotlib.cm as cm
import matplotlib.pyplot as plt



def train(g, features, n_classes, in_feats, n_edges, labels, mask, Q, cuda, nn_model, loss_direction,
          lr=0.0005, n_epochs=1000, early_stop=False, early_stop_rate=0.000005):
    """Train a graph model against the modularity loss and evaluate the result.

    Args:
        g: DGL graph; a self-loop is added to every node before training.
        features: node feature tensor; ``features.shape[1]`` is used as the
            hidden size and the tensor is also the model input.
        n_classes: output size passed to the model and to ``ModularityScore``.
        in_feats: input feature size passed to the model.
        n_edges: edge count, used only for the throughput figure in the log.
        labels: not used by the training loop; kept for interface compatibility.
        mask: index/mask applied to the model output before the loss is computed.
        Q: forwarded to the loss and to ``evaluate_M``
            (presumably the modularity matrix -- TODO confirm).
        cuda: when true, tensors, graph and model are moved to the GPU.
        nn_model: name of the model class; ``'GCN'`` selects the GCN constructor
            signature, any other name uses the relational-conv style signature.
        loss_direction: forwarded unchanged to ``ModularityScore``.
        lr: SGD learning rate.
        n_epochs: maximum number of epochs (must be >= 1).
        early_stop: stop when the relative loss change drops below
            ``early_stop_rate`` or the loss becomes NaN.
        early_stop_rate: relative-change threshold used when ``early_stop`` is set.

    Returns:
        Tuple ``(modularity_score, C_hat, model_description, features)`` where
        ``modularity_score`` is a NumPy value, ``C_hat`` and ``features`` are CPU
        tensors and ``model_description`` is ``str(model)``.

    Raises:
        ValueError: if ``n_epochs`` is smaller than 1.
    """
    if n_epochs < 1:
        raise ValueError("n_epochs must be at least 1")

    # Fixed hyperparameters.
    dropout = 0.0
    gpu = 0
    n_hidden = features.shape[1]  # number of hidden nodes
    n_layers = 1  # number of hidden layers
    lr_decay_gamma = 0.65  # multiplicative learning-rate decay applied by StepLR
    self_loop = True
    visualize_model = False
    # Log and decay the learning rate 100 times per run; never let the step reach 0.
    step_size = max(1, n_epochs // 100)

    if self_loop:
        g = dgl.add_self_loop(g)

    if cuda:
        torch.cuda.set_device(gpu)
        features = features.cuda()
        g = g.to('cuda:{}'.format(gpu))
        mask = mask.cuda()

    # NOTE(review): eval() resolves the class from its name so that anything pulled
    # in by the star-imports can be selected. nn_model must never come from
    # untrusted input.
    model_cls = eval(nn_model)
    if nn_model == 'GCN':
        model = model_cls(g,
                          in_feats,
                          n_hidden,
                          n_classes,
                          n_layers,
                          F.relu,
                          dropout)
    else:
        model = model_cls(in_feat=in_feats,
                          out_feat=n_classes,
                          num_rels=3,
                          regularizer='basis',
                          num_bases=None,
                          bias=True,
                          activation=None,
                          self_loop=True,
                          low_mem=False,
                          dropout=0.0,
                          layer_norm=False
                          )
    if cuda:
        model.cuda()

    loss_fcn = ModularityScore(n_classes, cuda, loss_direction)

    if visualize_model:
        print_parameter(model)
        print_parameter(loss_fcn)

    # Plain SGD with a step-wise learning-rate decay.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=lr_decay_gamma)

    dur = []

    def report(epoch, assignment, current_loss):
        """Print one progress line for the given epoch."""
        _, eval_loss = evaluate_M(assignment, Q, cuda)
        mean_dur = np.mean(dur)
        # NOTE(review): "Train_Modularity" shows the evaluate_M score and
        # "True_Modularity" shows |training loss|; the labels look swapped -- confirm.
        print("Epoch {} | Time(s) {}  Train_Modularity {} | True_Modularity {} | "
              "ETputs(KTEPS) {}".format(epoch, mean_dur, eval_loss, abs(current_loss),
                                        n_edges / mean_dur / 1000))

    # State of the previous epoch, used to roll back when early stopping triggers.
    last_score = None
    last_C_hat = None

    for epoch in range(n_epochs):
        model.train()
        t0 = time.time()
        C_hat = model(features)
        # Only the masked rows contribute to the loss.
        loss = loss_fcn(C_hat[mask], Q)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        dur.append(time.time() - t0)
        if epoch % step_size == 0:
            if visualize_model:
                print_parameter(model)
                print_parameter(loss_fcn)
            report(epoch, C_hat, loss)

        if early_stop:
            diverged = bool(torch.isnan(loss).any())
            converged = False
            if not diverged and last_score is not None:
                previous = float(last_score)
                # A zero previous loss makes the relative change undefined; keep training.
                if previous != 0:
                    converged = abs((float(loss) - previous) / previous) < early_stop_rate
            if diverged or converged:
                # Fall back to the previous epoch when there is one (there is none
                # if the very first loss is already NaN).
                if last_C_hat is not None:
                    loss, C_hat = last_score, last_C_hat
                report(epoch, C_hat, loss)
                break

        # Detach so the previous epoch's autograd graph is not kept alive.
        last_score = loss.detach()
        last_C_hat = C_hat.detach()

    _, modularity_init = evaluate_M(features, Q, cuda)
    print('initial modularity is', modularity_init)
    C_hat, modularity_score = evaluate_M(C_hat, Q, cuda)
    if torch.isnan(modularity_score):
        # Fall back to the last training loss when the evaluated score is NaN.
        modularity_score = loss
    print(C_hat)
    return modularity_score.cpu().detach().numpy(), C_hat.cpu(), model.__str__(), features.cpu()


def main(nx_g, nn_model, loss_direction):
    """Build the model inputs for a networkx graph and run one training session.

    Args:
        nx_g: networkx graph to partition.
        nn_model: name of the model class, forwarded to ``train``.
        loss_direction: forwarded to ``train``.

    Returns:
        Tuple ``(train_result, modularity_classic)`` where ``train_result`` is the
        4-tuple returned by ``train`` and ``modularity_classic`` is the value
        produced by ``generate_model_input``.
    """
    gpu = 0  # set to a negative value to force CPU execution
    # Only use the GPU when one is actually available, so the notebook also runs
    # on CPU-only machines instead of failing on the first .cuda() call.
    cuda = gpu >= 0 and torch.cuda.is_available()

    # prepare training data, set hyperparameters
    (g, features, n_classes, in_feats, n_edges,
     labels, Q, mask, modularity_classic) = generate_model_input(nx_g, cuda)
    train_result = train(g, features, n_classes, in_feats, n_edges, labels, mask, Q,
                         cuda, nn_model, loss_direction)
    return train_result, modularity_classic





Using backend: pytorch


In [2]:

if __name__ == "__main__":
    # Compare the GCN modularity optimiser (loss_direction=1) with Combo on one
    # sample network and write the scores to the result log.
    test_number = 10
    work_dir = os.getcwd()
    nn_model = 'GCN'
    data_dir = os.path.join(work_dir, 'data/ComboSampleData/')
    # Only this network is processed; every other file under data_dir is skipped.
    # Alternative sample network: 'karate_34.mat'.
    target_file = 'celeganmetabolic_453.mat'

    # Result containers, keyed by data file name.
    modularity_scores_gcn = {}
    nmi_gcn = {}
    nmi = {}
    C_init = {}
    C_out = {}
    C_out_combo = {}
    graph_type = {}
    modularity_scores_combo = {}
    model_parameter = {}
    data_name = []
    modularity_scores_classic = {}

    for root, dirs, files in os.walk(data_dir):
        for file in files:
            if file != target_file:
                continue
            data_name.append(file)
            # The network is loaded here only; a second load before the loop was redundant.
            G = loadNetworkMat(file, data_dir)
            graph_type[file] = 'directed' if nx.classes.function.is_directed(G) else 'undirected'
            print(file, graph_type[file])

            # need to figure out it is weighted or not
            modularity_scores_combo[file], partition = getNewComboPartition(G)
            C_out_combo[file] = partition_to_binary_attachment(partition)
            gcn_result, modularity_scores_classic[file] = main(G, nn_model, loss_direction=1)
            (modularity_scores_gcn[file], C_out[file],
             model_parameter[file], C_init[file]) = gcn_result

            nmi_gcn[file] = NMI(C_out[file], C_init[file])
            nmi[file] = NMI(C_out_combo[file], C_init[file])

    ##save log
    save_result(data_name, graph_type, modularity_scores_gcn, modularity_scores_combo,
                modularity_scores_classic, nmi_gcn, nmi, model_parameter,
                data_dir)

    print('something')

celeganmetabolic_453.mat directed




Epoch 0 | Time(s) 0.013004541397094727  Train_Modularity 0.2129949927330017 | True_Modularity 0.13639506697654724ETputs(KTEPS) 156.8682768356403
Epoch 10 | Time(s) 0.0071835951371626424  Train_Modularity 0.2129949927330017 | True_Modularity 0.11975400149822235ETputs(KTEPS) 283.9803693065244
Epoch 20 | Time(s) 0.00676349231175014  Train_Modularity 0.18549278378486633 | True_Modularity 0.09768126904964447ETputs(KTEPS) 301.61932711240473
Epoch 30 | Time(s) 0.0066144466400146484  Train_Modularity 0.18128719925880432 | True_Modularity 0.09118369966745377ETputs(KTEPS) 308.41582236960676
Epoch 40 | Time(s) 0.006489294331248213  Train_Modularity 0.18128719925880432 | True_Modularity 0.0884830430150032ETputs(KTEPS) 314.36391938283475
Epoch 50 | Time(s) 0.006373980466057272  Train_Modularity 0.17365045845508575 | True_Modularity 0.0869472473859787ETputs(KTEPS) 320.05118479157727
Epoch 60 | Time(s) 0.006312905764970623  Train_Modularity 0.16807326674461365 | True_Modularity 0.08601099997758865ETp

In [3]:
if __name__ == "__main__":
    # Same experiment as the loss_direction=1 run, repeated with loss_direction=-1:
    # compare the GCN modularity optimiser with Combo on one sample network and
    # write the scores to the result log.
    test_number = 10
    work_dir = os.getcwd()
    nn_model = 'GCN'
    data_dir = os.path.join(work_dir, 'data/ComboSampleData/')
    # Only this network is processed; every other file under data_dir is skipped.
    # Alternative sample network: 'karate_34.mat'.
    target_file = 'celeganmetabolic_453.mat'

    # Result containers, keyed by data file name.
    modularity_scores_gcn = {}
    nmi_gcn = {}
    nmi = {}
    C_init = {}
    C_out = {}
    C_out_combo = {}
    graph_type = {}
    modularity_scores_combo = {}
    model_parameter = {}
    data_name = []
    modularity_scores_classic = {}

    for root, dirs, files in os.walk(data_dir):
        for file in files:
            if file != target_file:
                continue
            data_name.append(file)
            # The network is loaded here only; a second load before the loop was redundant.
            G = loadNetworkMat(file, data_dir)
            graph_type[file] = 'directed' if nx.classes.function.is_directed(G) else 'undirected'
            print(file, graph_type[file])

            # need to figure out it is weighted or not
            modularity_scores_combo[file], partition = getNewComboPartition(G)
            C_out_combo[file] = partition_to_binary_attachment(partition)
            gcn_result, modularity_scores_classic[file] = main(G, nn_model, loss_direction=-1)
            (modularity_scores_gcn[file], C_out[file],
             model_parameter[file], C_init[file]) = gcn_result

            nmi_gcn[file] = NMI(C_out[file], C_init[file])
            nmi[file] = NMI(C_out_combo[file], C_init[file])

    ##save log
    save_result(data_name, graph_type, modularity_scores_gcn, modularity_scores_combo,
                modularity_scores_classic, nmi_gcn, nmi, model_parameter,
                data_dir)

    print('something')



celeganmetabolic_453.mat directed




Epoch 0 | Time(s) 0.00800180435180664  Train_Modularity 0.2129949927330017 | True_Modularity 0.13639506697654724ETputs(KTEPS) 254.94249925510996
Epoch 10 | Time(s) 0.00654690915888006  Train_Modularity 0.2129949927330017 | True_Modularity 0.1299007087945938ETputs(KTEPS) 311.59741955988443
Epoch 20 | Time(s) 0.006287131990705218  Train_Modularity 0.2129949927330017 | True_Modularity 0.12724630534648895ETputs(KTEPS) 324.4722717792308
Epoch 30 | Time(s) 0.00658214476800734  Train_Modularity 0.2129949927330017 | True_Modularity 0.1258770078420639ETputs(KTEPS) 309.92937285662043
Epoch 40 | Time(s) 0.00646483607408477  Train_Modularity 0.2129949927330017 | True_Modularity 0.12508897483348846ETputs(KTEPS) 315.5532447570689
Epoch 50 | Time(s) 0.0064199157789641735  Train_Modularity 0.2129949927330017 | True_Modularity 0.12460242956876755ETputs(KTEPS) 317.761177908964
Epoch 60 | Time(s) 0.006433256336900055  Train_Modularity 0.2129949927330017 | True_Modularity 0.12430053949356079ETputs(KTEPS) 