In [2]:
import torch
import time
import math
import dgl
import numpy as np
import torch as th
from dgl.data import citation_graph as citegrh
from dgl.data import CoraBinary
from dgl.data import CoraGraphDataset
from dgl import DGLGraph
import dgl.function as fn
import networkx as nx
import torch.nn.functional as F
from dgl.data import RedditDataset, KarateClubDataset
from dgl.nn import GraphConv
import matplotlib.pyplot as plt
from torch.nn import MSELoss
from losses import compute_loss_multiclass


class MyModel(th.nn.Module):
    """Single linear layer followed by ReLU, with dropout in between.

    The network maps ``n_features`` input columns to 2 output columns:
    ``Linear(n_features, 2) -> Dropout -> ReLU``.

    :param g: graph handle; stored on the instance but not used by ``forward``.
    :param dropout: drop probability handed to ``torch.nn.Dropout``.
    :param n_features: number of input columns expected by the linear layer.
    """

    def __init__(self, g, dropout, n_features):
        super().__init__()
        self.g = g
        # The linear layer is built first so parameter initialisation order is fixed.
        self.layers = th.nn.ModuleList([
            th.nn.Linear(n_features, 2),
            th.nn.ReLU(inplace=True),
        ])
        self.dropout = th.nn.Dropout(p=dropout)

    def forward(self, features):
        """Cast ``features`` to float and push them through every layer.

        Dropout is applied to the input of every layer except the first one.
        """
        hidden = features.float()
        for position, layer in enumerate(self.layers):
            hidden = layer(hidden if position == 0 else self.dropout(hidden))
        return hidden


class ModularityScore(th.nn.Module):
    """Loss module returning ``-trace(sigmoid(C)^T Q sigmoid(C))``.

    :param n_classes: accepted for interface compatibility; not used.
    :param cuda: truthy to move ``C`` and ``Q`` to the current CUDA device
        inside ``forward``.
    """

    def __init__(self, n_classes, cuda):
        super(ModularityScore, self).__init__()
        # Deliberately NOT stored as ``self.cuda``: an instance attribute of that
        # name would shadow ``torch.nn.Module.cuda()`` and make
        # ``loss_fcn.cuda()`` fail with "'bool' object is not callable".
        self.use_cuda = bool(cuda)

    def forward(self, C, Q):
        """Return the negated trace of ``sigmoid(C)^T @ Q @ sigmoid(C)``.

        :param C: tensor of raw scores; squashed with a sigmoid before use.
        :param Q: tensor that is cast to float and multiplied between
            ``sigmoid(C)^T`` and ``sigmoid(C)``.
        :return: zero-dimensional tensor holding the loss.
        """
        # -tf.linalg.trace(tf.matmul(tf.matmul(tf.transpose(C),Q),C))
        C = th.sigmoid(C)
        Q = Q.float()
        if self.use_cuda:
            C = C.cuda()
            Q = Q.cuda()
        return -th.matmul(th.matmul(C.t(), Q), C).trace()


class GCN(th.nn.Module):
    """Stack of ``GraphConv`` layers evaluated on a fixed graph.

    Layout: one input layer (``in_feats -> n_hidden``), ``n_layers - 1`` hidden
    layers (``n_hidden -> n_hidden``) and one output layer
    (``n_hidden -> n_classes``). All but the output layer receive
    ``activation``. Dropout is applied to the input of every layer except the
    first.

    :param g: graph passed to every ``GraphConv`` call in ``forward``.
    :param in_feats: input feature size.
    :param n_hidden: hidden feature size.
    :param n_classes: output feature size.
    :param n_layers: ``n_layers - 1`` hidden layers are inserted.
    :param activation: activation handed to the input and hidden layers.
    :param dropout: drop probability handed to ``torch.nn.Dropout``.
    """

    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super().__init__()
        self.g = g
        # Built strictly in input -> hidden -> output order.
        convs = [GraphConv(in_feats, n_hidden, activation=activation)]
        for _ in range(n_layers - 1):
            convs.append(GraphConv(n_hidden, n_hidden, activation=activation))
        convs.append(GraphConv(n_hidden, n_classes))
        self.layers = th.nn.ModuleList(convs)
        self.dropout = th.nn.Dropout(p=dropout)

    def forward(self, features):
        """Run ``features`` through all layers on the stored graph."""
        hidden = features
        for depth, conv in enumerate(self.layers):
            conv_input = hidden if depth == 0 else self.dropout(hidden)
            hidden = conv(self.g, conv_input)
        return hidden


def evaluate(model, features, labels, mask):
    """Return the fraction of masked nodes whose arg-max prediction matches.

    The model is switched to evaluation mode (and left there) and is run
    without gradient tracking.

    :param model: callable module mapping ``features`` to per-node scores.
    :param features: input handed to ``model``.
    :param labels: target class index per node.
    :param mask: index/boolean selector applied to both scores and labels.
    :return: accuracy as a Python float.
    """
    model.eval()
    with torch.no_grad():
        scores = model(features)[mask]
        targets = labels[mask]
        # element [1] of torch.max(..., dim=1) holds the arg-max indices
        predicted = torch.max(scores, dim=1)[1]
        n_hits = (predicted == targets).sum().item()
    return float(n_hits) / len(targets)



def Q2(G1: dgl.DGLGraph):
    """Build the matrix ``Q = (A - K) / T`` of a DGL graph with a zero diagonal.

    ``A`` is the dense adjacency matrix obtained through NetworkX, ``T`` is the
    sum of all entries of ``A`` and ``K[i][j] = r_i * r_j / T`` where ``r`` are
    the row sums of ``A``.

    :param G1: DGL graph to convert.
    :return: dense ``numpy.ndarray`` of shape ``(n, n)`` with ``Q[i][i] == 0``.
    :raises ValueError: if the adjacency matrix sums to zero (no edges), since
        the normalisation by ``T`` would otherwise produce NaN.
    """
    nx_graph = dgl.to_networkx(G1)
    A = np.array(nx.adjacency_matrix(nx_graph).todense())
    T = A.sum()
    if T == 0:
        raise ValueError("Q2 requires a graph with at least one edge")
    row_weight = A.sum(axis=1)
    # NOTE(review): both factors are row sums, which is exact for a symmetric
    # adjacency matrix; for a directed graph one factor would normally be the
    # column sum (in-degree) -- confirm the intended definition.
    K = np.outer(row_weight, row_weight) / T
    Q = (A - K) / T
    # set Qii to zero for every i
    np.fill_diagonal(Q, 0)
    return Q


# a utility function to convert a scipy sparse (COO) matrix to a dense torch float tensor
def sparse2th(mat):
    """Convert a scipy sparse matrix into a dense ``torch.float32`` tensor.

    :param mat: scipy sparse matrix; any format exposing ``tocoo()`` is
        accepted (a COO matrix is used as is).
    :return: dense tensor with the same shape as ``mat``; duplicate COO
        entries are summed.
    """
    coo = mat.tocoo()
    # Stack row/col into one (2, nnz) int64 array first: building a tensor
    # from a Python list of ndarrays is slow and triggers a warning.
    indices = th.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = th.from_numpy(np.asarray(coo.data)).float()
    # th.sparse_coo_tensor replaces the deprecated th.sparse.FloatTensor constructor.
    tensor = th.sparse_coo_tensor(indices, values, size=tuple(coo.shape))
    return tensor.to_dense()


# network visualization utility function
def visualize(labels, g):
    """Draw graph ``g`` on a new 8x8 figure, colouring nodes by ``labels``.

    Node positions come from a spring layout with a fixed seed (1); axes are
    hidden and node labels are not drawn.

    :param labels: per-node values used as ``node_color`` with the
        ``coolwarm`` colormap.
    :param g: graph object accepted by ``networkx`` layout/draw functions.
    """
    layout = nx.spring_layout(g, seed=1)
    plt.figure(figsize=(8, 8))
    plt.axis('off')
    draw_options = dict(
        pos=layout,
        node_size=50,
        cmap=plt.get_cmap('coolwarm'),
        node_color=labels,
        edge_color='k',
        arrows=False,
        width=0.5,
        style='dotted',
        with_labels=False,
    )
    nx.draw_networkx(g, **draw_options)

def load_cora_binary():
    """Load the item at index 1 of ``CoraBinary`` and return it as tensors.

    Prints the maximum feature value as a side effect.

    :return: tuple ``(g, features, n_classes, in_feats, n_edges, labels)``
        where ``features`` is the dense tensor produced by ``sparse2th``,
        ``labels`` is a ``LongTensor`` and ``n_classes`` is fixed to 2.
    """
    dataset = CoraBinary()
    graph, sparse_feats, raw_labels = dataset[1]
    edge_count = graph.number_of_edges()
    dense_feats = sparse2th(sparse_feats)
    label_tensor = th.LongTensor(raw_labels)
    feat_dim = dense_feats.shape[1]
    print(th.max(dense_feats))

    return graph, dense_feats, 2, feat_dim, edge_count, label_tensor

def load_kara():
    """Load the first graph of ``KarateClubDataset`` with identity features.

    An ``n x n`` identity matrix is written to ``g.ndata['feat']`` so every
    node gets a one-hot feature row.

    :return: tuple ``(g, features, n_classes, in_feats, labels, n)`` where
        ``n`` is the number of entries in ``g.ndata['label']``.
    """
    dataset = KarateClubDataset()
    class_count = dataset.num_classes
    graph = dataset[0]
    edge_count = graph.number_of_edges()  # computed as before; not returned
    node_labels = graph.ndata['label']
    node_count = len(node_labels)
    # construct one-hot features (identity matrix), one row per node
    graph.ndata['feat'] = th.eye(node_count)
    stored_feats = graph.ndata['feat']
    feat_dim = stored_feats.shape[1]

    return (graph, torch.FloatTensor(stored_feats), class_count,
            feat_dim, node_labels, node_count)

def load_cora():
    """Load Cora through ``citegrh.load_cora()`` and return graph and tensors.

    :return: tuple ``(g, features, n_classes, in_feats, n_edges, labels,
        train_mask, val_mask, test_mask)``. The first six entries follow the
        same order as ``load_cora_binary``; the masks are ``BoolTensor``s.
    """
    data = citegrh.load_cora()
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    g = DGLGraph(data.graph)

    # Previously the function fell off the end and returned None, discarding
    # everything it had just built.
    return (g, features, n_classes, in_feats, n_edges, labels,
            train_mask, val_mask, test_mask)



Using backend: pytorch


In [3]:
if __name__ == "__main__":
    # ---- hyper-parameters -------------------------------------------------
    dropout = 0.5
    gpu = 0  # CUDA device index; a negative value forces CPU
    lr = 5e-2
    n_epochs = 20000
    n_hidden = 32  # hidden node number for each layer
    n_layers = 2  # number of layer
    weight_decay = 5e-4  # weight decay not used here
    self_loop = True  # not used below

    # load cora_binary; train/val/test masks all select every node and are kept
    # for a future accuracy comparison with a supervised algorithm
    g, features, n_classes, in_feats, n_edges, labels = load_cora_binary()
    n = len(labels)
    train_mask = th.BoolTensor([True] * n)
    val_mask = train_mask
    test_mask = train_mask

    # calculate matrix Q, initial community attachment C (with overlap)
    Q = Q2(g)
    # NOT OVERLAPPING CASE: row 0 is a random 0/1 assignment, row 1 its complement
    C_init = Q[0:2] * 0
    C_init[0] = np.random.randint(2, size=(1, Q.shape[0]))
    C_init[1] = 1 - C_init[0]
    C = th.tensor(data=C_init.T, requires_grad=True)
    C = C.float()
    Q = th.from_numpy(Q)
    Q = Q.float()
    Q_C = th.matmul(Q, C)

    # Use the GPU only when one was requested AND is actually present, so the
    # cell also runs on CPU-only machines instead of failing in set_device().
    cuda = gpu >= 0 and torch.cuda.is_available()
    if gpu >= 0 and not cuda:
        print("CUDA is not available; falling back to CPU.")
    if cuda:
        torch.cuda.set_device(gpu)
        features = features.cuda()
        labels = labels.cuda()
        # follow the configured device index instead of a hard-coded 'cuda:0'
        g = g.to('cuda:{}'.format(gpu))
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # store deg^-0.5 per node on the graph; isolated nodes (inf) are set to 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    model = GCN(g,
                in_feats,
                n_hidden,
                n_classes,
                n_layers,
                F.relu,
                dropout)

    # print initial model parameter
    for p in model.parameters():
        print(p)

    # the (negated) modularity score is the training loss; labels are only
    # used for reporting accuracy
    # loss_fcn = torch.th.nn.CrossEntropyLoss()
    loss_fcn = ModularityScore(n_classes, cuda)
    if cuda:
        model.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr)

    # train and evaluate (with modularity score and labels)
    dur = []
    M = []
    for epoch in range(n_epochs):
        model.train()
        t0 = time.time()
        C_hat = model(features)
        loss = loss_fcn(C_hat, Q)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        dur.append(time.time() - t0)
        if epoch % 100 == 0:
            # record modularity
            modularity = -loss.item()
            M.append(str(modularity))
            # cluster ids are arbitrary, so score against both label assignments
            acc_1 = evaluate(model, features, labels, val_mask)
            acc_2 = evaluate(model, features, 1 - labels, val_mask)
            acc = max(acc_1, acc_2)
            # print the plain float, not the tensor repr of -loss
            print("Epoch {} | Time(s) {} | Modularity {} | Accuracy {} | "
                  "ETputs(KTEPS) {}".format(epoch, np.mean(dur), modularity,
                                            acc, n_edges / np.mean(dur) / 1000))

    # the with-block closes the file; no explicit close() is needed
    with open('modularity_history.txt', 'w') as f:
        for line in M:
            f.write(line + '\n')

Traceback (most recent call last):
  File "C:\Users\benno\anaconda3\envs\gpu\lib\site-packages\dgl\data\dgl_dataset.py", line 165, in _load
    self.load()
  File "C:\Users\benno\anaconda3\envs\gpu\lib\site-packages\dgl\data\citation_graph.py", line 874, in load
    for i in range(len(lables)):
NameError: name 'lables' is not defined

Loading from cache failed, re-processing.
Done saving data into cached files.
Done saving data into cached files.
tensor(1.)
Parameter containing:
tensor([[ 0.0420, -0.0268,  0.0022,  ..., -0.0291, -0.0110,  0.0214],
        [-0.0426,  0.0371, -0.0140,  ...,  0.0183,  0.0403, -0.0368],
        [-0.0041, -0.0357,  0.0410,  ...,  0.0240, -0.0267,  0.0428],
        ...,
        [ 0.0113,  0.0322, -0.0434,  ...,  0.0006, -0.0240, -0.0202],
        [-0.0291, -0.0247,  0.0289,  ...,  0.0316,  0.0290,  0.0089],
        [-0.0197,  0.0198,  0.0315,  ...,  0.0355, -0.0189,  0.0174]],
       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0

In [3]:
import pycombo
import networkx as nx


# Reload the Cora-binary sample; only the graph `g` is used in this cell.
g, features, n_classes, in_feats, n_edges,labels = load_cora_binary()
# pycombo is given a NetworkX graph, so convert the DGL graph first.
nx_g = dgl.to_networkx(g)
# Run pycombo with max_communities=2 and print whatever it returns.
partition = pycombo.execute(nx_g,max_communities=2)
print(partition)


Traceback (most recent call last):
  File "C:\Users\benno\anaconda3\envs\gpu\lib\site-packages\dgl\data\dgl_dataset.py", line 165, in _load
    self.load()
  File "C:\Users\benno\anaconda3\envs\gpu\lib\site-packages\dgl\data\citation_graph.py", line 874, in load
    for i in range(len(lables)):
NameError: name 'lables' is not defined

Loading from cache failed, re-processing.
Done saving data into cached files.
Done saving data into cached files.
tensor(1.)


