In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import networkx as nx
import random


from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model
from func_timeout import func_timeout, FunctionTimedOut

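We need to generate a collection of graphs for training and testing and, for each one, compute its modularity matrix $B$, where $B_{ij} = A_{ij} - \frac{k_i k_j}{2m}$ ($A$ is the adjacency matrix, $k_i$ the degree of vertex $i$, and $m$ the number of edges). We will start with LFR benchmark networks.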


Following the recommendations of [Lancichinetti et al., 2008], the paper used networks with 1000 nodes and an average degree of 20. The exponents of the vertex-degree and community-size distributions were -2.5 and -1.5, respectively, and the mixing parameter μ was varied from 0.6 to 0.8. A "small" network had community sizes distributed uniformly between 10 and 50, and a "large" network between 20 and 100.

In [2]:
def generate_network(num_vertices, avg_degree):
    """Generate one LFR benchmark graph with a randomly drawn mixing parameter.

    Args:
        num_vertices: Number of nodes, passed as the first argument of
            ``nx.LFR_benchmark_graph``.
        avg_degree: Passed as the ``average_degree`` argument.

    Returns:
        Whatever ``nx.LFR_benchmark_graph`` returns for these parameters.
    """
    # Exponents handed to the generator as its second and third arguments.
    degree_exponent, community_exponent = 2.5, 1.5
    # The mixing parameter is drawn afresh for every graph.
    mixing = random.uniform(0.6, 0.8)
    graph = nx.LFR_benchmark_graph(
        num_vertices,
        degree_exponent,
        community_exponent,
        mixing,
        average_degree=avg_degree,
        max_iters=100,
    )
    return graph

In [3]:
def create_adjacency_matrix(graph):
    """Return the adjacency matrix of ``graph`` as a dense NumPy array.

    The matrix comes from ``nx.adjacency_matrix``; it is densified and then
    converted with ``np.asarray`` so callers receive a plain ``ndarray``.
    """
    sparse_adjacency = nx.adjacency_matrix(graph)
    dense_adjacency = sparse_adjacency.todense()
    return np.asarray(dense_adjacency)

def create_modularity_matrix(graph):
    """Compute the modularity matrix ``B`` of ``graph``.

    ``B[i, j] = A[i, j] - k_i * k_j / (2 * m)``, where ``A`` is the adjacency
    matrix from ``create_adjacency_matrix``, ``k_i`` is the degree reported by
    ``graph.degree()`` for the i-th node, and ``m`` is half the sum of all
    degrees.

    Args:
        graph: Graph object supporting ``graph.degree()`` and accepted by
            ``create_adjacency_matrix``.

    Returns:
        A float ``(n, n)`` NumPy array, with ``n`` the number of nodes.
    """
    A = create_adjacency_matrix(graph)
    degrees = np.array([degree for _, degree in graph.degree()], dtype=float)
    m = 0.5 * degrees.sum()
    if m == 0:
        # No edges: every degree is zero, so the null-model term vanishes and
        # dividing by 2*m would only produce NaNs.
        return np.asarray(A, dtype=float)
    # The divisor must be the full product 2*m; writing `1/2*m` would evaluate
    # as (1/2)*m and scale the null-model term by m/2 instead of 1/(2m).
    expected_edges = np.outer(degrees, degrees) / (2.0 * m)
    return A - expected_edges

In [4]:
def create_community_indicator_matrix(graph):
    """Build the node-by-community indicator matrix ``H`` of ``graph``.

    Each node's ``'community'`` attribute (read from ``graph.nodes[v]``) is the
    collection of nodes in its community. ``H[i, c] == 1`` when the i-th node,
    in the graph's iteration order, belongs to community ``c``; all other
    entries are 0.

    Rows follow node iteration order rather than node labels, so labels do not
    have to be the integers ``0..n-1``. Columns are ordered by the first node
    at which each distinct community is encountered, which makes the layout
    deterministic.

    Args:
        graph: Iterable of nodes exposing ``graph.nodes[v]['community']``.

    Returns:
        A float NumPy array of shape ``(num_nodes, num_communities)``.
    """
    row_of = {node: row for row, node in enumerate(graph)}

    # Map each distinct community to a column, in order of first appearance.
    column_of = {}
    for node in row_of:
        members = frozenset(graph.nodes[node]['community'])
        column_of.setdefault(members, len(column_of))

    H = np.zeros(shape=(len(row_of), len(column_of)))
    for members, column in column_of.items():
        for member in members:
            H[row_of[member], column] = 1
    return H

In [5]:
def create_data_point(num_vertices, avg_degree):
    """Generate one graph and derive its three matrices.

    Returns:
        A tuple ``(A, B, H)`` holding the results of
        ``create_adjacency_matrix``, ``create_modularity_matrix`` and
        ``create_community_indicator_matrix`` for a graph produced by
        ``generate_network(num_vertices, avg_degree)``.
    """
    graph = generate_network(num_vertices, avg_degree)
    builders = (
        create_adjacency_matrix,
        create_modularity_matrix,
        create_community_indicator_matrix,
    )
    return tuple(build(graph) for build in builders)

def generate_dataset(num_data_points, num_vertices, avg_degree, max_attempts=None):
    """Collect ``num_data_points`` data points, retrying failed generations.

    Every attempt runs ``create_data_point(num_vertices, avg_degree)`` through
    ``func_timeout`` with a 5 second limit. An attempt that times out or
    raises an ordinary exception is discarded and retried. After each success
    the zero-based index of the new data point is printed.

    Args:
        num_data_points: Number of data points to collect.
        num_vertices: Forwarded to ``create_data_point``.
        avg_degree: Forwarded to ``create_data_point``.
        max_attempts: Optional cap on the total number of attempts. ``None``
            (the default) retries without limit.

    Returns:
        A list of ``num_data_points`` results of ``create_data_point``.

    Raises:
        RuntimeError: If ``max_attempts`` is reached before enough data points
            were produced; the last failure is chained as the cause.
    """
    data = []
    attempts = 0
    last_error = None
    while len(data) < num_data_points:
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                f"generated only {len(data)} of {num_data_points} data points "
                f"in {max_attempts} attempts"
            ) from last_error
        attempts += 1
        try:
            dp = func_timeout(5, create_data_point, args=(num_vertices, avg_degree))
        # FunctionTimedOut is listed explicitly so a timeout is retried even if
        # it does not derive from Exception. KeyboardInterrupt and SystemExit
        # are deliberately NOT caught, so the loop can still be interrupted.
        except (FunctionTimedOut, Exception) as error:
            last_error = error
            continue
        data.append(dp)
        print(len(data) - 1)
    return data

In [6]:
class Autoencoder(Model):
    """Keras model with a one-layer dense encoder and a one-layer dense decoder.

    The encoder flattens its input and applies a ReLU ``Dense`` layer of width
    ``latent_dim``; the decoder applies a sigmoid ``Dense`` layer of width
    ``input_dim``.
    """

    def __init__(self, input_dim, latent_dim):
        """Store the two layer widths and build the encoder and decoder."""
        super().__init__()
        self.input_dim = input_dim
        self.latent_dim = latent_dim

        encoding_stack = [
            layers.Flatten(),
            layers.Dense(latent_dim, activation='relu'),
        ]
        self.encoder = tf.keras.Sequential(encoding_stack)

        decoding_stack = [layers.Dense(input_dim, activation='sigmoid')]
        self.decoder = tf.keras.Sequential(decoding_stack)

    def call(self, X):
        """Encode then decode ``X`` and return the decoder output.

        The tensors of the most recent call are kept on the instance:
        ``self.B`` is the input, ``self.H`` the encoder output and ``self.M``
        the decoder output.
        """
        latent = self.encoder(X)
        reconstruction = self.decoder(latent)

        self.B, self.H, self.M = X, latent, reconstruction

        return reconstruction

    # TODO: a get_loss(self, X) method was sketched here but never implemented.

In [7]:
# Build a dataset of 10 data points from graphs requested with 34 vertices
# and an average degree of 4.5.
data = generate_dataset(num_data_points=10, num_vertices=34, avg_degree=4.5)

0
1
2
3
4
5
6
7
8
9
