In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import networkx as nx
import random
import pickle
import itertools
import math
import urllib.request as urllib
import io
import zipfile

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model
from tensorflow.linalg import diag
from tensorflow.keras import callbacks
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import normalized_mutual_info_score
from networkx.generators.community import LFR_benchmark_graph
from itertools import count
from scipy.spatial import distance_matrix
from sklearn.decomposition import NMF


from helpers import *
from Autoencoder import *

In [2]:
# CORA: load the pickled Cora graph object used by every later cell.
# The file is opened in a context manager so the handle is closed as soon as
# unpickling finishes (the bare open(...) previously leaked it).
# NOTE: pickle.load can execute arbitrary code — only load a trusted graphs/cora.p.
with open('graphs/cora.p', 'rb') as cora_file:
    cora = pickle.load(cora_file)

In [3]:
# Adjacency matrix A of the Cora graph, built by the project helper.
# A is the input to the similarity, probability-transition and Markov matrices below.
A = create_adjacency_matrix(cora)

In [4]:
# Four matrix views of the Cora graph, each later fed to an encoder.

# B: modularity matrix, cast to float32
X_cora_modularity = tf.convert_to_tensor(
    nx.modularity_matrix(cora).astype('float32')
)

# S: similarity matrix (Sørensen–Dice), cast to float32
X_cora_similarity = tf.convert_to_tensor(
    adjacency_to_similarity(A).astype('float32')
)

# T: probability transition matrix; the 4 is forwarded unchanged to the helper
X_cora_prob_trans = tf.convert_to_tensor(probability_transition_matrix(A, 4))

# M: Markov matrix built from A and the cosine similarity of the node features,
# after that similarity is passed through top_k with k = int(average community size)
X_cora_features = create_feature_matrix(cora, 1433)
top_k_size = int(average_community_size(cora_labels(cora)))
X_cos_sim = top_k(cosine_similarity(X_cora_features), top_k_size)
X_cora_markov = tf.convert_to_tensor(
    markov_matrix(A, X_cos_sim).astype('float32')
)

In [None]:
# Layer-wise pre-training of three stacked autoencoders: each stage is trained on
# the encoder output of the previous stage (2708 -> 512 -> 256 -> 128).
#
# X_cora_1 was never assigned anywhere in the notebook, so this cell raised
# NameError on a fresh kernel.
# NOTE(review): it is bound to the modularity matrix here because the models
# trained by this recipe are stored under .../trained/cora_modularity/ and the
# ensemble cell pushes the modularity matrix through them — confirm this is the
# intended input before re-training.
X_cora_1 = X_cora_modularity

# Stage 1: trained on the full input matrix.
# train(...)'s positional arguments (5000, 2708) are presumably the iteration
# count and the input width — confirm against helpers.
ae_cora_1 = Autoencoder(2708, 512, k_reg=tf.keras.regularizers.L2(), act_reg=SparseRegularizer())
history_cora_1 = train(ae_cora_1, 5000, 2708, X_cora_1)

# Stage 2: trained on the stage-1 encoding.
X_cora_2 = ae_cora_1.encoder(X_cora_1)

ae_cora_2 = Autoencoder(512, 256, k_reg=tf.keras.regularizers.L2(), act_reg=SparseRegularizer())
history_cora_2 = train(ae_cora_2, 5000, 512, X_cora_2)

# Stage 3: trained on the stage-2 encoding.
X_cora_3 = ae_cora_2.encoder(X_cora_2)

ae_cora_3 = Autoencoder(256, 128, k_reg=tf.keras.regularizers.L2(), act_reg=SparseRegularizer())
history_cora_3 = train(ae_cora_3, 5000, 256, X_cora_3)

In [None]:
#combining stacked autoencoders into deep autoencoder and fine tuning parameters

# Reload the three layer-wise autoencoders saved under cora_modularity.
_stage_paths = (
    'C:/Users/Owner/Documents/community_detection/trained/cora_modularity/cora_L2_sparse_5000iter_1',
    'C:/Users/Owner/Documents/community_detection/trained/cora_modularity/cora_L2_sparse_5000iter_2',
    'C:/Users/Owner/Documents/community_detection/trained/cora_modularity/cora_L2_sparse_5000iter_3',
)
ae_cora_1, ae_cora_2, ae_cora_3 = (
    tf.keras.models.load_model(path, custom_objects={'SparseRegularizer': SparseRegularizer})
    for path in _stage_paths
)

combined_encoder = tf.keras.Sequential()
combined_decoder = tf.keras.Sequential()

# Encoder layers are stacked stage 1 -> 3 ...
for stage_model in (ae_cora_1, ae_cora_2, ae_cora_3):
    combined_encoder.add(stage_model.encoder.layers[0])

# ... and decoder layers in the reverse order, stage 3 -> 1.
for stage_model in (ae_cora_3, ae_cora_2, ae_cora_1):
    combined_decoder.add(stage_model.decoder.layers[0])

# Wrap the stacked halves in a single Autoencoder and fine-tune it end to end.
ae_combined = Autoencoder(2708, 128, learning_rate=0.0001)
ae_combined.encoder = combined_encoder
ae_combined.decoder = combined_decoder
ae_combined.build(input_shape=(None, 2708))

# X_cora_1 is not assigned in this cell; it must already exist in the kernel.
combined_history = train(ae_combined, 500, 2708, X_cora_1)

# Cluster the fine-tuned embedding into 7 groups and score it against the Cora labels.
labels_combined = cora_labels(cora)
H_cora_combined = ae_combined.encoder(X_cora_1)
kmeans_cora_combined = KMeans(n_clusters=7, n_init=20)
kmeans_cora_combined.fit(H_cora_combined)

normalized_mutual_info_score(labels_combined, kmeans_cora_combined.labels_)

In [None]:
#ensemble learning

# --- Modularity view: three stacked autoencoders applied one after another ---
_modularity_paths = (
    'C:/Users/Owner/Documents/community_detection/trained/cora_modularity/cora_L2_sparse_5000iter_1',
    'C:/Users/Owner/Documents/community_detection/trained/cora_modularity/cora_L2_sparse_5000iter_2',
    'C:/Users/Owner/Documents/community_detection/trained/cora_modularity/cora_L2_sparse_5000iter_3',
)
ae_cora_modularity_1, ae_cora_modularity_2, ae_cora_modularity_3 = (
    tf.keras.models.load_model(path, custom_objects={'SparseRegularizer': SparseRegularizer})
    for path in _modularity_paths
)

X_modularity_1 = tf.convert_to_tensor(
    nx.modularity_matrix(cora).astype('float32')
)
X_modularity_2 = ae_cora_modularity_1.encoder(X_modularity_1)
X_modularity_3 = ae_cora_modularity_2.encoder(X_modularity_2)
H_modularity = ae_cora_modularity_3.encoder(X_modularity_3)

# --- Similarity view: one already-combined autoencoder ---
ae_cora_similarity = tf.keras.models.load_model(
    'C:/Users/Owner/Documents/community_detection/trained/cora_similarity/cora_L2_sparse_5000iter_combined',
    custom_objects={'SparseRegularizer': SparseRegularizer},
)

X_similarity = tf.convert_to_tensor(
    adjacency_to_similarity(create_adjacency_matrix(cora)).astype('float32')
)
H_similarity = ae_cora_similarity.encoder(X_similarity)

# --- Probability-transition view: one already-combined autoencoder ---
ae_cora_prob_trans = tf.keras.models.load_model(
    'C:/Users/Owner/Documents/community_detection/trained/cora_prob_trans/cora_L2_sparse_5000iter_combined',
    custom_objects={'SparseRegularizer': SparseRegularizer},
)

X_prob_trans = tf.convert_to_tensor(
    probability_transition_matrix(create_adjacency_matrix(cora), 4)
)
H_prob_trans = ae_cora_prob_trans.encoder(X_prob_trans)


In [None]:
# Reference labels for the Cora graph (from the project helper); compared with
# the ensemble clustering via NMI at the end of this section.
labels = cora_labels(cora)

In [None]:
# One 7-cluster KMeans (20 initialisations each) per embedding, fitted in the
# order modularity -> similarity -> probability transition.
kmeans_modularity, kmeans_similarity, kmeans_prob_trans = (
    KMeans(n_clusters=7, n_init=20).fit(embedding)
    for embedding in (H_modularity, H_similarity, H_prob_trans)
)

In [None]:
# Turn each KMeans label vector into a pairwise community-membership matrix
# via the project helper, one matrix per view.
modularity_pairwise, similarity_pairwise, prob_trans_pairwise = (
    create_pairwise_community_membership_matrix(fitted.labels_)
    for fitted in (kmeans_modularity, kmeans_similarity, kmeans_prob_trans)
)

In [None]:
# Q: element-wise mean of the three pairwise membership matrices (unweighted).
Q = np.asarray(
    [modularity_pairwise, similarity_pairwise, prob_trans_pairwise]
).mean(axis=0)

In [None]:
# NMF with 7 components (the same count as the KMeans clusters above), using the
# multiplicative-update solver and at most 1000 iterations.
nmf_model = NMF(n_components=7, solver='mu', max_iter=1000)

In [None]:
# Fit the factorisation on Q, the averaged pairwise membership matrix.
nmf_model.fit(Q)

In [None]:
# W: Q transformed by the fitted model; H: the fitted components matrix.
# Only H is used by the clustering step that follows.
W = nmf_model.transform(Q)
H = nmf_model.components_

In [None]:
# Derive the ensemble's cluster labels from H with the project helper.
# NOTE(review): the assignment rule lives in helpers and is not visible here.
clustering_labels = nmf_cluster_membership(H)

In [None]:
# Ensemble score: NMI between the reference labels and the NMF-derived labels
# (bare expression, so the value is shown as the cell output).
normalized_mutual_info_score(labels, clustering_labels)

In [5]:
# Load the first-stage autoencoder saved under cora_transfer. SparseRegularizer is
# passed as a custom object so Keras can resolve it while deserialising.
# NOTE: this rebinds ae_cora_1, which earlier cells use for the cora_modularity model.
ae_cora_1 = tf.keras.models.load_model('C:/Users/Owner/Documents/community_detection/trained/cora_transfer/cora_transfer_BSTM_1', custom_objects={'SparseRegularizer': SparseRegularizer})



In [6]:
# Encode each of the four matrix views with the same first-stage encoder, in the
# order modularity -> similarity -> probability transition -> Markov.
H_cora_modularity, H_cora_similarity, H_cora_prob_trans, H_cora_markov = (
    ae_cora_1.encoder(view)
    for view in (X_cora_modularity, X_cora_similarity, X_cora_prob_trans, X_cora_markov)
)

In [9]:
# Fresh (untrained) second-stage autoencoder with the same constructor arguments
# as the stage-2 model in the pre-training cell: 512 -> 256, L2 regulariser as
# k_reg and SparseRegularizer as act_reg.
ae_cora_2 = Autoencoder(512, 256, k_reg=tf.keras.regularizers.L2(), act_reg=SparseRegularizer())

In [10]:
# Reset the model's `history` attribute to an empty dict before training.
ae_cora_2.history = {}
# Train the second stage on the stage-1 encoding of the probability-transition matrix.
# NOTE(review): the positional arguments 5000 and 512 are presumably the
# iteration count and input width — confirm against train() in helpers.
history_1 = train(ae_cora_2, 5000, 512, H_cora_prob_trans)

In [16]:
# NOTE(review): `mod_model_2` is not assigned in any cell visible in this notebook,
# so this line raises NameError on a fresh kernel; it is presumably the stage-2
# autoencoder — confirm and define it explicitly.
# Trains that model on the stage-1 encoding of the similarity matrix.
history_modularity_markov_similarity_2 = train(mod_model_2, 5000, 512, H_cora_similarity)

In [19]:
# NOTE(review): `mod_model_2` is not assigned in any cell visible in this notebook
# (NameError on a fresh kernel) — confirm which model is meant.
# Continues training the same model object on the stage-1 encoding of the
# probability-transition matrix.
history_modularity_similarity_prob_trans_2 = train(mod_model_2, 5000, 512, H_cora_prob_trans)

In [13]:
# NOTE(review): `mod_model_2` is not assigned in any cell visible in this notebook
# (NameError on a fresh kernel). This cell's execution count (13) is also lower than
# those of the two training cells above it (16, 19), so the on-page order is not
# the order in which the cells were run — confirm the intended training sequence.
# Trains the model on the stage-1 encoding of the Markov matrix.
history_modularity_similarity_prob_trans_markov_2 = train(mod_model_2, 5000, 512, H_cora_markov)

In [26]:
# Evaluate the first-stage encoder on the probability-transition view:
# embed, cluster into 7 groups (20 KMeans initialisations), then report NMI
# against the Cora labels as the cell output.
labels_combined = cora_labels(cora)
H_cora_combined = ae_cora_1.encoder(X_cora_prob_trans)

kmeans_cora_combined = KMeans(n_clusters=7, n_init=20)
kmeans_cora_combined.fit(H_cora_combined)

normalized_mutual_info_score(labels_combined, kmeans_cora_combined.labels_)

0.5070735990403337

In [33]:
# NOTE(review): `mod_model_1` is not assigned in any cell visible in this notebook,
# so this line raises NameError on a fresh kernel — confirm which model is meant.
# The target is the same path the first-stage model (ae_cora_1) is loaded from
# earlier in this notebook, so saving here overwrites that stored model.
mod_model_1.save('C:/Users/Owner/Documents/community_detection/trained/cora_transfer/cora_transfer_BSTM_1')

INFO:tensorflow:Assets written to: C:/Users/Owner/Documents/community_detection/trained/cora_transfer/cora_transfer_BSTM_1\assets
