In [1]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import SpectralClustering,KMeans
from itertools import combinations 
import mysql.connector
from concurrent.futures import ThreadPoolExecutor
from Linear_kmeans import KMeans
from foreign_keys import *
%matplotlib inline

In [2]:
L_energy = None

In [3]:
def norm(V):
    return np.sqrt(np.sum(V**2))

def spectral_clusters(weighted_adj_matrix,n_clusters):
    #creating graph from weighted adjacency matrix
    graph = nx.convert_matrix.from_numpy_matrix(weighted_adj_matrix)
    
    #Finding Normalized Laplacian matrix of the given adjacency matrix
    nlm = nx.normalized_laplacian_matrix(H).toarray()
    #Finding eigenvalue and eigenvectors
    val,vec = np.linalg.eig(nlm)

    # Sort eigenvectors,eigenvalues in ascending order
    sort_eigval, sort_eigvec = zip(*sorted(zip(val,vec))[1:])
    sort_eigvec = np.array(sort_eigvec)
    U = sort_eigvec.T
    U = np.array([U[i]/norm(U[i]) for i in range(U.shape[0])])
    kmean = KMeans(k=n_clusters)
    kmean.fit(U)
    return kmean.predict(U)

In [4]:
def get_centrality(i):
    vertex = weighted_adj_matrix[i]
    vertex_col = weighted_adj_matrix[:i]
    mid_walk_comb = combinations(vertex[vertex!=0], 2) 
    two_closed_walk = [weight*weight for weight in vertex]
    two_mid_walk = [i[0]*i[1] for i in mid_walk_comb]
    two_open_walk = [adj_matrix[pos][vertex!=0] * w for pos,w in enumerate(vertex) if w!=0]
    delta_e = 4 * np.sum(two_closed_walk) + 2 * np.sum(two_mid_walk) + 2 * np.sum(two_open_walk)
    centrality = delta_e/L_energy
    return centrality

In [5]:
def get_edge_weights(v1,v2):
    edge_name = v1+'-'+v2 
    query='SELECT * FROM '+v1+' INNER JOIN '+v2+' ON '+v1+'.'+foreign_keys[edge_name][v1]+'='+v2+'.'+foreign_keys[edge_name][v2]
    mycursor.execute(query)
    myresult = mycursor.fetchall()
    return len(myresult)

In [6]:
def laplacian_centrality(weighted_adj_matrix):
    #creating graph from weighted adjacency matrix
    graph = nx.convert_matrix.from_numpy_matrix(weighted_adj_matrix)
    
    #Finding Laplacian matrix of the given adjacency matrix
    lm = nx.laplacian_matrix(graph).toarray()
    
    #Finding eigenvalue and eigenvectors
    val,vec = np.linalg.eig(lm)

    #Computing Laplacian energy
    laplacian_energy = sum([v*v for v in val])
    global L_energy
    L_energy = laplacian_energy
    centralities = []
    with ThreadPoolExecutor(max_workers=weighted_adj_matrix.shape[0]) as executor:
        nodes = [i for i in range(weighted_adj_matrix.shape[0])]
        for centrality in executor.map(get_centrality,nodes):
            centralities.append(centrality)
    
    return zip(node_name_list,centralities)

In [7]:
adj_matrix = np.array([[0,1,0,1,1,1,0,0],
                       [1,0,1,1,1,0,1,1],
                       [0,1,0,0,0,1,0,0],
                       [1,1,0,0,0,0,0,0],
                       [1,1,0,0,0,0,0,0],
                       [1,0,1,0,0,0,0,0],
                       [0,1,0,0,0,0,0,0],
                       [0,1,0,0,0,0,0,0]])
adj_matrix.shape

(8, 8)

In [8]:
graph = nx.convert_matrix.from_numpy_matrix(adj_matrix)
node_name_map = {  0:  'post',
                   1: 'users',
                   2: 'followers',
                   3: 'comment',
                   4: 'likes',
                   5: 'categories',
                   6: 'bookmarks',
                   7: 'user_uploads'} 
mapping = node_name_map
H = nx.relabel_nodes(graph, mapping)

In [9]:
mydb = mysql.connector.connect(
  host="localhost",
  user="root",
  passwd="",
  database="mc"
)
mycursor = mydb.cursor()

In [10]:
node_name_list = list(node_name_map.values())
weighted_adj_matrix = np.full(adj_matrix.shape,-1)
for vec_pos,vec in enumerate(adj_matrix):
    for val_pos,val in enumerate(vec):
        if val!=0 and weighted_adj_matrix[vec_pos][val_pos] == -1:
            weight = get_edge_weights(node_name_list[vec_pos],node_name_list[val_pos])
            weighted_adj_matrix[vec_pos][val_pos] = weight
            weighted_adj_matrix[val_pos][vec_pos] = weight
weighted_adj_matrix[weighted_adj_matrix==-1] = 0

In [11]:
for i in laplacian_centrality(weighted_adj_matrix):
    print(i)

('post', 0.5613470697623728)
('users', 0.33114069708633925)
('followers', 0.0012959499064575612)
('comment', 0.0)
('likes', 0.534471739689017)
('categories', 0.20849954026638162)
('bookmarks', 0.021692250788070782)
('user_uploads', 0.0037525281720911085)


In [15]:
n_clusters = 2
try:
    clusters = spectral_clusters(weighted_adj_matrix,n_clusters)
    for c in range(n_clusters):
        print(np.array(list(node_name_map.values()))[clusters==c])
except:
    print("Please reduce the number of clusters or try calling the function again")

Epoch 0 Loss = 0.8775497029442638
Epoch 1 Loss = 1.1191855756210131e-05
['post' 'users' 'followers']
['comment' 'likes' 'categories' 'bookmarks' 'user_uploads']


  return (np.sum(centroid - point)**n)**(1/n)
