In [1]:
import numpy as np
import pandas as pd
import math
import itertools
from sklearn.metrics import DistanceMetric
import networkx as nx
from numpy import inf
from scipy import linalg
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 
from scipy.spatial.distance import pdist



In [2]:
# Load the heart-failure dataset from CSV
# Read the raw table, then keep the columns at positions 1..10 as the
# feature frame used for clustering.
df = pd.read_csv("HeartFailure.csv")
df_cluster = df.iloc[:, slice(1, 11)]
df_cluster.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex
0,75.0,0,582,0,20,1,265000.0,1.9,130,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0


In [3]:
# Converting the input csv file into a graph
'''  '''
def getGraph(df, measure):
    """Compute the pairwise distance matrix of ``df`` and build a graph from it.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sample; converted to an array with ``to_numpy()``.
    measure : str
        Metric name handed to ``DistanceMetric.get_metric`` (e.g. ``"euclidean"``).

    Returns
    -------
    matrix_dist : numpy.ndarray
        Square matrix of pairwise distances between the rows of ``df``.
    Graph : networkx.Graph
        Graph created from ``matrix_dist`` treated as an adjacency matrix.
    """
    dist = DistanceMetric.get_metric(measure)
    matrix_dist = dist.pairwise(df.to_numpy())
    # nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is
    # its replacement and accepts the same 2-D ndarray.
    Graph = nx.from_numpy_array(matrix_dist)
    return matrix_dist, Graph
    

In [4]:
#Function to draw graph
def draw_graph(G):
    """Draw ``G`` with a spring layout: nodes first, then labels, then edges."""
    layout = nx.spring_layout(G)
    # Nodes and labels take no extra styling, so draw them in one pass.
    for draw_layer in (nx.draw_networkx_nodes, nx.draw_networkx_labels):
        draw_layer(G, layout)
    # Edges are drawn last, semi-transparent.
    nx.draw_networkx_edges(G, layout, alpha=0.5, width=1.0)

In [5]:
# Pairwise Euclidean distances between samples, plus the graph built from them.
matrix_dist, Graph = getGraph(df_cluster, measure="euclidean")
matrix_dist

array([[     0.        ,   7461.94801114, 103000.92328887, ...,
        477002.29250205, 125013.41482517, 130000.5780106 ],
       [  7461.94801114,      0.        , 101651.22696515, ...,
        478677.1225331 , 123478.27495382, 131864.93303654],
       [103000.92328887, 101651.22696515,      0.        , ...,
        580003.1598873 ,  22116.51270002, 233000.00729418],
       ...,
       [477002.29250205, 478677.1225331 , 580003.1598873 , ...,
             0.        , 602000.10390311, 347005.00681206],
       [125013.41482517, 123478.27495382,  22116.51270002, ...,
        602000.10390311,      0.        , 255009.63742384],
       [130000.5780106 , 131864.93303654, 233000.00729418, ...,
        347005.00681206, 255009.63742384,      0.        ]])

In [6]:
# Equation for the adjacency (affinity) matrix
def equation(df, sigma, distance):
    """Build a Gaussian-kernel affinity matrix from pairwise distances.

    Each off-diagonal entry is ``exp(-d(i, j)**2 / (2 * sigma**2))`` where
    ``d`` is the pairwise distance returned by ``pdist``. The diagonal is
    left at zero (no self-affinity).

    Parameters
    ----------
    df : array-like
        Samples, one per row; passed directly to ``pdist``.
    sigma : float
        Kernel bandwidth.
    distance : str
        Metric name forwarded to ``pdist`` (e.g. ``'euclidean'``).

    Returns
    -------
    numpy.ndarray
        Symmetric ``(n, n)`` matrix of affinities, ``n = df.shape[0]``.
    """
    dim = df.shape[0]
    condensed = pdist(df, metric=distance)
    # The whole ratio belongs inside the exponent: exp(-d^2 / (2 sigma^2)).
    # Dividing exp(-d^2) by (2 sigma^2) afterwards is not a Gaussian kernel.
    weights = np.exp(-(condensed ** 2) / (2.0 * sigma ** 2))

    A = np.zeros([dim, dim])
    # pdist orders pairs as (0,1), (0,2), ..., (1,2), ... which is exactly the
    # row-major order of the strict upper triangle.
    upper = np.triu_indices(dim, k=1)
    A[upper] = weights
    # Mirror into the lower triangle; the diagonal is zero so nothing doubles.
    return A + A.T


In [7]:
# Gaussian-kernel affinities between samples, bandwidth 0.3, Euclidean metric.
adjacency_matrix = equation(df_cluster, sigma=0.3, distance='euclidean')
adjacency_matrix


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
def getLaplacian(Graph, matrix_dist):
    """Build the degree matrix, the Laplacian and its normalized form.

    Parameters
    ----------
    Graph : object exposing ``degree()``
        ``degree()`` must yield ``(node, degree)`` pairs; presumably a
        networkx graph -- verify against caller.
    matrix_dist : numpy.ndarray
        Square matrix subtracted from the degree matrix.

    Returns
    -------
    D : numpy.ndarray
        Diagonal matrix holding the values returned by ``Graph.degree()``.
    L : numpy.ndarray
        ``D - matrix_dist``.
    LN : numpy.ndarray
        ``D^(-1/2) @ L @ D^(-1/2)``.

    Notes
    -----
    ``D`` and ``LN`` are printed as a side effect.
    NOTE(review): ``degree()`` is called without a weight argument, so for a
    networkx graph the degrees would be edge counts while ``matrix_dist`` is
    weighted -- confirm this mix is intended.
    NOTE(review): a zero degree makes ``D`` singular, so ``D^(-1/2)`` is not
    finite -- confirm isolated nodes cannot occur.
    """
    degrees = [deg for _, deg in Graph.degree()]
    D = np.diag(np.asarray(degrees, dtype=float))
    print(D)
    L = D - matrix_dist

    # Normalized Laplacian: scale L symmetrically by D^(-1/2).
    D_half = linalg.fractional_matrix_power(D, -0.5)
    LN = D_half @ L @ D_half

    print(LN)
    return D, L, LN


In [None]:
def getEigen(LN):
    """Eigen-decompose ``LN`` with ``np.linalg.eig`` and echo the result.

    Prints the eigenvalues and the eigenvectors, then returns them as the
    pair ``(e, v)`` exactly as produced by ``np.linalg.eig``.
    """
    eigenvalues, eigenvectors = np.linalg.eig(LN)
    print('eigenvalues:', eigenvalues, sep='\n')
    print('eigenvectors:', eigenvectors, sep='\n')
    return eigenvalues, eigenvectors
    

In [None]:
# Degree matrix, Laplacian and normalized Laplacian of the distance graph.
D, L, LN = getLaplacian(Graph, matrix_dist=matrix_dist)

In [None]:
# Spectrum of the normalized Laplacian.
e, v = getEigen(LN=LN)

In [None]:
# Left panel: eigenvalues in the order returned by getEigen.
# Right panel: the eigenvector of the first eigenvalue below the threshold.
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.plot(e)
ax1.title.set_text('eigenvalues')
# NOTE(review): 10e-6 equals 1e-5 -- confirm this is the intended threshold.
i = np.flatnonzero(e < 10e-6)
ax2.plot(v[:, i[0]])
fig.tight_layout()
plt.show()

In [None]:
# KMeans needs a 2-D (n_samples, n_features) array. Indexing the column with a
# list keeps the selected eigenvector as an (n, 1) column; a bare integer index
# would yield a 1-D vector that fit_predict rejects.
U = np.array(v[:, [i[0]]])
km = KMeans(init='k-means++', n_clusters=3)
# Attach the cluster label of every sample to the original frame.
df['clusters'] = km.fit_predict(U)
df