In [7]:
from pandas.core.common import flatten
import time
import sys
import os
import numpy as np
from graphein.protein.config import ProteinGraphConfig
from graphein.protein.edges.distance import add_hydrogen_bond_interactions, add_peptide_bonds
from functools import partial
#from graphein.protein.visualisation import plotly_protein_structure_graph

from graphein.protein.edges.distance import (add_peptide_bonds,
                                             add_k_nn_edges,
                                             add_distance_threshold,
                                             add_hydrogen_bond_interactions,
                                             add_disulfide_interactions,
                                             add_ionic_interactions,
                                             add_aromatic_interactions,
                                             add_aromatic_sulphur_interactions,
                                             add_cation_pi_interactions,
                                             add_delaunay_triangulation
                                            )

from graphein.protein.features.nodes import secondary_structure
from graphein.protein.subgraphs import extract_subgraph_from_secondary_structure
from graphein.protein.config import DSSPConfig
from graphein.protein.subgraphs import extract_surface_subgraph
from graphein.protein.features.nodes import rsa
import networkx as nx                                       
import logging
import matplotlib.pyplot as plt
logging.getLogger("matplotlib").setLevel(logging.WARNING)
logging.getLogger("graphein").setLevel(logging.INFO)
import graphein.protein as gp
from graphein.protein.edges.atomic import add_atomic_edges, add_bond_order, add_ring_status

In [8]:
# define different graph
edge_funcs1 = {"edge_construction_functions": [partial(add_distance_threshold, long_interaction_threshold=5, threshold=10.),
                                                  add_aromatic_interactions,
                                                  add_hydrogen_bond_interactions,
                                                  add_disulfide_interactions,
                                                  add_ionic_interactions,
                                                  add_aromatic_sulphur_interactions,
                                                  add_cation_pi_interactions
                                                   ]
                 }




edge_funcs2 = {"edge_construction_functions": [
                                                  add_aromatic_interactions,
                                                  add_hydrogen_bond_interactions,
                                                  add_disulfide_interactions,
                                                  add_ionic_interactions,
                                                  add_aromatic_sulphur_interactions,
                                                  add_cation_pi_interactions
                                                   ]
                 }

edge_funcs3 = {"edge_construction_functions": [partial(add_distance_threshold, long_interaction_threshold=5, threshold=10.)
                                                  
                                                   ]
                 }

edge_funcs4 = {"edge_construction_functions": [add_delaunay_triangulation        
                                                   ]
                 }

edge_funcs5 = {"edge_construction_functions": [partial(add_k_nn_edges, k=3, long_interaction_threshold=0)]
                 }

edge_funcs = [edge_funcs1,edge_funcs2,edge_funcs3,edge_funcs4,edge_funcs5]

In [9]:
import networkx as nx
from graphein.protein.graphs import construct_graph
from graphein.protein.subgraphs import extract_subgraph

# data load

pdb_p_GH30 = "/Users/eth-may/Desktop/dtu/semester6/graph_similarity/PDB/Q09DH4/Q09DH4.pdb"
pdb_p_GH35 = "/Users/eth-may/Desktop/dtu/semester6/graph_similarity/PDB/C3VDH8/C3VDH8.pdb"
pdb_p_GH16 = "/Users/eth-may/Desktop/dtu/semester6/graph_similarity/PDB/Q21N15/Q21N15.pdb"
pdb_p_GH27 = "/Users/eth-may/Desktop/dtu/semester6/graph_similarity/PDB/A0A6M4XDE9/A0A6M4XDE9.pdb"

def nodes_list(g,s,e):
    nodes =[]
    for i in g.nodes:
        nodes.append(i)
    return nodes[s-1:e] 

def get_network(s_g):
    nodes = []
    for i in s_g.nodes:
        nodes.append(i)
    
    edges = [] 
    for j in s_g.edges:
        edges.append(j)
    G = nx.Graph()
    
    for node in nodes:
        G.add_node(node)

    for link in edges:
        G.add_edge(link[0], link[1])
    return G

def sub_graph(edge_func):
    config = ProteinGraphConfig(**edge_func)

    g1 = construct_graph(config=config, pdb_path=pdb_p_GH30)
    g2 = construct_graph(config=config, pdb_path=pdb_p_GH35)
    g3 = construct_graph(config=config, pdb_path=pdb_p_GH16)
    g4 = construct_graph(config=config, pdb_path=pdb_p_GH27)

    s_g1 = extract_subgraph(g1, node_list=nodes_list(g1,33,478), inverse=False)
    s_g2 = extract_subgraph(g2, node_list=nodes_list(g2,20,408), inverse=False)
    s_g3 = extract_subgraph(g3, node_list=nodes_list(g3,328,567), inverse=False)
    s_g4 = extract_subgraph(g4, node_list=nodes_list(g4,48,380), inverse=False)
    
    G1 = get_network(s_g1)
    G2 = get_network(s_g2)
    G3 = get_network(s_g3)
    G4 = get_network(s_g4)
    
    return G1,G2,G3,G4
#s_g1,s_g2,s_g3,s_g4

In [10]:
from graphein.protein.visualisation import plotly_protein_structure_graph
G1 = sub_graph(edge_funcs[4])
p = plotly_protein_structure_graph(
    G1,
    colour_edges_by="kind",
    colour_nodes_by="seq_position",
    label_node_ids=True,
    plot_title="GH30 sample's Protein graph with type 5, nodes colored by degree ",
    node_size_multiplier=1,
    )
p.show()

Output()

TypeError: list indices must be integers or slices, not str

In [11]:
from graphein.protein.analysis import plot_degree_distribution
from graphein.protein.analysis import plot_degree_by_residue_type
#for i in range(len(edge_funcs)):
G1 = sub_graph(edge_funcs[4])
fig = plot_degree_by_residue_type(G1, normalise_by_residue_occurrence=True)
fig.show()

Output()

TypeError: list indices must be integers or slices, not str

In [9]:
avg_degrees_all = []
avg_ccs_all = []

for i in range(5):
    G1,G2,G3,G4 = sub_graph(edge_funcs[i])
    G = [G1,G2,G3,G4]
    avg_degrees = []
    avg_ccs = []
    for j in range(len(G)):
        degrees = dict(G[j].degree())
        

        avg_degree = sum(degrees.values()) / len(degrees)
        avg_cc = nx.average_clustering(G[j])
    
        avg_degrees.append(avg_degree)
        avg_ccs.append(avg_cc )
    avg_degrees_all.append(avg_degrees)
    avg_ccs_all.append(avg_ccs)


Output()

TypeError: 'method' object is not subscriptable

In [10]:
import networkx
networkx.__version__

'1.7'

In [140]:
G = [G1,G2,G3,G4]
G[1]

<networkx.classes.graph.Graph at 0x7ff440c83b20>

In [None]:
 avg_degrees_all

In [203]:
index_outcon_de=[]
index_incon_de=[]
for i in range(5):
    index_outcon_de.append(abs(avg_degrees_all[i][2]-avg_degrees_all[i][0])/abs(avg_degrees_all[i][3]-avg_degrees_all[i][0]))
    index_incon_de.append(abs(avg_degrees_all[i][3]-avg_degrees_all[i][0])/abs(avg_degrees_all[i][1]-avg_degrees_all[i][0]))

In [204]:
index_outcon_de

[0.6569191700639985,
 0.09464127937798227,
 0.5997823142414871,
 1.2647438423645316,
 7.884789156626543]

In [205]:
index_incon_de

[2.3253733399718826,
 1.712163601052491,
 2.407326378934958,
 1.8724191312783256,
 0.09711982433184359]

In [206]:
avg_ccs_all

[[0.24523419995525605,
  0.2384427225153921,
  0.2869802607882208,
  0.25061051188232947],
 [0.049925261584454414, 0.02759211653813196, 0.0, 0.025025025025025023],
 [0.23080917157021796,
  0.2225055227367125,
  0.27848482017337467,
  0.2276767303796513],
 [0.4422775249475943,
  0.4419199054642,
  0.44623215068989225,
  0.44630501337774664],
 [0.3069933803117658,
  0.3067694944301628,
  0.29319444444444454,
  0.28438438438438407]]

In [207]:
index_outcon=[]
index_incon=[]
for i in range(5):
    index_outcon.append(abs(avg_ccs_all[i][2]-avg_ccs_all[i][0])/abs(avg_ccs_all[i][3]-avg_ccs_all[i][0]))
    index_incon.append(abs(avg_ccs_all[i][3]-avg_ccs_all[i][0])/abs(avg_ccs_all[i][1]-avg_ccs_all[i][0]))

In [208]:
index_outcon

[7.764813760664582,
 2.0050115373521775,
 15.219966059293254,
 0.981908653713591,
 0.6103294419461324]

In [209]:
index_incon

[0.791626266107588,
 1.114945365186246,
 0.37723671284449933,
 11.261937945678818,
 100.98446478850946]

In [221]:
laplacian1 = nx.laplacian_matrix(G1)

AttributeError: module 'scipy.sparse' has no attribute 'coo_array'

In [222]:
import numpy as np

# define the adjacency matrix
A = np.array([[0, 1, 1],
              [1, 0, 1],
              [1, 1, 0]])

# compute the degree matrix
D = np.diag(np.sum(A, axis=1))

# compute the Laplacian matrix
L = D - A

In [251]:
n_u =[]
n_v =[]
for u, v, d in G1.edges(data=True):
    n_u.append(int(u[6:]))
    n_v.append(int(v[6:]))
    #print(u[6:], v[6:], d)
print(min(min(n_u),min(n_v)))


33


In [397]:
def get_adj_matrix(G):
# define the edges of the graph
    edges = G.edges

# compute the number of nodes in the graph
    n = len(G.nodes)

# create an empty adjacency matrix
    adj_matrix = [[0] * n for _ in range(n)]

    n_u =[]
    n_v =[]
    for u, v, d in G.edges(data=True):
        n_u.append(int(u[6:]))
        n_v.append(int(v[6:]))
    #print(u[6:], v[6:], d)
    tmp = min(min(n_u),min(n_v))

# populate the adjacency matrix with the edges
    for u, v in edges:
        adj_matrix[int(u[6:])-tmp][int(v[6:])-tmp] = 1
        adj_matrix[int(v[6:])-tmp][int(u[6:])-tmp] = 1  # if the graph is undirected

# convert the adjacency matrix to a NumPy array (optional)
    adj_matrix = np.array(adj_matrix)
    return adj_matrix

def get_Laplacian_matrix(A):
    # define the adjacency matrix
    

# compute the degree matrix
    D = np.diag(np.sum(A, axis=1))

# compute the Laplacian matrix
    L = D - A
    return L

def normalized_laplacian_matrix(G):
    adj_matrix = get_adj_matrix(G)
    D = np.diag(adj_matrix.sum(axis=1))
    L = get_Laplacian_matrix(adj_matrix)
    D_sqrt_inv = np.diag(1 / np.sqrt(np.diag(D)))
    L_norm = D_sqrt_inv @ L @ D_sqrt_inv
    return L_norm


In [273]:
def get_maxtrix(L):
    LL =[]
    for i in range(len(L)):
    
        LL.append(L[i].tolist())
    return LL

In [366]:
G1,G2,G3,G4=sub_graph(edge_funcs[0])
graphs=[G1,G2,G3,G4]

Output()

Output()

Output()

Output()

In [427]:
import numpy as np
from sklearn.cluster import KMeans
from scipy.sparse.linalg import eigs

# Define the adjacency matrices for each graph
adjacency_matrices = [adjacency_matrix1, adjacency_matrix2, adjacency_matrix3, adjacency_matrix4]

# Define the number of eigenvectors to compute
n_eigenvectors = 4

# Compute the Laplacian matrices for each graph
laplacian_matrices = []
for adjacency_matrix in adjacency_matrices:
    degree_matrix = np.diag(np.sum(adjacency_matrix, axis=1))
    laplacian_matrix = degree_matrix - adjacency_matrix
    laplacian_matrices.append(laplacian_matrix)

# Compute the eigenvectors for each Laplacian matrix
eigenvectors = []
for laplacian_matrix in laplacian_matrices:
    k_eigvals, k_eigenvects = eigs(laplacian_matrix, k=n_eigenvectors, which='SM')
    eigenvectors.append(k_eigenvects)

# Concatenate the eigenvectors into a single matrix
concatenated_eigenvectors = np.concatenate(eigenvectors, axis=1)

# Cluster the rows of the concatenated eigenvectors using KMeans
n_clusters = 4  # set the number of clusters you want to find
kmeans = KMeans(n_clusters=n_clusters)
kmeans.fit(concatenated_eigenvectors)
graph_labels = kmeans.labels_

print(graph_labels)


NameError: name 'adjacency_matrix1' is not defined

In [436]:
from sklearn.cluster import SpectralClustering
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances
from sklearn.cluster import spectral_clustering
from sklearn.manifold import spectral_embedding
# Compute the Laplacian eigenvalues and eigenvectors for each graph
k = 3
eigenvalues_list = []
eigenvectors_list = []
L_list = []
for G in graphs:
    adj = get_adj_matrix(G)
    L = get_Laplacian_matrix(adj)
    eigenvectors = np.linalg.eigh(L)[1][:, 1:3]
    #eigenvalues_list.append(eigenvalues[:k])
    eigenvectors_list.append(eigenvectors)
    L_list.append(L)
# Stack Laplacian matrices vertically
"""
max_cols = max(L.shape[1] for L in L_list)
for i in range(len(L_list)):
    num_cols = L_list[i].shape[1]
    if num_cols < max_cols:
        pad_cols = max_cols - num_cols
        L_list[i] = np.pad(L_list[i], ((0, 0), (0, pad_cols)), mode='constant')
"""
from sklearn.cluster import SpectralClustering

# Apply spectral embedding to each Laplacian matrix
n_components = 3
embedding1 = spectral_embedding(L_list[0], n_components=n_components)
embedding2 = spectral_embedding(L_list[1], n_components=n_components)
embedding3 = spectral_embedding(L_list[2], n_components=n_components)
embedding4 = spectral_embedding(L_list[3], n_components=n_components)

# Compute pairwise distances between graph representations
X = np.vstack([embedding1, embedding2, embedding3, embedding4])
D = pairwise_distances(X, metric='euclidean')

# Cluster the graphs using spectral clustering
n_clusters = 2
spectral = SpectralClustering(n_clusters=n_clusters, affinity='precomputed')
labels = spectral.fit_predict(D)

print('Graph-level labels:', labels)




ValueError: array must not contain infs or NaNs

In [438]:
from sklearn.metrics.pairwise import pairwise_kernels
kernel = 'graphlet'
K = pairwise_kernels(graphs, metric=kernel)

# Cluster the graphs using spectral clustering
n_clusters = 2
spectral = SpectralClustering(n_clusters=n_clusters, affinity='precomputed')
labels = spectral.fit_predict(K)

print('Graph-level labels:', labels)

ValueError: Unknown kernel 'graphlet'

In [441]:
import numpy as np
from sklearn.cluster import SpectralClustering
from sklearn.metrics.pairwise import pairwise_kernels
from graphkernels import graphlet_sampling_kernel
from igraph import Graph

# Define graphs
graph1 = Graph.Erdos_Renyi(n=20, p=0.2)
graph2 = Graph.Erdos_Renyi(n=30, p=0.1)
graph3 = Graph.Erdos_Renyi(n=40, p=0.15)
graph4 = Graph.Erdos_Renyi(n=50, p=0.05)

# Convert graphs to adjacency matrices
A1 = np.array(graph1.get_adjacency().data)
A2 = np.array(graph2.get_adjacency().data)
A3 = np.array(graph3.get_adjacency().data)
A4 = np.array(graph4.get_adjacency().data)

# Compute kernel matrix using graphlet sampling kernel
K = pairwise_kernels([A1, A2, A3, A4], metric=graphlet_sampling_kernel)

# Cluster the graphs using spectral clustering
labels = SpectralClustering(n_clusters=2, affinity='precomputed').fit_predict(K)

print("Graph-level labels:", labels)


ModuleNotFoundError: No module named 'graphkernels'

In [426]:
import networkx as nx
import numpy as np
from sklearn.cluster import spectral_clustering

# define function to compute graph similarity using Laplacian matrix
def graph_similarity(G1, G2):
    L1 = normalized_laplacian_matrix(G1)
    L2 = normalized_laplacian_matrix(G2)
    n1, m1 = L1.shape
    n2, m2 = L2.shape
    
    # pad the smaller Laplacian matrix with zeros
    if n1 > n2:
        L2 = np.pad(L2, ((0, n1-n2), (0, n1-n2)), 'constant')
    elif n1 < n2:
        L1 = np.pad(L1, ((0, n2-n1), (0, n2-n1)), 'constant')
    
    # compute Euclidean distance between Laplacian matrices
    dist = np.linalg.norm(L1 - L2)
    
    # return similarity as exponential of negative distance
    return np.exp(-dist)

# define function to cluster graphs using spectral clustering
def graph_clustering(G_list, n_clusters_list):
    L_list = []
    for G in G_list:
        L = normalized_laplacian_matrix(G)
        L_list.append(L)
        print(L)
    # stack Laplacian matrices vertically
    max_cols = max(L.shape[1] for L in L_list)
    for i in range(len(L_list)):
        num_cols = L_list[i].shape[1]
        if num_cols < max_cols:
            pad_cols = max_cols - num_cols
            L_list[i] = np.pad(L_list[i], ((0, 0), (0, pad_cols)), mode='constant')

    X = np.vstack(L_list)
    
    # compute pairwise distances between Laplacian matrices
    D = pairwise_distances(X, metric='euclidean')
    
    # perform spectral clustering
    labels = spectral_clustering(D, n_clusters=n_clusters_list)
    
    # separate graph-level labels
    graph_labels = []
    i = 0
    for G in G_list:
        n = len(G.nodes)
        graph_labels.append(labels[i:i+n])
        i += n
    
    return graph_labels

# create list of protein graphs with different sizes
G_list = graphs
#[nx.petersen_graph(), nx.bull_graph(), nx.star_graph(7)]

# compute pairwise similarities between graphs
similarity_matrix = np.zeros((len(G_list), len(G_list)))
for i in range(len(G_list)):
    for j in range(i+1, len(G_list)):
        similarity_matrix[i,j] = graph_similarity(G_list[i], G_list[j])
        similarity_matrix[j,i] = similarity_matrix[i,j]

#print('Graph similarity matrix:')
#print(similarity_matrix)

# cluster graphs
n_clusters_list = [3, 2, 4,2] # number of clusters for each graph
graph_labels = graph_clustering(G_list, n_clusters_list)

# print graph-level labels
print('Graph-level labels:')
for i, G in enumerate(G_list):
    print(f'{G}: {graph_labels[i]}')



divide by zero encountered in true_divide


invalid value encountered in matmul



[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]]
[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]]
[[ 1.          0.          0.         ...  0.          0.
  -0.13608276]
 [ 0.          1.          0.         ...  0.          0.
  -0.13608276]
 [ 0.          0.          1.         ...  0.         -0.12909944
  -0.10540926]
 ...
 [ 0.          0.          0.         ...  1.          0.
   0.        ]
 [ 0.          0.         -0.12909944 ...  0.          1.
   0.        ]
 [-0.13608276 -0.13608276 -0.10540926 ...  0.          0.
   1.        ]]
[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]]


ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [391]:
import numpy as np
from sklearn.cluster import spectral_clustering

# Define a function to compute the Laplacian matrix for a given adjacency matrix
def laplacian_matrix(adjacency_matrix):
    degree_matrix = np.diag(np.sum(adjacency_matrix, axis=1))
    laplacian = degree_matrix - adjacency_matrix
    return laplacian

# Define the adjacency matrices for the 4 graphs
A1 = np.array([[0, 1, 1, 0], [1, 0, 1, 0], [1, 1, 0, 1], [0, 0, 1, 0]])
A2 = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]])
A3 = np.array([[0, 1], [1, 0]])
A4 = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]])

# Compute the Laplacian matrices for the 4 graphs
L1 = laplacian_matrix(A1)
L2 = laplacian_matrix(A2)
L3 = laplacian_matrix(A3)
L4 = laplacian_matrix(A4)

L_list = [L1,L2,L3,L4]
max_cols = max(L.shape[1] for L in L_list)
for i in range(len(L_list)):
    num_cols = L_list[i].shape[1]
    if num_cols < max_cols:
        pad_cols = max_cols - num_cols
        L_list[i] = np.pad(L_list[i], ((0, 0), (0, pad_cols)), mode='constant')


# Stack Laplacian matrices vertically
X = np.vstack((L_list[0],L_list[1],L_list[2],L_list[3]))

# Compute the pairwise distances between the Laplacian matrices
D = pairwise_distances(X, metric='euclidean')

# Apply spectral clustering to obtain the graph-level labels
n_clusters = 2
labels = spectral_clustering(D, n_clusters=n_clusters, assign_labels='kmeans')

# Print the labels for each graph
print("Graph-level labels:", labels)


Graph-level labels: [0 0 0 0 0 1 0 0 0 1 0 0]


In [428]:
k = 10

# Compute the Laplacian matrix for each graph and compute the k smallest eigenvectors
laplacians = []
eigenvectors = []
for graph in graphs:
    adj = get_adj_matrix(graph)
 
    laplacian = get_Laplacian_matrix(adj)
    eigenvalues, eigenvects = np.linalg.eig(laplacian)
    sorted_indices = np.argsort(eigenvalues)
    k_eigenvects = eigenvects[:, sorted_indices[1:k+1]]
    laplacians.append(laplacian)
    eigenvectors.append(k_eigenvects)

max_size = max([eigvec.shape[0] for eigvec in eigenvectors])
padded_eigenvectors = []
for eigvec in eigenvectors:
    padded_eigvec = np.pad(eigvec, ((0, max_size-eigvec.shape[0]), (0, 0)), 'constant')
    padded_eigenvectors.append(padded_eigvec)
# Concatenate the eigenvectors into a single matrix

concatenated_eigenvectors = np.concatenate(padded_eigenvectors, axis=1)
# Cluster the rows of the concatenated eigenvectors using KMeans
n_clusters = 4  # set the number of clusters you want to find
kmeans = KMeans(n_clusters=n_clusters)
labels = kmeans.fit_predict(concatenated_eigenvectors)
labels

446

In [368]:
len(eigenvectors)


4

In [419]:
from networkx.generators.directed import gnr_graph
from networkx.generators import spectral_graph_forge
from similarity_index_of_label_graph_package import similarity_index_of_label_graph_class
G1,G2,G3,G4=sub_graph(edge_funcs[2])
graphs=[G1,G2,G3,G4]
similarity_index_of_label_graph = similarity_index_of_label_graph_class()
similarity_index_of_label_graph(G1, G4)

Output()

Output()

Output()

Output()

-0.8844167295249322

In [424]:
similarity_index_of_label_graph(G1, G3)

-0.8730569188390245

In [431]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.metrics import pairwise_distances
from grakel import GraphKernel

# Generate two example graphs
X1, y1 = make_moons(n_samples=50, noise=0.05)
X2, y2 = make_moons(n_samples=100, noise=0.1)

# Compute pairwise distances between the graphs using the Laplacian kernel
D1 = pairwise_distances(X1)
D2 = pairwise_distances(X2)
lap_kernel = GraphKernel(kernel="Laplacian")
K1 = lap_kernel.fit_transform(D1)
K2 = lap_kernel.fit_transform(D2)

# Compute similarity between the graphs using the graphlet kernel
gk_kernel = GraphKernel(kernel="graphlet")
similarity = gk_kernel.fit_transform([K1, K2])

print("Graph similarity:", similarity)


ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [432]:
import numpy as np
from sklearn.cluster import SpectralClustering
from sklearn.metrics import pairwise_distances
from sklearn.manifold import spectral_embedding
from networkx.linalg.laplacianmatrix import laplacian_matrix
import networkx as nx

# Load the graphs
G1 = nx.karate_club_graph()
G2 = nx.erdos_renyi_graph(20, 0.5)
G3 = nx.grid_2d_graph(5, 5)
G4 = nx.path_graph(10)

# Compute the Laplacian matrix for each graph
L1 = laplacian_matrix(G1).toarray()
L2 = laplacian_matrix(G2).toarray()
L3 = laplacian_matrix(G3).toarray()
L4 = laplacian_matrix(G4).toarray()

# Apply spectral embedding to each Laplacian matrix
n_components = 3
embedding1 = spectral_embedding(L1, n_components=n_components)
embedding2 = spectral_embedding(L2, n_components=n_components)
embedding3 = spectral_embedding(L3, n_components=n_components)
embedding4 = spectral_embedding(L4, n_components=n_components)

# Compute pairwise distances between graph representations
X = np.vstack([embedding1, embedding2, embedding3, embedding4])
D = pairwise_distances(X, metric='euclidean')

# Cluster the graphs using spectral clustering
n_clusters = 2
spectral = SpectralClustering(n_clusters=n_clusters, affinity='precomputed')
labels = spectral.fit_predict(D)

print('Graph-level labels:', labels)


AttributeError: module 'scipy.sparse' has no attribute 'coo_array'

In [442]:
from grakel.kernels import GraphletSampling

ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [449]:
import networkx as nx
import numpy as np
from sklearn.cluster import KMeans


# Define the four graphs
graph1 = nx.Graph()
graph1.add_nodes_from([1, 2, 3])
graph1.add_edges_from([(1, 2), (2, 3)])

graph2 = nx.Graph()
graph2.add_nodes_from([1, 2, 3, 4])
graph2.add_edges_from([(1, 2), (2, 3), (3, 4)])

graph3 = nx.Graph()
graph3.add_nodes_from([1, 2, 3, 4])
graph3.add_edges_from([(1, 2), (1, 3), (1, 4), (2, 3), (3, 4)])

graph4 = nx.Graph()
graph4.add_nodes_from([1, 2, 3])
graph4.add_edges_from([(1, 2), (2, 3), (1, 3)])

# Create a list of graphs
graphs = [graph1, graph2, graph3, graph4]

# Define the Graphlet kernel function
def graphlet_kernel(graph1, graph2, k=4):
    nodes1 = list(graph1.nodes())
    nodes2 = list(graph2.nodes())
    n1 = len(nodes1)
    n2 = len(nodes2)
    G1 = nx.Graph(graph1)
    G2 = nx.Graph(graph2)
    K = np.zeros((n1, n2))
    for i in range(n1):
        for j in range(n2):
            K[i, j] = count_subgraphs(G1, G2, nodes1[i], nodes2[j], k)
    return np.sum(K)


from networkx.algorithms.smallworld import find_graphlet_degree

def count_subgraphs(G1, G2, u, v, k):
    count = 0
    for subgraph in nx.graphlets.graphlets_at_node(G1, u, k):
        nodes = list(subgraph)
        # get the degree sequence of the subgraph
        deg_seq = [G1.degree(node) for node in nodes]
        # count the number of times the subgraph appears in G2
        count += find_graphlet_degree(G2, deg_seq)
    return count


# Compute the Graphlet kernel matrix
n = len(graphs)
kernel_matrix = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        kernel_matrix[i, j] = graphlet_kernel(graphs[i], graphs[j])

# Perform K-means clustering
kmeans = KMeans(n_clusters=2, random_state=0).fit(kernel_matrix)

# Visualize the clustering results
labels = kmeans.labels_
print(labels)


ImportError: cannot import name 'find_graphlet_degree' from 'networkx.algorithms.smallworld' (/Users/eth-may/opt/anaconda3/lib/python3.8/site-packages/networkx/algorithms/smallworld.py)

In [447]:
import numpy as np
from sklearn.cluster import KMeans
import networkx as nx

def graphlet_kernel(graph1, graph2, k=5):
    subgraph_counts1 = nx.subgraph_census(graph1)
    subgraph_counts2 = nx.subgraph_census(graph2)
    kernel_value = 0
    for i in range(k):
        kernel_value += np.sqrt(subgraph_counts1[i] * subgraph_counts2[i])
    return kernel_value

# Define the four graphs
graph1 = nx.Graph()
graph1.add_edges_from([(1,2),(2,3),(3,4),(4,5),(5,1)])

graph2 = nx.Graph()
graph2.add_edges_from([(1,2),(2,3),(3,4),(4,5),(5,1),(2,4)])

graph3 = nx.Graph()
graph3.add_edges_from([(1,2),(2,3),(3,4),(4,5),(5,1),(2,4),(1,4)])

graph4 = nx.Graph()
graph4.add_edges_from([(1,2),(2,3),(3,4),(4,5),(5,1),(2,4),(1,4),(1,3)])

# Compute the kernel matrix
graphs = [graph1, graph2, graph3, graph4]
n = len(graphs)
kernel_matrix = np.zeros((n,n))
for i in range(n):
    for j in range(n):
        kernel_matrix[i, j] = graphlet_kernel(graphs[i], graphs[j])

# Perform K-means clustering
kmeans = KMeans(n_clusters=2, random_state=0).fit(kernel_matrix)
print(kmeans.labels_)



AttributeError: module networkx has no attribute subgraph_census