In [51]:
import numpy as np
import networkx as nx
import kmapper as km
import sklearn
import warnings
import matplotlib.pyplot as plt
import signal
import time
import random
import numpy as np
from scipy.sparse import csr_matrix
from networkx.algorithms.similarity import graph_edit_distance
from sklearn.metrics.pairwise import cosine_similarity

warnings.filterwarnings("ignore")

In [3]:
def select_k(spectrum, minimum_energy = 0.9):
    running_total = 0.0 
    
    total = sum(spectrum)
    if total == 0.0:
        return len(spectrum)
    for i in range(len(spectrum)):
        running_total += spectrum[i]
        if running_total / total >= minimum_energy:
            return i + 1
    return len(spectrum)

In [4]:
def calculate_similarity(graph1,graph2):
    laplacian1 = nx.spectrum.laplacian_spectrum(graph1)
    laplacian2 = nx.spectrum.laplacian_spectrum(graph2)
    
    k1 = select_k(laplacian1)
    k2 = select_k(laplacian2)
    k = min(k1, k2) #k are different between the two graphs, then use the smaller one.
    similarity = sum((laplacian1[:k] - laplacian2[:k])**2) #sum of the squared differences between the largest k eigenvalues
    return similarity


In [5]:
def extract_graph_features(graph):
    pr = nx.pagerank(graph,0.9)
    dc = nx.degree_centrality(graph)
    cc = nx.closeness_centrality(graph)
    bx = nx.betweenness_centrality(graph)
    c = nx.clustering(graph)
    
    #create list for each features
    pr_list =  [i for i in pr.values()]
    dc_list =  [i for i in dc.values()]
    cc_list =  [i for i in cc.values()]
    bx_list =  [i for i in bx.values()]
    c_list =  [i for i in c.values()]
    d_list = [val for (node, val) in graph.degree()]
    data = np.column_stack((pr_list,dc_list,cc_list,bx_list,c_list,d_list))
    return data

In [6]:
def TDA_transformation(data):
    Xfilt = data
    mapper = km.KeplerMapper()
    scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))
    Xfilt = scaler.fit_transform(Xfilt)
    lens = mapper.fit_transform(Xfilt, projection=sklearn.manifold.TSNE())
    cls = 2  # We use cls= 5

    graph = mapper.map(lens,Xfilt,clusterer=sklearn.cluster.KMeans(n_clusters=cls,random_state=1618033),
        cover=km.Cover(n_cubes=5, perc_overlap=0.1))
    return km.to_nx(graph)

In [7]:
def add_node_to_graph(graph,p):
    new_node = graph.number_of_nodes() + 1
    graph.add_node(new_node)
    existing_nodes = list(graph.nodes())[:-1]  # Exclude the new node
    for existing_node in existing_nodes:
        if random.random() < p:  
            graph.add_edge(new_node, existing_node)
    return graph
    

In [8]:
def remove_least_degree_node(graph):
    degrees = graph.degree()
    min_degree_node = min(degrees, key=lambda x: x[1])[0]
    # Remove the node with the minimum degree
    graph.remove_node(min_degree_node)
    return graph

In [9]:
def random_add_new_edge(graph):
    node_num = graph.number_of_nodes()
    node1 = random.randint(1,node_num)
    node2 = random.randint(1,node_num)
    while graph.has_edge(node1,node2) or node1 == node2 :
        node1 = random.randint(1,node_num)
        node2 = random.randint(1,node_num)
    graph.add_edge(node1,node2)
    return graph

In [10]:
def random_remove_edge(graph):
    random_edge = random.choice(list(graph.edges()))
    # Remove the randomly selected edge
    graph.remove_edge(*random_edge)
    return graph

In [11]:
def graph_generator(original_graph, node, edge,p):
    new_graph = original_graph.copy()
    if node < 0:
        for i in range(abs(node)):
            new_graph = remove_least_degree_node(new_graph)
    elif node > 0:
        for i in range(abs(node)):
            new_graph = add_node_to_graph(new_graph,p)
    
    if edge < 0:
        for i in range(abs(edge)):
            new_graph = random_remove_edge(new_graph)
    elif edge >0:
        for i in range(abs(edge)):
            new_graph = random_add_new_edge(new_graph)
    return new_graph

In [12]:
def calc_TDA_average_similarity_for_hop(graph, hop,p):
    counter = 0
    sum = 0
    for i in range(-hop,hop + 1):
        for j in range(-hop,hop + 1):
            if (i == -hop or i == hop or j == -hop or j == hop):
                neighbour = graph_generator(graph,j,i,p)
                TDA_graph = TDA_transformation(extract_graph_features(graph))
                TDA_neighbour = TDA_transformation(extract_graph_features(neighbour))
                score = calculate_similarity(TDA_graph, TDA_neighbour)
                sum += score
                counter += 1
    return sum/counter

In [31]:
def TDA_transformation(data):
    Xfilt = data
    mapper = km.KeplerMapper()
    scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))
    Xfilt = scaler.fit_transform(Xfilt)
    lens = mapper.fit_transform(Xfilt, projection=sklearn.manifold.TSNE())
    cls = 2  # We use cls= 5

    graph = mapper.map(lens,Xfilt,clusterer=sklearn.cluster.KMeans(n_clusters=cls,random_state=1618033),
        cover=km.Cover(n_cubes=5, perc_overlap=0.3))
    return km.to_nx(graph)

In [19]:
def calc_both_average_similarity_for_hop(graph, hop,p):
    counter = 0
    sum_original = 0
    sum_TDA = 0
    for i in range(-hop,hop + 1):
        for j in range(-hop,hop + 1):
            if (i == -hop or i == hop or j == -hop or j == hop):
                neighbour = graph_generator(graph,j,i,p)
                TDA_graph = TDA_transformation(extract_graph_features(graph))
                TDA_neighbour = TDA_transformation(extract_graph_features(neighbour))
                
                score_TDA = calculate_similarity(TDA_graph, TDA_neighbour)
                score_original = calculate_similarity(graph, neighbour)
                
                sum_TDA += score_TDA
                sum_original += score_original
                counter += 1
    return {"TDA": sum_TDA/counter,"original": sum_original/counter}

In [24]:
g = nx.erdos_renyi_graph(30,0.6)

In [32]:
for i in range(1,20):
    result = calc_both_average_similarity_for_hop(g,i,0.6)
    score_TDA = result["TDA"]
    score_original = result["original"]
    print(score_original,score_TDA)

2.1913387736550756 55.133771874274025
20.910601108106974 54.39359944250961
44.07558497838127 58.33701843217739


KeyboardInterrupt: 

In [38]:
def backbone_pruning(adj_matrix, threshold):
    pruned_matrix = adj_matrix.copy()
    pruned_matrix[pruned_matrix < threshold] = 0
    return pruned_matrix

def original_bp_similarity(graph_a, graph_b, threshold):
    # Convert graphs to adjacency matrices
    adj_matrix_a = nx.adjacency_matrix(graph_a).toarray()
    adj_matrix_b = nx.adjacency_matrix(graph_b).toarray()

    # Apply backbone pruning to adjacency matrices
    pruned_matrix_a = backbone_pruning(adj_matrix_a, threshold)
    pruned_matrix_b = backbone_pruning(adj_matrix_b, threshold)

    # Calculate similarity score based on the Jaccard index of pruned matrices
    intersection = np.logical_and(pruned_matrix_a, pruned_matrix_b)
    union = np.logical_or(pruned_matrix_a, pruned_matrix_b)
    similarity_score = np.sum(intersection) / np.sum(union)

    return similarity_score

In [55]:
def graph_similarity_score(graph_a, graph_b):
    # Convert graphs to adjacency matrices
    adj_matrix_a = nx.to_numpy_array(graph_a)
    adj_matrix_b = nx.to_numpy_array(graph_b)

    # Flatten the adjacency matrices to obtain feature vectors
    vector_a = adj_matrix_a.flatten()
    vector_b = adj_matrix_b.flatten()

    # Reshape feature vectors to be 2D arrays for cosine similarity calculation
    vector_a = np.reshape(vector_a, (1, -1))
    vector_b = np.reshape(vector_b, (1, -1))

    # Calculate cosine similarity between the feature vectors
    similarity_score = cosine_similarity(vector_a, vector_b)[0, 0]

    return similarity_score

# Example usage
# Create two graphs using NetworkX
graph_a = nx.erdos_renyi_graph(30,0.6)
graph_a.add_edges_from([(1, 2), (2, 3)])

graph_b = nx.erdos_renyi_graph(31,0.6)
graph_b.add_edges_from([(1, 2), (2, 4)])

# Calculate the similarity score between the two graphs using cosine similarity
similarity_score = graph_similarity_score(graph_a, graph_b)
print("Similarity Score:", similarity_score)

ValueError: Incompatible dimension for X and Y matrices: X.shape[1] == 900 while Y.shape[1] == 961