In [1]:
import numpy as np
import networkx as nx
import kmapper as km
import sklearn
import warnings
import matplotlib.pyplot as plt
import signal
import time
import random
from networkx.algorithms.similarity import graph_edit_distance

warnings.filterwarnings("ignore")

In [2]:
def select_k(spectrum, minimum_energy = 0.9):
    running_total = 0.0 
    
    total = sum(spectrum)
    if total == 0.0:
        return len(spectrum)
    for i in range(len(spectrum)):
        running_total += spectrum[i]
        if running_total / total >= minimum_energy:
            return i + 1
    return len(spectrum)

In [3]:
def calculate_similarity(graph1,graph2):
    laplacian1 = nx.spectrum.laplacian_spectrum(graph1)
    laplacian2 = nx.spectrum.laplacian_spectrum(graph2)
    
    k1 = select_k(laplacian1)
    k2 = select_k(laplacian2)
    k = min(k1, k2) #k are different between the two graphs, then use the smaller one.
    similarity = sum((laplacian1[:k] - laplacian2[:k])**2) #sum of the squared differences between the largest k eigenvalues
    return similarity

In [4]:
def extract_graph_features(graph):
    pr = nx.pagerank(graph,0.9)
    dc = nx.degree_centrality(graph)
    cc = nx.closeness_centrality(graph)
    bx = nx.betweenness_centrality(graph)
    c = nx.clustering(graph)
    
    #create list for each features
    pr_list =  [i for i in pr.values()]
    dc_list =  [i for i in dc.values()]
    cc_list =  [i for i in cc.values()]
    bx_list =  [i for i in bx.values()]
    c_list =  [i for i in c.values()]
    d_list = [val for (node, val) in graph.degree()]
    data = np.column_stack((pr_list,dc_list,cc_list,bx_list,c_list,d_list))
    return data

In [5]:
def TDA_transformation(data):
    Xfilt = data
    mapper = km.KeplerMapper()
    scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))
    Xfilt = scaler.fit_transform(Xfilt)
    lens = mapper.fit_transform(Xfilt, projection=sklearn.manifold.TSNE())
    cls = 5  # We use cls= 5

    graph = mapper.map(lens,Xfilt,clusterer=sklearn.cluster.KMeans(n_clusters=cls,random_state=1618033),
        cover=km.Cover(n_cubes=2, perc_overlap=0.3))
    return km.to_nx(graph)

In [6]:
def add_node_to_graph(graph,p):
    new_node = graph.number_of_nodes() + 1
    graph.add_node(new_node)
    existing_nodes = list(graph.nodes())[:-1]  # Exclude the new node
    for existing_node in existing_nodes:
        if random.random() < p:  
            graph.add_edge(new_node, existing_node)
    return graph

In [7]:
def remove_least_degree_node(graph):
    degrees = graph.degree()
    min_degree_node = min(degrees, key=lambda x: x[1])[0]
    # Remove the node with the minimum degree
    graph.remove_node(min_degree_node)
    return graph

In [8]:
def random_add_new_edge(graph):
    node_num = graph.number_of_nodes()
    node1 = random.randint(1,node_num)
    node2 = random.randint(1,node_num)
    while graph.has_edge(node1,node2) or node1 == node2 :
        node1 = random.randint(1,node_num)
        node2 = random.randint(1,node_num)
    graph.add_edge(node1,node2)
    return graph

In [9]:
def random_remove_edge(graph):
    random_edge = random.choice(list(graph.edges()))
    # Remove the randomly selected edge
    graph.remove_edge(*random_edge)
    return graph

In [10]:
def graph_generator(original_graph, node, edge,p):
    new_graph = original_graph.copy()
    if node < 0:
        for i in range(abs(node)):
            new_graph = remove_least_degree_node(new_graph)
    elif node > 0:
        for i in range(abs(node)):
            new_graph = add_node_to_graph(new_graph,p)
    
    if edge < 0:
        for i in range(abs(edge)):
            new_graph = random_remove_edge(new_graph)
    elif edge >0:
        for i in range(abs(edge)):
            new_graph = random_add_new_edge(new_graph)
    return new_graph

In [11]:
def calc_original_average_similarity_for_hop(graph, hop,p):
    counter = 0
    sum = 0
    for i in range(-hop,hop + 1):
        for j in range(-hop,hop + 1):
            if (i == -hop or i == hop or j == -hop or j == hop):
                neighbour = graph_generator(graph,j,i,p)
                score = calculate_similarity(graph, neighbour)
                sum += score
                counter += 1
    return sum/counter

In [12]:
def calc_TDA_average_similarity_for_hop(graph, hop,p):
    counter = 0
    sum = 0
    for i in range(-hop,hop + 1):
        for j in range(-hop,hop + 1):
            if (i == -hop or i == hop or j == -hop or j == hop):
                neighbour = graph_generator(graph,j,i,p)
                TDA_graph = TDA_transformation(extract_graph_features(graph))
                TDA_neighbour = TDA_transformation(extract_graph_features(neighbour))
                score = calculate_similarity(TDA_graph, TDA_neighbour)
                sum += score
                counter += 1
    return sum/counter

In [13]:
def calc_both_average_similarity_for_hop(graph, hop,p):
    counter = 0
    sum_original = 0
    sum_TDA = 0
    for i in range(-hop,hop + 1):
        for j in range(-hop,hop + 1):
            if (i == -hop or i == hop or j == -hop or j == hop):
                neighbour = graph_generator(graph,j,i,p)
                TDA_graph = TDA_transformation(extract_graph_features(graph))
                TDA_neighbour = TDA_transformation(extract_graph_features(neighbour))
                
                score_TDA = calculate_similarity(TDA_graph, TDA_neighbour)
                score_original = calculate_similarity(graph, neighbour)
                
                sum_TDA += score_TDA
                sum_original += score_original
                counter += 1
    return {"TDA": sum_TDA/counter,"original": sum_original/counter}

In [14]:
def calc_hop_v1():
    threshold = 50
    n = random.randint(30,50)
    reach_max = False
    
    p = round(random.uniform(60, 100))/100
#     n = 30
#     p = 0.6
    reference_graph = nx.erdos_renyi_graph(n,p)
    
    TDA_average = 0
    original_average = 0
    
    TDA_hop = 0
    original_hop = 0
    while (TDA_average <= threshold or original_average <= threshold) and TDA_hop < 15:
        if(TDA_average <= threshold and original_average <= threshold):
            TDA_hop += 1
            original_hop += 1
            result = calc_both_average_similarity_for_hop(reference_graph,TDA_hop ,p)
            TDA_average = result['TDA']
            original_average = result['original']
            print(TDA_average,original_average)
        elif(TDA_average <= threshold):
            TDA_hop += 1
            TDA_average = calc_TDA_average_similarity_for_hop(reference_graph,TDA_hop,p)
            print(TDA_average)
        elif(original_average <= threshold):
            original_hop += 1
            original_average = calc_original_average_similarity_for_hop(reference_graph,original_hop,p)
            

    if(reach_max):
        return {"Threshold": threshold, "n":n,"p":p,"last_TDA_average": TDA_average, "last_original_average": original_average,
               "TDA_hop": TDA_hop + "+","orginal_hop":original_hop}
    else:
        return {"Threshold": threshold, "n":n,"p":p,"last_TDA_average": TDA_average, "last_original_average": original_average,
               "TDA_hop": TDA_hop,"orginal_hop":original_hop}
#     return {"TDA_hop":TDA_hop,"orginnal":original_hop}

In [18]:
def calc_average_for_hop(hop,ref_graph,n,p,delta_n,delta_p):
    sum_original = 0
    sum_TDA = 0
    counter = 0
    for i in range(-hop,hop + 1):
        for j in range(-hop,hop + 1):
            if (i == -hop or i == hop or j == -hop or j == hop):
                neighbour = nx.erdos_renyi_graph(i*delta_n + n,j*delta_p + p)
                
                TDA_graph = TDA_transformation(extract_graph_features(ref_graph))
                TDA_neighbour = TDA_transformation(extract_graph_features(neighbour))
                
                score_TDA = calculate_similarity(TDA_graph, TDA_neighbour)
                score_original = calculate_similarity(ref_graph, neighbour)
                
                sum_TDA += score_TDA
                sum_original += score_original
                counter += 1
    return {"hop:" : hop,"TDA": sum_TDA/counter,"original": sum_original/counter}
                

In [19]:
G = nx.erdos_renyi_graph(30,0.3)

result = calc_average_for_hop(hop = 1,ref_graph = G,n = 30,p = 0.3,delta_n = 2,delta_p = 0.05)
result

{'hop:': 1, 'TDA': 9.507347129771286, 'original': 77.89334391468418}

In [17]:
result = calc_average_for_hop(hop = 2,ref_graph = G,n = 30,p = 0.3,delta_n = 2,delta_p = 0.05)
result

{'TDA': 12.272861381934323, 'original': 228.77719987633557}

In [26]:
def conduct_test_for_on_graph(graph,n,p,delta_n,delta_p,n_hop):
    result = [dict() for x in range(n_hop)]
    for i in range(n_hop):
        result[i] = calc_average_for_hop(hop =i+1,ref_graph = graph,n = n,p = p,delta_n = delta_n,delta_p = delta_p)
    return {"n" : n,"p" : p,"Result" : result}
    

In [27]:
result = conduct_test_for_on_graph(G,30,0.3,2,0.05,4)
result

{'n': 30,
 'p': 0.3,
 'Result': [{'hop:': 1,
   'TDA': 12.594334504078214,
   'original': 71.70064664422883},
  {'hop:': 2, 'TDA': 14.758963670114408, 'original': 198.58170262805848},
  {'hop:': 3, 'TDA': 15.383603484941808, 'original': 348.9553165645368},
  {'hop:': 4, 'TDA': 11.349649011312883, 'original': 620.2731672598416}]}

In [None]:
def conduct_test_on_sample(sample_numbers):
    for i in range(sample_numbers):
        n = random.randint(30,50)       
        p = round(random.uniform(5, 40))/100
        reference_graph = nx.erdos_renyi_graph(n,p)
        result_list[i] = conduct_test_for_on_graph(reference_graph,n,p,2,0.05,4)