In [2]:
import warnings
warnings.filterwarnings("ignore", message="IProgress not found. Please update jupyter and ipywidgets.")
import networkx as nx
from networkx import to_numpy_array
from node2vec import Node2Vec
import numpy as np
from sklearn.cluster import KMeans
from sknetwork.data import karate_club
from sknetwork.clustering import get_modularity
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import normalize
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

# Initial defenitions

In [2]:
# Creating graphs from data

graph_karate = nx.karate_club_graph()

graph_dolphin = nx.Graph()

with open('soc-dolphins.txt', 'r') as file:
    for line in file:
        node1, node2 = line.strip().split()
        graph_dolphin.add_edge(node1, node2)

graph_football = nx.Graph()

with open('American_football.txt', 'r') as file:
    for line in file:
        node1, node2 = line.strip().split()
        graph_football.add_edge(node1, node2)


graph_book = nx.Graph()

with open('book.txt', 'r') as file:
    for line in file:
        node1, node2 = line.strip().split()
        graph_book.add_edge(node1, node2)



In [3]:
# Calculating adjancency and position
def get_adjacency_and_position(graph):
  adjacency = csr_matrix(nx.to_numpy_array(graph))
  position_dict = nx.spring_layout(graph)
  position = np.array(list(position_dict.values()))
  return adjacency, position


def get_node_array_without_embedding(graph):
  '''
  Convert graph to array without embedding
  '''
  node_array = to_numpy_array(graph)
  return node_array


# Node2vec
def get_embedded_node_array_using_node2vec_weighted(graph):

    model = Node2Vec(graph, dimensions=64, walk_length=80, num_walks=100, p=0.5, q=1.0, weight_key='weight')
    model = model.fit(window=10, min_count=1)

    embeddings = {node: model.wv[str(node)] for node in graph.nodes()}
    embedded_node_array = np.array(list(embeddings.values()))

    norm = np.linalg.norm(embedded_node_array, axis=1, keepdims=True)

    embedded_node_array = embedded_node_array / norm
    return embedded_node_array

# HOPE
def get_hope_embedding(graph, d=32):

    adjacency_matrix = nx.to_numpy_array(graph)
    identity_matrix = np.eye(adjacency_matrix.shape[0])

    katz_matrix = np.linalg.inv(identity_matrix - 0.5 * adjacency_matrix) - identity_matrix
    katz_matrix = np.asarray(katz_matrix)  

    svd = TruncatedSVD(n_components=d)
    U = svd.fit_transform(katz_matrix)
    S = svd.singular_values_
    V = svd.components_.T

    hope_embedding = np.concatenate((U * np.sqrt(S), V * np.sqrt(S)), axis=1)

    norm = np.linalg.norm(hope_embedding, axis=1, keepdims=True)
    
    hope_embedding = hope_embedding / norm
    return hope_embedding

# SDNE
def train_sdne(graph, hidden_layers=[256, 64], epochs=100, learning_rate=0.01):    
    class SDNE(nn.Module):
        def __init__(self, input_dim, hidden_layers, alpha=1e-5, beta=5):
            super(SDNE, self).__init__()
            self.alpha = alpha
            self.beta = beta
            self.encoder = nn.Sequential(
                nn.Linear(input_dim, hidden_layers[0]),
                nn.ReLU(),
                nn.Linear(hidden_layers[0], hidden_layers[1])
            )
            self.decoder = nn.Sequential(
                nn.Linear(hidden_layers[1], hidden_layers[0]),
                nn.ReLU(),
                nn.Linear(hidden_layers[0], input_dim)
            )

        def forward(self, x):
            encoded = self.encoder(x)
            reconstructed = self.decoder(encoded)
            return encoded, reconstructed

        def loss(self, x, reconstructed, L):
            bce_loss = nn.BCEWithLogitsLoss()(reconstructed, x)
            L_loss = self.beta * torch.mean(torch.sum(L * (x - reconstructed) ** 2, dim=1))
            return bce_loss + self.alpha * L_loss

    def get_adjacency_matrix(graph):
        adjacency_matrix = nx.to_numpy_array(graph)
        return adjacency_matrix

    adjacency_matrix = get_adjacency_matrix(graph)
    L = torch.FloatTensor(adjacency_matrix)
    D = np.diag(np.sum(adjacency_matrix, axis=1))
    L = D - adjacency_matrix
    L = torch.FloatTensor(L)

    model = SDNE(adjacency_matrix.shape[1], hidden_layers)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    adjacency_matrix = torch.FloatTensor(adjacency_matrix)

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        _, reconstructed = model(adjacency_matrix)
        loss = model.loss(adjacency_matrix, reconstructed, L)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        embedding, _ = model(adjacency_matrix)

    embedding = embedding.numpy()
    norm = np.linalg.norm(embedding, axis=1, keepdims=True)
    embedding = embedding / norm
    return embedding

In [4]:
# Creating data array

def prepare_data_arrays(graph):
    node2vec = get_embedded_node_array_using_node2vec_weighted(graph)
    hope = get_hope_embedding(graph)
    sdne = train_sdne(graph)
    
    return {
        'node_array': get_node_array_without_embedding(graph),
        'embedded_node_array_node2vec': node2vec,
        'embedded_node_array_hope': hope,
        'embedded_node_array_sdne': sdne,
        'embedded_node_array_sdne+hope': np.add(sdne, hope),
        'embedded_node_array_sdne+node2vec': np.add(sdne, node2vec),
        'embedded_node_array_hope+node2vec': np.add(hope, node2vec),
        'embedded_node_array_summation': np.add(np.add(sdne, hope), node2vec),
        'embedded_node_array_sdne*hope': np.multiply(sdne, hope),
        'embedded_node_array_sdne*node2vec': np.multiply(sdne, node2vec),
        'embedded_node_array_hope*node2vec': np.multiply(hope, node2vec),
        'embedded_node_array_multiply': np.multiply(np.multiply(sdne, hope), node2vec),
        'embedded_node_array_sdneMINhope': np.minimum(sdne, hope),
        'embedded_node_array_sdneMINnode2vec': np.minimum(sdne, node2vec),
        'embedded_node_array_hopeMINnode2vec': np.minimum(hope, node2vec),
        'embedded_node_array_minimum': np.minimum(np.minimum(sdne, hope), node2vec),
        'embedded_node_array_sdneMAXhope': np.maximum(sdne, hope),
        'embedded_node_array_sdneMAXnode2vec': np.maximum(sdne, node2vec),
        'embedded_node_array_hopeMAXnode2vec': np.maximum(hope, node2vec),
        'embedded_node_array_maximum': np.maximum(np.maximum(sdne, hope), node2vec),
    }

In [5]:
# Calculating adjancency and position for each dataset

graph = karate_club(metadata=True)
adjacency_karate, position_karate = graph.adjacency, graph.position
adjacency_dolphin, position_dolphin = get_adjacency_and_position(graph_dolphin)
adjacency_football, position_football = get_adjacency_and_position(graph_football)
adjacency_book, position_book = get_adjacency_and_position(graph_book)

In [6]:
# K-means clustering

def perform_kmeans_clustering(data_array, adjacency, range_k=(2, 11), random_state=0):
    labels = []
    modularity = []
    work_log = []

    for k in range(*range_k):
        start_time = datetime.now()
        kmeans = KMeans(n_clusters=k, random_state=random_state)
        kmeans.fit(data_array)
        end_time = datetime.now()

        labels.append(kmeans.labels_)
        modularity.append(get_modularity(adjacency, kmeans.labels_))
        work_log.append(end_time - start_time)

    best_k = modularity.index(max(modularity)) + range_k[0]
    best_labels = labels[best_k - range_k[0]]
    best_modularity = modularity[best_k - range_k[0]]
    best_work_log = work_log[best_k - range_k[0]]

    # print(f"Best value of k: {best_k}")
    return best_labels, best_modularity, best_work_log

def cluster_all_data_arrays(data_arrays, adjacency):
    labels_dict = {}
    modularity_dict = {}
    work_log_dict = {}

    for array_type, data_array in data_arrays.items():
        # print(f"Clustering for {array_type}...")
        best_labels, best_modularity, best_work_log = perform_kmeans_clustering(data_array, adjacency)
        
        labels_dict[array_type] = best_labels
        modularity_dict[array_type] = best_modularity
        work_log_dict[array_type] = best_work_log
    
    return labels_dict, modularity_dict, work_log_dict


# Jaccard

In [7]:
# Calculate Jaccard Coefficient for pairs of nodes connected by an edge

def jaccard_sim(original_graph):

    jaccard_graph = original_graph.copy()

    jaccard = []
    for u, v in original_graph.edges():
        jaccard.extend(nx.jaccard_coefficient(original_graph, [(u, v)]))

    # Normalize the scores
    scaler = MinMaxScaler()
    normalized_jaccard = scaler.fit_transform([[s] for _, _, s in jaccard])

    for (u, v, s), norm_s in zip(jaccard, normalized_jaccard):
        if jaccard_graph.has_edge(u, v):
            if norm_s[0] == 0 :
                jaccard_graph[u][v]['weight'] =  0.0000000000000001
            else:
                jaccard_graph[u][v]['weight'] = norm_s[0]

    print("Jaccard Coefficient Weighted Graph")
    print(jaccard_graph.edges(data=True))
    return(jaccard_graph)

In [8]:
graph_karate_jaccard = jaccard_sim(graph_karate)
graph_dolphin_jaccard = jaccard_sim(graph_dolphin)
graph_football_jaccard = jaccard_sim(graph_football)
graph_book_jaccard = jaccard_sim(graph_book)

Jaccard Coefficient Weighted Graph
[(0, 1, {'weight': 0.7388888888888889}), (0, 2, {'weight': 0.4523809523809524}), (0, 3, {'weight': 0.5588235294117647}), (0, 4, {'weight': 0.2235294117647059}), (0, 5, {'weight': 0.2111111111111111}), (0, 6, {'weight': 0.2111111111111111}), (0, 7, {'weight': 0.33529411764705885}), (0, 8, {'weight': 0.09500000000000001}), (0, 10, {'weight': 0.2235294117647059}), (0, 11, {'weight': 1e-16}), (0, 12, {'weight': 0.11176470588235295}), (0, 13, {'weight': 0.31666666666666665}), (0, 17, {'weight': 0.11176470588235295}), (0, 19, {'weight': 0.10555555555555556}), (0, 21, {'weight': 0.11176470588235295}), (0, 31, {'weight': 1e-16}), (1, 2, {'weight': 0.5066666666666667}), (1, 3, {'weight': 0.690909090909091}), (1, 7, {'weight': 0.5700000000000001}), (1, 13, {'weight': 0.5181818181818182}), (1, 17, {'weight': 0.19000000000000003}), (1, 19, {'weight': 0.17272727272727276}), (1, 21, {'weight': 0.19000000000000003}), (1, 30, {'weight': 1e-16}), (2, 3, {'weight': 0.6

In [9]:
# Creating data array for each dataset

karate_club_data_array_jaccard = prepare_data_arrays(graph_karate_jaccard)
dolphin_data_array_jaccard = prepare_data_arrays(graph_dolphin_jaccard)
football_data_array_jaccard = prepare_data_arrays(graph_football_jaccard)
book_data_array_jaccard = prepare_data_arrays(graph_book_jaccard)

Computing transition probabilities: 100%|██████████| 34/34 [00:00<00:00, 6813.82it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:01<00:00, 93.00it/s]
Computing transition probabilities: 100%|██████████| 62/62 [00:00<00:00, 5954.14it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:01<00:00, 50.47it/s]
Computing transition probabilities: 100%|██████████| 115/115 [00:00<00:00, 2127.70it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:04<00:00, 24.93it/s]
Computing transition probabilities: 100%|██████████| 105/105 [00:00<00:00, 1968.45it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:03<00:00, 26.75it/s]


In [10]:
# Performing K-means

kmeans_labels_dict_karate_jaccard, kmeans_modularity_karate_jaccard, kmeans_work_log_karate_jaccard = cluster_all_data_arrays(karate_club_data_array_jaccard, adjacency_karate)
kmeans_labels_dict_dolphin_jaccard, kmeans_modularity_dolphin_jaccard, kmeans_work_log_dolphin_jaccard = cluster_all_data_arrays(dolphin_data_array_jaccard, adjacency_dolphin)
kmeans_labels_dict_football_jaccard, kmeans_modularity_football_jaccard, kmeans_work_log_football_jaccard = cluster_all_data_arrays(football_data_array_jaccard, adjacency_football)
kmeans_labels_dict_book_jaccard, kmeans_modularity_book_jaccard, kmeans_work_log_book_jaccard = cluster_all_data_arrays(book_data_array_jaccard, adjacency_book)

In [11]:
kmeans_modularity_karate_jaccard

{'node_array': 0.17225509533201833,
 'embedded_node_array_node2vec': 0.4151051939513479,
 'embedded_node_array_hope': 0.39907955292570685,
 'embedded_node_array_sdne': 0.2807363576594346,
 'embedded_node_array_sdne+hope': 0.3845332018408941,
 'embedded_node_array_sdne+node2vec': 0.4151051939513479,
 'embedded_node_array_hope+node2vec': 0.4101742274819199,
 'embedded_node_array_summation': 0.39907955292570674,
 'embedded_node_array_sdne*hope': 0.3869986850756081,
 'embedded_node_array_sdne*node2vec': 0.4151051939513479,
 'embedded_node_array_hope*node2vec': 0.26109467455621305,
 'embedded_node_array_multiply': 0.09557856673241288,
 'embedded_node_array_sdneMINhope': 0.39949046679815914,
 'embedded_node_array_sdneMINnode2vec': 0.4058185404339251,
 'embedded_node_array_hopeMINnode2vec': 0.389792899408284,
 'embedded_node_array_minimum': 0.389792899408284,
 'embedded_node_array_sdneMAXhope': 0.40203813280736356,
 'embedded_node_array_sdneMAXnode2vec': 0.4151051939513479,
 'embedded_node_ar

In [12]:
kmeans_modularity_dolphin_jaccard

{'node_array': 0.2955381511807286,
 'embedded_node_array_node2vec': 0.5267987816937623,
 'embedded_node_array_hope': 0.46064237965270366,
 'embedded_node_array_sdne': 0.31270519362367,
 'embedded_node_array_sdne+hope': 0.49715201139195453,
 'embedded_node_array_sdne+node2vec': 0.5220125786163523,
 'embedded_node_array_hope+node2vec': 0.4992880028479886,
 'embedded_node_array_summation': 0.47747320121830616,
 'embedded_node_array_sdne*hope': 0.4641232546180926,
 'embedded_node_array_sdne*node2vec': 0.5173648194296112,
 'embedded_node_array_hope*node2vec': 0.30823543372493173,
 'embedded_node_array_multiply': 0.24846722835330887,
 'embedded_node_array_sdneMINhope': 0.5002373323840039,
 'embedded_node_array_sdneMINnode2vec': 0.5160594913175902,
 'embedded_node_array_hopeMINnode2vec': 0.4811122977730311,
 'embedded_node_array_minimum': 0.4951149084292552,
 'embedded_node_array_sdneMAXhope': 0.49715201139195453,
 'embedded_node_array_sdneMAXnode2vec': 0.4805980776076896,
 'embedded_node_arr

In [13]:
kmeans_modularity_football_jaccard

{'node_array': 0.5634751669243605,
 'embedded_node_array_node2vec': 0.6028717110778165,
 'embedded_node_array_hope': 0.509171858242697,
 'embedded_node_array_sdne': 0.5975625982984227,
 'embedded_node_array_sdne+hope': 0.5189092234857053,
 'embedded_node_array_sdne+node2vec': 0.6002876767375702,
 'embedded_node_array_hope+node2vec': 0.5293544704326327,
 'embedded_node_array_summation': 0.5241770875191939,
 'embedded_node_array_sdne*hope': 0.38946799762620143,
 'embedded_node_array_sdne*node2vec': 0.5978154131926796,
 'embedded_node_array_hope*node2vec': 0.3984908281417575,
 'embedded_node_array_multiply': 0.3218679561113346,
 'embedded_node_array_sdneMINhope': 0.589429942331592,
 'embedded_node_array_sdneMINnode2vec': 0.6021452009080046,
 'embedded_node_array_hopeMINnode2vec': 0.524612195258257,
 'embedded_node_array_minimum': 0.5778084408240168,
 'embedded_node_array_sdneMAXhope': 0.4989860792135594,
 'embedded_node_array_sdneMAXnode2vec': 0.5999177686291313,
 'embedded_node_array_hop

In [14]:
kmeans_modularity_book_jaccard

{'node_array': 0.23507694839084542,
 'embedded_node_array_node2vec': 0.5265681480453103,
 'embedded_node_array_hope': 0.4381070644433133,
 'embedded_node_array_sdne': 0.4718147274026768,
 'embedded_node_array_sdne+hope': 0.43915086820820554,
 'embedded_node_array_sdne+node2vec': 0.5221718316956412,
 'embedded_node_array_hope+node2vec': 0.45409063096137936,
 'embedded_node_array_summation': 0.4584278155706728,
 'embedded_node_array_sdne*hope': 0.4051449756017298,
 'embedded_node_array_sdne*node2vec': 0.5191946771149879,
 'embedded_node_array_hope*node2vec': 0.31434947372751065,
 'embedded_node_array_multiply': 0.31830101655174536,
 'embedded_node_array_sdneMINhope': 0.4494655004859087,
 'embedded_node_array_sdneMINnode2vec': 0.5251952632905013,
 'embedded_node_array_hopeMINnode2vec': 0.4705318257310483,
 'embedded_node_array_minimum': 0.47360667623058295,
 'embedded_node_array_sdneMAXhope': 0.4319162283205042,
 'embedded_node_array_sdneMAXnode2vec': 0.503051711992431,
 'embedded_node_ar

# Adamic-Adar

In [15]:
# Calculate Adamic-Adar Coefficient for pairs of nodes connected by an edge

def adamic_adar_sim(original_graph):
    adamic_adar_graph = original_graph.copy()

    adamic_adar = []
    for u, v in original_graph.edges():
        similarity = nx.adamic_adar_index(original_graph, [(u, v)])
        adamic_adar.append((u, v, list(similarity)[0][2]))

    # Normalize the scores
    scaler = MinMaxScaler()
    normalized_adamic_adar = scaler.fit_transform([[s] for _, _, s in adamic_adar])

    for (u, v, s), norm_s in zip(adamic_adar, normalized_adamic_adar):
        if norm_s[0] == 0 :
            adamic_adar_graph[u][v]['weight'] =  0.0000000000000001
        else:
            adamic_adar_graph[u][v]['weight'] = norm_s[0]

    print("Adamic-Adar Index Weighted Graph")
    print(adamic_adar_graph.edges(data=True))

    return(adamic_adar_graph)

In [16]:
graph_karate_adamic_adar = adamic_adar_sim(graph_karate)
graph_dolphin_adamic_adar = adamic_adar_sim(graph_dolphin)
graph_football_adamic_adar = adamic_adar_sim(graph_football)
graph_book_adamic_adar = adamic_adar_sim(graph_book)

Adamic-Adar Index Weighted Graph
[(0, 1, {'weight': 0.5862815101369199}), (0, 2, {'weight': 0.284714703470282}), (0, 3, {'weight': 0.3514209526396614}), (0, 4, {'weight': 0.1560289215739986}), (0, 5, {'weight': 0.1560289215739986}), (0, 6, {'weight': 0.1560289215739986}), (0, 7, {'weight': 0.13842703839961878}), (0, 8, {'weight': 0.0415316560878763}), (0, 10, {'weight': 0.1560289215739986}), (0, 11, {'weight': 1e-16}), (0, 12, {'weight': 0.05337221532112219}), (0, 13, {'weight': 0.13842703839961878}), (0, 17, {'weight': 0.04352316699062029}), (0, 19, {'weight': 0.04352316699062029}), (0, 21, {'weight': 0.04352316699062029}), (0, 31, {'weight': 1e-16}), (1, 2, {'weight': 0.21626446349314998}), (1, 3, {'weight': 0.2044239042599041}), (1, 7, {'weight': 0.1293951652053775}), (1, 13, {'weight': 0.1293951652053775}), (1, 17, {'weight': 0.03449129379637901}), (1, 19, {'weight': 0.03449129379637901}), (1, 21, {'weight': 0.03449129379637901}), (1, 30, {'weight': 1e-16}), (2, 3, {'weight': 0.206

In [17]:
# Creating data array for each dataset

karate_club_data_array_adamic_adar = prepare_data_arrays(graph_karate_adamic_adar)
dolphin_data_array_adamic_adar = prepare_data_arrays(graph_dolphin_adamic_adar)
football_data_array_adamic_adar = prepare_data_arrays(graph_football_adamic_adar)
book_data_array_adamic_adar = prepare_data_arrays(graph_book_adamic_adar)

Computing transition probabilities: 100%|██████████| 34/34 [00:00<00:00, 6797.58it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:01<00:00, 83.64it/s]
Computing transition probabilities: 100%|██████████| 62/62 [00:00<00:00, 5809.54it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:02<00:00, 49.88it/s]
Computing transition probabilities: 100%|██████████| 115/115 [00:00<00:00, 2130.08it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:03<00:00, 25.16it/s]
Computing transition probabilities: 100%|██████████| 105/105 [00:00<00:00, 2499.87it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:03<00:00, 25.02it/s]


In [18]:
# Performing K-means

kmeans_labels_dict_karate_adamic_adar, kmeans_modularity_karate_adamic_adar, kmeans_work_log_karate_adamic_adar = cluster_all_data_arrays(karate_club_data_array_adamic_adar, adjacency_karate)
kmeans_labels_dict_dolphin_adamic_adar, kmeans_modularity_dolphin_adamic_adar, kmeans_work_log_dolphin_adamic_adar = cluster_all_data_arrays(dolphin_data_array_adamic_adar, adjacency_dolphin)
kmeans_labels_dict_football_adamic_adar, kmeans_modularity_football_adamic_adar, kmeans_work_log_football_adamic_adar = cluster_all_data_arrays(football_data_array_adamic_adar, adjacency_football)
kmeans_labels_dict_book_adamic_adar, kmeans_modularity_book_adamic_adar, kmeans_work_log_book_adamic_adar = cluster_all_data_arrays(book_data_array_adamic_adar, adjacency_book)

In [19]:
kmeans_modularity_karate_adamic_adar

{'node_array': -0.011834319526626946,
 'embedded_node_array_node2vec': 0.38798487836949386,
 'embedded_node_array_hope': 0.3433596318211703,
 'embedded_node_array_sdne': 0.1535174227481917,
 'embedded_node_array_sdne+hope': 0.3458251150558842,
 'embedded_node_array_sdne+node2vec': 0.38535502958579887,
 'embedded_node_array_hope+node2vec': 0.38798487836949375,
 'embedded_node_array_summation': 0.38798487836949375,
 'embedded_node_array_sdne*hope': 0.3560157790927021,
 'embedded_node_array_sdne*node2vec': 0.38535502958579887,
 'embedded_node_array_hope*node2vec': 0.29380341880341876,
 'embedded_node_array_multiply': 0.29380341880341876,
 'embedded_node_array_sdneMINhope': 0.2977481919789612,
 'embedded_node_array_sdneMINnode2vec': 0.38593030900723213,
 'embedded_node_array_hopeMINnode2vec': 0.3717948717948718,
 'embedded_node_array_minimum': 0.37146614069690986,
 'embedded_node_array_sdneMAXhope': 0.35100262984878366,
 'embedded_node_array_sdneMAXnode2vec': 0.38535502958579887,
 'embedde

In [20]:
kmeans_modularity_dolphin_adamic_adar

{'node_array': 0.27564178632174363,
 'embedded_node_array_node2vec': 0.5267987816937623,
 'embedded_node_array_hope': 0.5264625608164235,
 'embedded_node_array_sdne': 0.40267394485977615,
 'embedded_node_array_sdne+hope': 0.497745342351964,
 'embedded_node_array_sdne+node2vec': 0.4808156322930265,
 'embedded_node_array_hope+node2vec': 0.515426604960247,
 'embedded_node_array_summation': 0.5264625608164235,
 'embedded_node_array_sdne*hope': 0.4703532296981923,
 'embedded_node_array_sdne*node2vec': 0.5263241169257546,
 'embedded_node_array_hope*node2vec': 0.435722479332305,
 'embedded_node_array_multiply': 0.4455717732684625,
 'embedded_node_array_sdneMINhope': 0.45405640599659836,
 'embedded_node_array_sdneMINnode2vec': 0.5162968237015941,
 'embedded_node_array_hopeMINnode2vec': 0.5025117677307068,
 'embedded_node_array_minimum': 0.5062893081761006,
 'embedded_node_array_sdneMAXhope': 0.4257149638068114,
 'embedded_node_array_sdneMAXnode2vec': 0.47790831058897987,
 'embedded_node_array_

In [21]:
kmeans_modularity_football_adamic_adar

{'node_array': 0.5576125225870149,
 'embedded_node_array_node2vec': 0.6001572774763219,
 'embedded_node_array_hope': 0.43541510875032274,
 'embedded_node_array_sdne': 0.6044285185845559,
 'embedded_node_array_sdne+hope': 0.4766319201424279,
 'embedded_node_array_sdne+node2vec': 0.591721243636383,
 'embedded_node_array_hope+node2vec': 0.4733200450276633,
 'embedded_node_array_summation': 0.5036072693596332,
 'embedded_node_array_sdne*hope': 0.38480422812951576,
 'embedded_node_array_sdne*node2vec': 0.5941083484800502,
 'embedded_node_array_hope*node2vec': 0.3387639746759312,
 'embedded_node_array_multiply': 0.28680652209203017,
 'embedded_node_array_sdneMINhope': 0.42191080158288735,
 'embedded_node_array_sdneMINnode2vec': 0.5989890065439138,
 'embedded_node_array_hopeMINnode2vec': 0.4904262991359053,
 'embedded_node_array_minimum': 0.5010312186476267,
 'embedded_node_array_sdneMAXhope': 0.4705603708661438,
 'embedded_node_array_sdneMAXnode2vec': 0.5875870548129303,
 'embedded_node_arra

In [22]:
kmeans_modularity_book_adamic_adar

{'node_array': 0.11905276093808642,
 'embedded_node_array_node2vec': 0.5248841789172207,
 'embedded_node_array_hope': 0.45254806382114443,
 'embedded_node_array_sdne': 0.4768306415536734,
 'embedded_node_array_sdne+hope': 0.481309228150822,
 'embedded_node_array_sdne+node2vec': 0.5017482427589326,
 'embedded_node_array_hope+node2vec': 0.4647780502979726,
 'embedded_node_array_summation': 0.47432139900555853,
 'embedded_node_array_sdne*hope': 0.33858577444583265,
 'embedded_node_array_sdne*node2vec': 0.5220741357767595,
 'embedded_node_array_hope*node2vec': 0.30670091165717994,
 'embedded_node_array_multiply': 0.2943243813020294,
 'embedded_node_array_sdneMINhope': 0.445411119852325,
 'embedded_node_array_sdneMINnode2vec': 0.5017482427589326,
 'embedded_node_array_hopeMINnode2vec': 0.4617520477578788,
 'embedded_node_array_minimum': 0.4750258379995991,
 'embedded_node_array_sdneMAXhope': 0.4736118181210504,
 'embedded_node_array_sdneMAXnode2vec': 0.5240074865925205,
 'embedded_node_arra

# Preferential Attachment

In [23]:
# Create a copy for Preferential Attachment

def preferential_sim(original_graph):
    preferential_graph = original_graph.copy()

    preferential_attachment = []
    for u, v in original_graph.edges():
        pa_score = nx.preferential_attachment(original_graph, [(u, v)])
        preferential_attachment.append((u, v, list(pa_score)[0][2]))

    # Normalize the scores
    scaler = MinMaxScaler()
    normalized_preferential_attachment = scaler.fit_transform([[s] for _, _, s in preferential_attachment])

    for (u, v, s), norm_s in zip(preferential_attachment, normalized_preferential_attachment):
        if preferential_graph.has_edge(u, v):
            if norm_s[0] == 0 :
                preferential_graph[u][v]['weight'] =  0.0000000000000001
            else:
                preferential_graph[u][v]['weight'] = norm_s[0]

    print("Preferential Attachment Weighted Graph")
    print(preferential_graph.edges(data=True))

    return(preferential_graph)


In [24]:
graph_karate_preferential = preferential_sim(graph_karate)
graph_dolphin_preferential = preferential_sim(graph_dolphin)
graph_football_preferential = preferential_sim(graph_football)
graph_book_preferential = preferential_sim(graph_book)

Preferential Attachment Weighted Graph
[(0, 1, {'weight': 0.693877551020408}), (0, 2, {'weight': 0.7755102040816325}), (0, 3, {'weight': 0.4489795918367347}), (0, 4, {'weight': 0.20408163265306123}), (0, 5, {'weight': 0.2857142857142857}), (0, 6, {'weight': 0.2857142857142857}), (0, 7, {'weight': 0.2857142857142857}), (0, 8, {'weight': 0.36734693877551017}), (0, 10, {'weight': 0.20408163265306123}), (0, 11, {'weight': 0.04081632653061224}), (0, 12, {'weight': 0.12244897959183673}), (0, 13, {'weight': 0.36734693877551017}), (0, 17, {'weight': 0.12244897959183673}), (0, 19, {'weight': 0.20408163265306123}), (0, 21, {'weight': 0.12244897959183673}), (0, 31, {'weight': 0.4489795918367347}), (1, 2, {'weight': 0.4183673469387755}), (1, 3, {'weight': 0.2346938775510204}), (1, 7, {'weight': 0.14285714285714285}), (1, 13, {'weight': 0.18877551020408162}), (1, 17, {'weight': 0.0510204081632653}), (1, 19, {'weight': 0.09693877551020408}), (1, 21, {'weight': 0.0510204081632653}), (1, 30, {'weight'

In [25]:
# Creating data array for each dataset

karate_club_data_array_preferential = prepare_data_arrays(graph_karate_preferential)
dolphin_data_array_preferential = prepare_data_arrays(graph_dolphin_preferential)
football_data_array_preferential = prepare_data_arrays(graph_football_preferential)
book_data_array_preferential = prepare_data_arrays(graph_book_preferential)

Computing transition probabilities: 100%|██████████| 34/34 [00:00<00:00, 6808.94it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:01<00:00, 84.11it/s]
Computing transition probabilities: 100%|██████████| 62/62 [00:00<00:00, 5633.48it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:02<00:00, 49.24it/s]
Computing transition probabilities: 100%|██████████| 115/115 [00:00<00:00, 2034.91it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:04<00:00, 24.89it/s]
Computing transition probabilities: 100%|██████████| 105/105 [00:00<00:00, 2333.55it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:04<00:00, 24.70it/s]


In [26]:
# Performing K-means

kmeans_labels_dict_karate_preferential, kmeans_modularity_karate_preferential, kmeans_work_log_karate_preferential = cluster_all_data_arrays(karate_club_data_array_preferential, adjacency_karate)
kmeans_labels_dict_dolphin_preferential, kmeans_modularity_dolphin_preferential, kmeans_work_log_dolphin_preferential = cluster_all_data_arrays(dolphin_data_array_preferential, adjacency_dolphin)
kmeans_labels_dict_football_preferential, kmeans_modularity_football_preferential, kmeans_work_log_football_preferential = cluster_all_data_arrays(football_data_array_preferential, adjacency_football)
kmeans_labels_dict_book_preferential, kmeans_modularity_book_preferential, kmeans_work_log_book_preferential = cluster_all_data_arrays(book_data_array_preferential, adjacency_book)

In [27]:
kmeans_modularity_karate_preferential

{'node_array': 0.0367357001972386,
 'embedded_node_array_node2vec': 0.3744247205785668,
 'embedded_node_array_hope': 0.3579881656804733,
 'embedded_node_array_sdne': 0.11974030243261014,
 'embedded_node_array_sdne+hope': 0.3579881656804733,
 'embedded_node_array_sdne+node2vec': 0.37146614069690986,
 'embedded_node_array_hope+node2vec': 0.37146614069690986,
 'embedded_node_array_summation': 0.37146614069690986,
 'embedded_node_array_sdne*hope': 0.15170940170940173,
 'embedded_node_array_sdne*node2vec': 0.37146614069690986,
 'embedded_node_array_hope*node2vec': 0.169543063773833,
 'embedded_node_array_multiply': 0.059993425378040754,
 'embedded_node_array_sdneMINhope': 0.3445923734385273,
 'embedded_node_array_sdneMINnode2vec': 0.37442472057856685,
 'embedded_node_array_hopeMINnode2vec': 0.3609467455621303,
 'embedded_node_array_minimum': 0.35691978961209725,
 'embedded_node_array_sdneMAXhope': 0.3579881656804733,
 'embedded_node_array_sdneMAXnode2vec': 0.35691978961209725,
 'embedded_no

In [28]:
kmeans_modularity_dolphin_adamic_adar

{'node_array': 0.27564178632174363,
 'embedded_node_array_node2vec': 0.5267987816937623,
 'embedded_node_array_hope': 0.5264625608164235,
 'embedded_node_array_sdne': 0.40267394485977615,
 'embedded_node_array_sdne+hope': 0.497745342351964,
 'embedded_node_array_sdne+node2vec': 0.4808156322930265,
 'embedded_node_array_hope+node2vec': 0.515426604960247,
 'embedded_node_array_summation': 0.5264625608164235,
 'embedded_node_array_sdne*hope': 0.4703532296981923,
 'embedded_node_array_sdne*node2vec': 0.5263241169257546,
 'embedded_node_array_hope*node2vec': 0.435722479332305,
 'embedded_node_array_multiply': 0.4455717732684625,
 'embedded_node_array_sdneMINhope': 0.45405640599659836,
 'embedded_node_array_sdneMINnode2vec': 0.5162968237015941,
 'embedded_node_array_hopeMINnode2vec': 0.5025117677307068,
 'embedded_node_array_minimum': 0.5062893081761006,
 'embedded_node_array_sdneMAXhope': 0.4257149638068114,
 'embedded_node_array_sdneMAXnode2vec': 0.47790831058897987,
 'embedded_node_array_

In [29]:
kmeans_modularity_football_adamic_adar

{'node_array': 0.5576125225870149,
 'embedded_node_array_node2vec': 0.6001572774763219,
 'embedded_node_array_hope': 0.43541510875032274,
 'embedded_node_array_sdne': 0.6044285185845559,
 'embedded_node_array_sdne+hope': 0.4766319201424279,
 'embedded_node_array_sdne+node2vec': 0.591721243636383,
 'embedded_node_array_hope+node2vec': 0.4733200450276633,
 'embedded_node_array_summation': 0.5036072693596332,
 'embedded_node_array_sdne*hope': 0.38480422812951576,
 'embedded_node_array_sdne*node2vec': 0.5941083484800502,
 'embedded_node_array_hope*node2vec': 0.3387639746759312,
 'embedded_node_array_multiply': 0.28680652209203017,
 'embedded_node_array_sdneMINhope': 0.42191080158288735,
 'embedded_node_array_sdneMINnode2vec': 0.5989890065439138,
 'embedded_node_array_hopeMINnode2vec': 0.4904262991359053,
 'embedded_node_array_minimum': 0.5010312186476267,
 'embedded_node_array_sdneMAXhope': 0.4705603708661438,
 'embedded_node_array_sdneMAXnode2vec': 0.5875870548129303,
 'embedded_node_arra

In [30]:
kmeans_modularity_book_adamic_adar

{'node_array': 0.11905276093808642,
 'embedded_node_array_node2vec': 0.5248841789172207,
 'embedded_node_array_hope': 0.45254806382114443,
 'embedded_node_array_sdne': 0.4768306415536734,
 'embedded_node_array_sdne+hope': 0.481309228150822,
 'embedded_node_array_sdne+node2vec': 0.5017482427589326,
 'embedded_node_array_hope+node2vec': 0.4647780502979726,
 'embedded_node_array_summation': 0.47432139900555853,
 'embedded_node_array_sdne*hope': 0.33858577444583265,
 'embedded_node_array_sdne*node2vec': 0.5220741357767595,
 'embedded_node_array_hope*node2vec': 0.30670091165717994,
 'embedded_node_array_multiply': 0.2943243813020294,
 'embedded_node_array_sdneMINhope': 0.445411119852325,
 'embedded_node_array_sdneMINnode2vec': 0.5017482427589326,
 'embedded_node_array_hopeMINnode2vec': 0.4617520477578788,
 'embedded_node_array_minimum': 0.4750258379995991,
 'embedded_node_array_sdneMAXhope': 0.4736118181210504,
 'embedded_node_array_sdneMAXnode2vec': 0.5240074865925205,
 'embedded_node_arra

# Adasim

In [47]:
def compute_AdaSim(graph, decay_factor=0.9, iterations=2, alpha_val=1.0, link_type='none'):
    nodes = sorted(graph.nodes())
    adj = nx.adjacency_matrix(graph, nodelist=nodes, weight=None)

    if link_type == 'in-link':
        degrees = adj.sum(axis=0).T
        weights = csr_matrix(1 / np.log(degrees + np.e))
        weight_matrix = csr_matrix(adj.multiply(weights))

        adamic_scores = csr_matrix(weight_matrix.T * adj)
        adamic_scores.setdiag(0)
        adamic_scores = adamic_scores / np.max(adamic_scores)
        result_matrix = csr_matrix(decay_factor * alpha_val * adamic_scores)
        result_matrix.setdiag(1)

        weight_matrix = normalize(weight_matrix, norm='l1', axis=0)
        for itr in range(2, iterations + 1):
            result_matrix.setdiag(0)
            result_matrix = decay_factor * (alpha_val * adamic_scores + (1 - alpha_val) * (weight_matrix.T * result_matrix * weight_matrix))
            result_matrix.setdiag(1)

    else:
        degrees = adj.sum(axis=1).T
        weights = csr_matrix(1 / np.log(degrees + np.e))
        weight_matrix = csr_matrix(adj.multiply(weights))

        adamic_scores = weight_matrix * adj.T
        adamic_scores.setdiag(0)
        adamic_scores = adamic_scores / np.max(adamic_scores)
        result_matrix = decay_factor * alpha_val * adamic_scores
        result_matrix.setdiag(1)

        weight_matrix = normalize(weight_matrix, norm='l1', axis=1)
        for itr in range(2, iterations + 1):
            result_matrix.setdiag(0)
            result_matrix = decay_factor * (alpha_val * adamic_scores + (1 - alpha_val) * (weight_matrix * result_matrix * weight_matrix.T))
            result_matrix.setdiag(1)

    return result_matrix

def ensure_finite_weights(graph):
    for u, v, data in graph.edges(data=True):
        if 'weight' not in data or not np.isfinite(data['weight']):
            data['weight'] = 0.0001  # Replace non-finite weights with a small finite value
    return graph

def adasim_sim(original_graph):
    adasim_graph = original_graph.copy()
    adasim_result = compute_AdaSim(original_graph, decay_factor=0.9, iterations=2, alpha_val=1.0, link_type='none')
    for i, u in enumerate(sorted(original_graph.nodes())):
        for j, v in enumerate(sorted(original_graph.nodes())):
            if adasim_graph.has_edge(u, v):
                adasim_graph[u][v]['weight'] = max(adasim_result[i, j], 0.0000000000000001)
    adasim_graph = ensure_finite_weights(adasim_graph)
    return adasim_graph


In [48]:
graph_karate_adasim = adasim_sim(graph_karate)
graph_dolphin_adasim = adasim_sim(graph_dolphin)
graph_football_adasim = adasim_sim(graph_football)
graph_book_adasim = adasim_sim(graph_book)

  adj = nx.adjacency_matrix(graph, nodelist=nodes, weight=None)


In [49]:
# Creating data array for each dataset

karate_club_data_array_adasim = prepare_data_arrays(graph_karate_adasim)
dolphin_data_array_adasim = prepare_data_arrays(graph_dolphin_adasim)
football_data_array_adasim = prepare_data_arrays(graph_football_adasim)
book_data_array_adasim = prepare_data_arrays(graph_book_adasim)

Computing transition probabilities: 100%|██████████| 34/34 [00:00<00:00, 7483.54it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:01<00:00, 89.39it/s]
Computing transition probabilities: 100%|██████████| 62/62 [00:00<00:00, 6201.63it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:01<00:00, 50.56it/s]
Computing transition probabilities: 100%|██████████| 115/115 [00:00<00:00, 2091.00it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:04<00:00, 24.85it/s]
Computing transition probabilities: 100%|██████████| 105/105 [00:00<00:00, 2333.56it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [00:03<00:00, 25.88it/s]


In [50]:
# Performing K-means

kmeans_labels_dict_karate_adasim, kmeans_modularity_karate_adasim, kmeans_work_log_karate_adasim = cluster_all_data_arrays(karate_club_data_array_adasim, adjacency_karate)
kmeans_labels_dict_dolphin_adasim, kmeans_modularity_dolphin_adasim, kmeans_work_log_dolphin_adasim = cluster_all_data_arrays(dolphin_data_array_adasim, adjacency_dolphin)
kmeans_labels_dict_football_adasim, kmeans_modularity_football_adasim, kmeans_work_log_football_adasim = cluster_all_data_arrays(football_data_array_adasim, adjacency_football)
kmeans_labels_dict_book_adasim, kmeans_modularity_book_adasim, kmeans_work_log_book_adasim = cluster_all_data_arrays(book_data_array_adasim, adjacency_book)

In [51]:
kmeans_modularity_karate_adasim

{'node_array': -0.004109138724523315,
 'embedded_node_array_node2vec': 0.387491781722551,
 'embedded_node_array_hope': 0.37146614069690986,
 'embedded_node_array_sdne': 0.1535174227481917,
 'embedded_node_array_sdne+hope': 0.3312787639710717,
 'embedded_node_array_sdne+node2vec': 0.4151051939513479,
 'embedded_node_array_hope+node2vec': 0.37146614069690986,
 'embedded_node_array_summation': 0.3904503616042078,
 'embedded_node_array_sdne*hope': 0.33719592373438534,
 'embedded_node_array_sdne*node2vec': 0.37146614069690986,
 'embedded_node_array_hope*node2vec': 0.3303747534516765,
 'embedded_node_array_multiply': 0.2585470085470085,
 'embedded_node_array_sdneMINhope': 0.3312787639710717,
 'embedded_node_array_sdneMINnode2vec': 0.4151051939513479,
 'embedded_node_array_hopeMINnode2vec': 0.37146614069690986,
 'embedded_node_array_minimum': 0.37146614069690986,
 'embedded_node_array_sdneMAXhope': 0.3717948717948718,
 'embedded_node_array_sdneMAXnode2vec': 0.4155982905982906,
 'embedded_node

In [55]:
kmeans_modularity_dolphin_adasim

{'node_array': 0.25246232348403946,
 'embedded_node_array_node2vec': 0.5267987816937623,
 'embedded_node_array_hope': 0.5264625608164235,
 'embedded_node_array_sdne': 0.38087892092876074,
 'embedded_node_array_sdne+hope': 0.490724259325185,
 'embedded_node_array_sdne+node2vec': 0.5263241169257546,
 'embedded_node_array_hope+node2vec': 0.5264625608164235,
 'embedded_node_array_summation': 0.5267987816937623,
 'embedded_node_array_sdne*hope': 0.49339424864522763,
 'embedded_node_array_sdne*node2vec': 0.5267987816937623,
 'embedded_node_array_hope*node2vec': 0.4986155610933113,
 'embedded_node_array_multiply': 0.4961235710612714,
 'embedded_node_array_sdneMINhope': 0.4756734306396109,
 'embedded_node_array_sdneMINnode2vec': 0.5143388315335629,
 'embedded_node_array_hopeMINnode2vec': 0.47383410466358133,
 'embedded_node_array_minimum': 0.4756734306396109,
 'embedded_node_array_sdneMAXhope': 0.4259720738894822,
 'embedded_node_array_sdneMAXnode2vec': 0.4885882678691508,
 'embedded_node_arra

In [56]:
kmeans_modularity_football_adasim

{'node_array': 0.5818960584827381,
 'embedded_node_array_node2vec': 0.6001413102198425,
 'embedded_node_array_hope': 0.444613579087152,
 'embedded_node_array_sdne': 0.5991885972499061,
 'embedded_node_array_sdne+hope': 0.5131437132919425,
 'embedded_node_array_sdne+node2vec': 0.6028717110778165,
 'embedded_node_array_hope+node2vec': 0.4672844220784578,
 'embedded_node_array_summation': 0.5551522344844838,
 'embedded_node_array_sdne*hope': 0.4032597154102654,
 'embedded_node_array_sdne*node2vec': 0.6018231945690038,
 'embedded_node_array_hope*node2vec': 0.3350994893139138,
 'embedded_node_array_multiply': 0.31857071764834255,
 'embedded_node_array_sdneMINhope': 0.5788436512857632,
 'embedded_node_array_sdneMINnode2vec': 0.60440722890925,
 'embedded_node_array_hopeMINnode2vec': 0.4904090012747194,
 'embedded_node_array_minimum': 0.5879263590131172,
 'embedded_node_array_sdneMAXhope': 0.5125675614539783,
 'embedded_node_array_sdneMAXnode2vec': 0.5984727319177474,
 'embedded_node_array_hop

In [57]:
kmeans_modularity_book_adasim

{'node_array': 0.16014932049917466,
 'embedded_node_array_node2vec': 0.5248841789172207,
 'embedded_node_array_hope': 0.4777561818378145,
 'embedded_node_array_sdne': 0.4235195211871597,
 'embedded_node_array_sdne+hope': 0.48102128228464486,
 'embedded_node_array_sdne+node2vec': 0.5214725345920681,
 'embedded_node_array_hope+node2vec': 0.49861168957378865,
 'embedded_node_array_summation': 0.5054941099644694,
 'embedded_node_array_sdne*hope': 0.4320139242393859,
 'embedded_node_array_sdne*node2vec': 0.5154256714023477,
 'embedded_node_array_hope*node2vec': 0.4140070238223782,
 'embedded_node_array_multiply': 0.17988132516801103,
 'embedded_node_array_sdneMINhope': 0.4580833089093536,
 'embedded_node_array_sdneMINnode2vec': 0.5220741357767595,
 'embedded_node_array_hopeMINnode2vec': 0.5054504038954963,
 'embedded_node_array_minimum': 0.4561910932173323,
 'embedded_node_array_sdneMAXhope': 0.4012088584488973,
 'embedded_node_array_sdneMAXnode2vec': 0.5116360981278375,
 'embedded_node_arr