# Link Analysis

### Configuration

In [86]:
import os
import warnings

import networkx as nx
import numpy as np
import pandas as pd
from networkx import hits
from sklearn.preprocessing import MinMaxScaler

Créer les répertoires

In [None]:
def creer_repertoires(reps=None):
    """Create the working directories used by this notebook.

    Args:
        reps: Optional iterable of directory paths to create. When omitted,
            the notebook's default set of directories is created.

    Directories that already exist are left untouched (``exist_ok=True``),
    so the cell can be re-run safely. Prints a confirmation message when done.
    """
    if reps is None:
        reps = ['output', 'output/clusters', 'cluster_links', 'outputgml', 'outputLA']
    for rep in reps:
        # makedirs also creates any missing parent directories
        os.makedirs(rep, exist_ok=True)
    print("Répertoires crées.")

# Create the directories up front so the later cells that write under
# 'outputLA/' (np.savetxt, to_csv) find their target folder.
creer_repertoires()

### Fonctions

Matrice d'adjacence

In [131]:
def create_numpy_adjacency_matrix(gml_file):
    """Load a GML file and return its dense adjacency matrix and node list.

    The file is opened explicitly as UTF-8 text and its lines are handed to
    ``nx.parse_gml`` (used here instead of ``nx.read_gml``).

    Args:
        gml_file: Path to the GML file to read.

    Returns:
        A ``(adjacency_matrix, nodes)`` tuple: the array produced by
        ``nx.to_numpy_array`` and the list of the graph's nodes.
    """
    with open(gml_file, 'r', encoding='utf-8') as handle:
        parsed_graph = nx.parse_gml(handle)
    node_labels = list(parsed_graph.nodes())
    return nx.to_numpy_array(parsed_graph), node_labels

Voisins communs

In [132]:
def voisins_communs(matrix):
    """Return the common-neighbours score for every pair of nodes.

    Entry ``[i, j]`` is the dot product of rows ``i`` and ``j`` of ``matrix``;
    for a 0/1 adjacency matrix this is the number of neighbours the two nodes
    share (and the diagonal holds each node's degree).

    Args:
        matrix: Square adjacency matrix (array-like).

    Returns:
        A float ``n x n`` array of pairwise row dot products.
    """
    adjacency = np.asarray(matrix, dtype=float)
    # One matrix product computes all pairwise row dot products at once,
    # replacing the original n^2 Python-level loop.
    return adjacency @ adjacency.T

Matrice d'attachement préférentiel

In [133]:
def preferential_attachement(matrix):
    """Return the preferential-attachment score for every pair of nodes.

    The score of a pair ``(i, j)`` is the product of the two nodes' degrees,
    where a node's degree is the sum of its row in ``matrix``.

    Args:
        matrix: Square adjacency matrix (array-like).

    Returns:
        A float ``n x n`` array with ``result[i, j] = degree[i] * degree[j]``.
    """
    degrees = np.sum(np.asarray(matrix, dtype=float), axis=1)
    # The outer product yields every degree[i] * degree[j] without a Python loop
    return np.outer(degrees, degrees)

Matrice de similarité par cosinus

In [134]:
def cosine_similarity(matrix):
    """Return the cosine similarity between every pair of rows of ``matrix``.

    ``result[i, j]`` is ``dot(row_i, row_j) / (||row_i|| * ||row_j||)``.
    When either row has zero norm the similarity is defined as 0.

    Args:
        matrix: Square adjacency matrix (array-like).

    Returns:
        A float ``n x n`` array of pairwise cosine similarities.
    """
    rows = np.asarray(matrix, dtype=float)
    # Each norm is computed once per row (the original recomputed both norms
    # for every one of the n^2 pairs).
    norms = np.linalg.norm(rows, axis=1)
    dot_products = rows @ rows.T
    norm_products = np.outer(norms, norms)

    cosine_sim = np.zeros_like(dot_products)
    # Divide only where both norms are positive; null vectors keep the 0 default
    np.divide(dot_products, norm_products, out=cosine_sim, where=norm_products > 0)
    return cosine_sim

Matrice de similarité par Jaccard

In [135]:
def jaccard_similarity(matrix):
    """Return the Jaccard similarity between every pair of nodes.

    With ``common = matrix @ matrix.T`` and ``degree`` the row sums,
    ``result[i, k] = common[i, k] / (degree[i] + degree[k] - common[i, k])``.
    Pairs whose denominator is not positive get a similarity of 0.

    Args:
        matrix: Square adjacency matrix (array-like).

    Returns:
        A float ``n x n`` array of pairwise Jaccard similarities.
    """
    adjacency = np.asarray(matrix, dtype=float)
    degrees = adjacency.sum(axis=1)
    common = adjacency @ adjacency.T  # common-neighbour score for each pair
    # Size of the union of the two neighbourhoods: |N(i)| + |N(k)| - |N(i) & N(k)|
    union = degrees[:, None] + degrees[None, :] - common

    sim_jac = np.zeros_like(common)
    # Guarded division: entries with an empty union keep the 0 default
    np.divide(common, union, out=sim_jac, where=union > 0)
    return sim_jac

In [136]:
def top_20_jaccard_relations(matrix, nodes, threshold=0.65, top_n=20):
    """Return the highest-scoring node pairs whose score is below a threshold.

    Every ordered pair ``(i, j)`` with ``i != j`` and
    ``matrix[i, j] < threshold`` is collected, so ``(a, b)`` and ``(b, a)``
    are separate entries. The pairs are sorted by decreasing score (ties keep
    row-major order) and the first ``top_n`` are returned.

    Args:
        matrix: Square similarity matrix (2-D numpy array).
        nodes: Sequence of node labels; ``nodes[i]`` labels row/column ``i``.
        threshold: Exclusive upper bound on the scores kept (default 0.65).
        top_n: Maximum number of relations returned (default 20).

    Returns:
        A list of ``(source_label, target_label, score)`` tuples.
    """
    size = len(matrix)
    relations = [
        (nodes[i], nodes[j], matrix[i, j])
        for i in range(size)
        for j in range(size)
        if i != j and matrix[i, j] < threshold
    ]
    # Stable sort, highest score first
    relations.sort(key=lambda relation: relation[2], reverse=True)
    return relations[:top_n]

Matrice de Katz

In [137]:
def Katz(graph, alpha):
    """Compute the Katz matrix ``(I - alpha * A)^-1 - I`` of an adjacency matrix.

    The closed form equals the Katz series ``sum_{k>=1} alpha^k A^k`` only when
    ``alpha`` is smaller than the inverse of the spectral radius of ``A``.
    When that condition is violated a ``RuntimeWarning`` is emitted, because
    the inverse can still be computed but no longer represents the series.

    Args:
        graph: Square adjacency matrix (array-like), despite the parameter name.
        alpha: Attenuation factor.

    Returns:
        The ``n x n`` Katz matrix as a numpy array.

    Raises:
        numpy.linalg.LinAlgError: If ``I - alpha * A`` is singular.
    """
    A = np.asarray(graph, dtype=float)
    n = A.shape[0]  # number of nodes

    if A.size:
        spectral_radius = np.max(np.abs(np.linalg.eigvals(A)))
        if abs(alpha) * spectral_radius >= 1:
            warnings.warn(
                f"alpha={alpha} is not below 1/spectral_radius "
                f"(spectral radius = {spectral_radius:.6g}); the Katz series "
                "does not converge and the result is not a valid Katz matrix.",
                RuntimeWarning,
                stacklevel=2,
            )

    I = np.identity(n)
    # Invert (I - alpha*A) first, then subtract I to drop the zero-length walks
    katz_matrix = np.linalg.inv(I - alpha * A) - I
    print(f"Ceci est la matrice de Katz :\n {katz_matrix}\n")
    return katz_matrix

Identifier les nœuds à score de Katz élevé mais à faible degré (influence indirecte)

In [138]:
def indirect_influencers(adjacency_matrix, katz_matrix):
    """Find nodes whose scaled Katz score is high while their scaled degree is low.

    Degrees (row sums of ``adjacency_matrix``) and Katz scores (row sums of
    ``katz_matrix``) are each min-max scaled as column vectors so that they
    can be compared on the same scale.

    Args:
        adjacency_matrix: Square adjacency matrix (numpy array).
        katz_matrix: Katz matrix of the same graph (numpy array).

    Returns:
        A ``(selected, gap)`` tuple where ``selected`` holds the row indices of
        nodes with scaled degree < 0.3 and scaled Katz score > 0.4, and ``gap``
        is the ``(n, 1)`` array ``scaled_katz - scaled_degree``.
    """
    degree_column = np.sum(adjacency_matrix, axis=1).reshape(-1, 1)
    katz_column = np.sum(katz_matrix, axis=1).reshape(-1, 1)

    # Each vector is fitted and scaled independently
    degree_scaled = MinMaxScaler().fit_transform(degree_column)
    katz_scaled = MinMaxScaler().fit_transform(katz_column)

    # Low direct connectivity but high Katz score
    low_degree_high_katz = (degree_scaled < 0.3) & (katz_scaled > 0.4)
    selected_rows = np.where(low_degree_high_katz)[0]

    return selected_rows, katz_scaled - degree_scaled

Matrice des probabilités de transition

In [139]:
def transition_proba(graph):
    """Build the row-normalised transition probability matrix.

    Each row of the adjacency matrix is divided by its sum (the node's
    degree). Rows whose degree is zero are left as all zeros instead of being
    divided by zero, which previously produced NaN entries.

    Args:
        graph: Square adjacency matrix (array-like), despite the parameter name.

    Returns:
        A new float array with the same shape as the input; the input is not
        modified.
    """
    A = np.asarray(graph, dtype=float)
    degrees = A.sum(axis=1)  # degree of each node

    # Results go into a separate array so A is never altered during the computation
    transition_graph = np.zeros(A.shape)
    has_degree = degrees != 0
    transition_graph[has_degree] = A[has_degree] / degrees[has_degree][:, None]
    return transition_graph

Matrice des probabilités de transition avec les ID en étiquettes

In [145]:
def trans_proba_with_id(graph, adjacency_matrix):
    """Export the non-zero transition probabilities as an edge list CSV.

    Rows of ``adjacency_matrix`` with a positive sum are normalised by that
    sum; other rows stay at zero. Every strictly positive entry becomes a
    ``Source, Target, Weight`` row, where Source and Target are the integer
    row/column indices (not the node labels).

    Args:
        graph: Graph object; only the number of its nodes is used.
        adjacency_matrix: Square adjacency matrix (numpy array).

    Side effects:
        Writes ``outputLA/trans_proba_with_IDs.csv`` and prints a confirmation.
    """
    node_count = len(list(graph.nodes))
    out_degrees = np.sum(adjacency_matrix, axis=1)

    probabilities = np.zeros(adjacency_matrix.shape)
    for row, out_degree in enumerate(out_degrees):
        # Skip rows without outgoing weight to avoid dividing by zero
        if out_degree > 0:
            probabilities[row, :] = adjacency_matrix[row, :] / out_degree

    # Matrix indices serve as the node IDs in the exported edge list
    edge_rows = [
        [src, dst, probabilities[src, dst]]
        for src in range(node_count)
        for dst in range(node_count)
        if probabilities[src, dst] > 0
    ]

    edge_table = pd.DataFrame(edge_rows, columns=['Source', 'Target', 'Weight'])
    edge_table.to_csv('outputLA/trans_proba_with_IDs.csv', index=False, encoding='utf-8')

    print("Exporté : trans_proba_with_IDs.csv")

Matrice FPT et CT

In [141]:
def matrice_FPT_et_CT(transition_matrix):
    """Compute the FPT and CT matrices from a transition matrix.

    ``FPT[i, j]`` is ``1 / transition_matrix[i, j]`` for ``i != j``, or
    ``inf`` when that probability is zero; the diagonal is 0.
    ``CT[i, j] = FPT[i, j] + FPT[j, i]``, so CT is symmetric.

    FPT is built completely before CT is derived from it. The original
    single-pass loop read ``FPT[j, i]`` before it had been filled whenever
    ``i < j``, which made those CT entries too small.

    Args:
        transition_matrix: Square transition probability matrix (array-like).

    Returns:
        A ``(FPT, CT)`` tuple of float ``n x n`` arrays.
    """
    P = np.asarray(transition_matrix, dtype=float)

    # 1/0 is evaluated before np.where discards it, so silence that warning
    with np.errstate(divide='ignore'):
        FPT = np.where(P != 0, 1.0 / P, np.inf)
    np.fill_diagonal(FPT, 0.0)

    # FPT is complete here, so both directions are available
    CT = FPT + FPT.T
    return FPT, CT

In [142]:
def fpt_ct_between_nodes(FPT, CT, node1, node2):
    """Look up the FPT and CT entries for one ordered pair of nodes.

    Args:
        FPT: FPT matrix, indexable as ``FPT[row, col]``.
        CT: CT matrix, indexable as ``CT[row, col]``.
        node1: Row index (source node).
        node2: Column index (target node).

    Returns:
        The tuple ``(FPT[node1, node2], CT[node1, node2])``.
    """
    pair = (node1, node2)
    return FPT[pair], CT[pair]

Scores Hub et Authority

In [143]:
def scores_Hub_Authority(graph):
    """Run ``nx.hits`` on ``graph`` and return its hub and authority scores.

    Only the hub scores are printed; both results are returned.

    Args:
        graph: Graph object accepted by ``nx.hits``.

    Returns:
        The ``(hub_scores, authority_scores)`` pair produced by ``nx.hits``.
    """
    hubs, authorities = nx.hits(graph)

    # Show the hub scores only
    print(f"Ceci est le score de Hub :\n {hubs}\n")
    return hubs, authorities

### Applications

In [130]:
# Path of the GML graph analysed in the cells below
gml_file = 'outputgml/graph.gml'

# Build the dense adjacency matrix (and the list of graph nodes), then save
# it as text so later cells can reload it from disk.
adjacency_matrix, nodes = create_numpy_adjacency_matrix(gml_file)
np.savetxt('outputLA/adjacency_matrix.txt', adjacency_matrix)

In [None]:
# Pairwise common-neighbours matrix: compute, show, and save as text
common_neighbors = voisins_communs(adjacency_matrix)
print(common_neighbors)
np.savetxt('outputLA/common_neighbors_matrix.txt', common_neighbors)

In [None]:
# Pairwise cosine similarity between adjacency rows: compute, show, and save as text
cosine_sim = cosine_similarity(adjacency_matrix)
print(cosine_sim)
np.savetxt('outputLA/cosine_similarity_matrix.txt', cosine_sim)

In [None]:
# Pairwise Jaccard similarity: compute, show, and save as text
# (the saved file is reloaded by the top-20 cell)
jaccard_sim = jaccard_similarity(adjacency_matrix)
print(jaccard_sim)
np.savetxt('outputLA/jaccard_similarity_matrix.txt', jaccard_sim)

In [None]:
# Load the Jaccard similarity matrix back from the file written earlier
jaccard_sim_matrix = np.loadtxt("outputLA/jaccard_similarity_matrix.txt")

# Show the 20 highest scores that fall below the function's threshold
# (the original note says this avoids duplicates). `nodes` comes from the
# cell that built the adjacency matrix.
top_20_jaccard = top_20_jaccard_relations(jaccard_sim_matrix, nodes)
for relation in top_20_jaccard:
    print(relation)

In [None]:
# Attenuation factor passed to Katz().
# NOTE(review): the Katz series only converges when alpha is below
# 1 / (spectral radius of the adjacency matrix) — confirm 0.1 meets that for this graph.
alpha = 0.1
katz_matrix = Katz(adjacency_matrix, alpha)
np.savetxt('outputLA/katz_matrix.txt', katz_matrix)

In [None]:
# Reload the matrices saved by the earlier cells
adjacency_matrix = np.loadtxt('outputLA/adjacency_matrix.txt')
katz_matrix = np.loadtxt('outputLA/katz_matrix.txt')
degree = np.sum(adjacency_matrix, axis=1)
katz_scores = np.sum(katz_matrix, axis=1)

# Store the result under its own name. Assigning it to `indirect_influencers`
# would overwrite the function, and re-running this cell would then fail with
# "TypeError: 'numpy.ndarray' object is not callable".
indirect_influencer_nodes, influence_gap = indirect_influencers(adjacency_matrix, katz_matrix)

print("Nœuds avec une forte influence indirecte :", indirect_influencer_nodes)


In [None]:
# Pairwise preferential-attachment scores (product of degrees): compute, show, and save as text
pref_attach = preferential_attachement(adjacency_matrix)
print(pref_attach)
np.savetxt('outputLA/preferential_attachment_matrix.txt', pref_attach)

In [None]:
# Row-normalised transition probability matrix: compute, show, and save as text
# (`transition_graph` is reused by the FPT/CT cell)
transition_graph = transition_proba(adjacency_matrix)
print(transition_graph)
np.savetxt('outputLA/transition_probability_matrix.txt', transition_graph)

In [None]:
# `graph` was previously only created by the final HITS cell, so on a fresh
# kernel (Restart & Run All) this call raised NameError. Load it here from
# the same GML file as the adjacency matrix.
with open(gml_file, 'r', encoding='utf-8') as f:
    graph = nx.parse_gml(f)
trans_proba_with_id(graph, adjacency_matrix)

In [None]:
# Derive the FPT and CT matrices from the transition matrix and save both as text
FPT, CT = matrice_FPT_et_CT(transition_graph)
np.savetxt('outputLA/FPT_matrix.txt', FPT)
np.savetxt('outputLA/CT_matrix.txt', CT)

In [None]:
# Row/column indices into the FPT and CT matrices (not node labels)
node1 = 1056 # replace with the indices of the two nodes to compare
node2 = 1109

# Look up and report the FPT and CT values for the ordered pair (node1, node2)
fpt_value, ct_value = fpt_ct_between_nodes(FPT, CT, node1, node2)
print(f"FPT entre les nœuds {node1} et {node2} : {fpt_value}")
print(f"CT entre les nœuds {node1} et {node2} : {ct_value}")

In [129]:
# Parse the GML file into a graph object (opened explicitly as UTF-8 text)
with open('outputgml/graph.gml', 'r', encoding='utf-8') as f:
    graph = nx.parse_gml(f)
# HITS returns two dicts keyed by node: hub scores and authority scores
hub_scores, authority_scores = nx.hits(graph)
# Only the score values are saved, one per line, in the dicts' iteration order
np.savetxt('outputLA/hub_scores.txt', list(hub_scores.values()))
np.savetxt('outputLA/authority_scores.txt', list(authority_scores.values()))