In [2]:
import igraph as ig
import pandas as pd
import glob
import os
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
### Función para calcular las métricas de un grafo

def calcular_metricas(grafo, bip_num_nodes, simple_num_edges):
    num_nodes = abs(grafo.vcount() - bip_num_nodes)
    num_edges = abs(grafo.ecount() - simple_num_edges)
    densidad = grafo.density()
    modularidad = grafo.community_multilevel().modularity
    grado_promedio = sum(grafo.degree()) / grafo.vcount() if grafo.vcount() > 0 else 0
    
    # Si el grafo contiene una o más 
    if grafo.is_connected():
        distancia_promedio = grafo.average_path_length()
    else:
        # Encontrar el componente más grande
        componentes = grafo.decompose()
        if(len(componentes)) < 1:
            distancia_promedio = 0
        else:
            componente_mas_grande = max(componentes, key=lambda c: c.vcount())
            distancia_promedio = componente_mas_grande.average_path_length()
    
    num_componentes = len(grafo.components())
    coeficiente_clustering = grafo.transitivity_undirected()
    
    return {
        "n nodes": num_nodes,
        "n edges": num_edges,
        "Densidad": densidad,
        "Modularidad": modularidad,
        "Grado Promedio": grado_promedio,
        "Distancia Promedio": distancia_promedio,
        "Número de Componentes": num_componentes,
        "Coeficiente de Clustering": coeficiente_clustering
    }

def simple_projection(bigraph, typen=True):
    if typen:
        return bigraph.bipartite_projection(which=typen)
    else:
        return bigraph.bipartite_projection(which=typen)
    
# Función para asignar el identificador
def asignar_identificador(texto):
    if 'simple' in texto:
        if "disparity" in texto:
            return 1
        elif "noise" in texto:
            return 2
    elif 'weights' in texto:
        if "disparity" in texto:
            return 3
        elif "noise" in texto:
            return 4
    elif 'vector' in texto:
        if "disparity" in texto:
            return 5
        elif "noise" in texto:
            return 6
    elif 'master' in texto:
        if "disparity" in texto:
            return 7
        elif "noise" in texto:
            return 8
    elif 'hyperbolic' in texto:
        if "disparity" in texto:
            return 9
        elif "noise" in texto:
            return 10
    elif 'resall' in texto:
        if "disparity" in texto:
            return 11
        elif "noise" in texto:
            return 12
    else:
        return None  # O algún otro valor por defecto si no se encuentra ninguna de las subcadenas

In [37]:

##### **** hVariables selection **** #####
DATASET = "AMZ"
DATASET_PATH = "01-AMZ"
TOP_TYPE = True

if TOP_TYPE:
    DIRPATH = "../00-Data/"+DATASET_PATH+"/02-Graphs/01-Top/"
else:
    DIRPATH = "../00-Data/"+DATASET_PATH+"/02-Graphs/02-Bot/"

#bip_graph = ig.read("../00-Data/"+DATASET_PATH+"/02-Graphs/binet-"+DATASET+"-Rw.graphml")
bip_graph = ig.read("../00-Data/"+DATASET_PATH+"/02-Graphs/binet-"+DATASET+"-Rw.graphml")
bip_num_nodes = len(bip_graph.vs.select(type_eq=TOP_TYPE))
proj = simple_projection(bip_graph, TOP_TYPE)
edge_num_simple = proj.ecount()

# Ruta de la carpeta que contiene los archivos .graphml
archivos = glob.glob(os.path.join(DIRPATH, "*.graphml"))

# Lista para almacenar las métricas de cada grafo
metricas = []

# Procesar cada archivo .graphml
for archivo in archivos:
    g = ig.Graph.Read_GraphML(archivo)
    nombre_grafo = os.path.basename(archivo)
    metrica = calcular_metricas(g, bip_num_nodes, edge_num_simple)
    metrica["G"] = nombre_grafo
    metricas.append(metrica)

# Crear un DataFrame con las métricas
df_metricas = pd.DataFrame(metricas)

# Mostrar estadísticas descriptivas
print(df_metricas.describe())



           n nodes       n edges      Densidad  Modularidad  Grado Promedio  \
count    48.000000  4.800000e+01  4.800000e+01    48.000000       48.000000   
mean    156.166667  6.554083e+05  1.203456e-02     0.639378      111.859198   
std     394.921610  4.723956e+05  1.092886e-02     0.141036      101.609462   
min       1.000000  3.410000e+02  9.182319e-08     0.452492        0.000742   
25%       1.000000  2.185872e+05  7.292368e-05     0.547477        0.677607   
50%       1.000000  5.886465e+05  1.357661e-02     0.588094      126.208132   
75%       5.000000  1.172176e+06  2.214032e-02     0.706982      205.816446   
max    1214.000000  1.175322e+06  2.719086e-02     0.929884      252.766269   

       Distancia Promedio  Número de Componentes  Coeficiente de Clustering  
count           48.000000              48.000000                  47.000000  
mean             3.556436            3199.000000                   0.415701  
std              1.609853            3475.896052      

In [38]:
df = df_metricas.copy()

df["DS"] = DATASET
df['PB'] = df['G'].apply(asignar_identificador)
df = df.rename(columns={"n nodes":"|V|", "Distancia Promedio": "L",
                                    "Densidad":"d", "Modularidad":"Q",
                                    "Grado Promedio":"K", "n edges":"|E|",
                                    "Número de Componentes":"N", 
                                    "Coeficiente de Clustering":"CC"})
df

Unnamed: 0,|V|,|E|,d,Q,K,L,N,CC,G,DS,PB
0,1,149265,0.02374454,0.55045,220.729268,2.742281,307,0.618188,AMZ_top_simple_noise_alpha2.graphml,AMZ,2
1,1,321149,0.01976689,0.553464,183.753039,2.793962,307,0.552987,AMZ_top_master_noise_alpha2.graphml,AMZ,8
2,1214,1175320,1.530386e-07,0.56,0.001237,1.666667,8079,0.0,AMZ_top_weights_disparity_alpha2.graphml,AMZ,3
3,5,1172262,7.094338e-05,0.923872,0.659206,7.769531,7324,0.347328,AMZ_top_master_disparity_alpha1.graphml,AMZ,7
4,1,28149,0.02654735,0.505621,246.784124,2.730385,348,0.62854,AMZ_top_vector_disparity_alpha2.graphml,AMZ,5
5,1,152336,0.02367348,0.554334,220.068624,2.742739,307,0.617781,AMZ_top_simple_noise_alpha15.graphml,AMZ,2
6,1,161801,0.02345444,0.538516,218.032484,2.744576,307,0.61612,AMZ_top_simple_noise_alpha05.graphml,AMZ,2
7,1,521440,0.01513186,0.595909,140.665806,2.888914,308,0.527302,AMZ_top_master_noise_alpha1.graphml,AMZ,8
8,5,1172277,7.059596e-05,0.835848,0.655978,4.675767,7310,0.082416,AMZ_top_hyperbolic_disparity_alpha05.graphml,AMZ,9
9,5,1171929,7.865612e-05,0.844463,0.730873,4.675767,7177,0.106215,AMZ_top_hyperbolic_disparity_alpha15.graphml,AMZ,9


In [39]:
if TOP_TYPE:
    df.to_csv("DF-MET-"+DATASET+"1.csv")
else:
    df.to_csv("DF-MET-"+DATASET+"2.csv")