In [1]:
import os
import igraph as ig
import numpy as np
import pandas as pd
import seaborn as sns
import statistics as st
import matplotlib.pyplot as plt
from collections import defaultdict

In [2]:
# Función para determinar si un grafo domina a otro
def dominates(graph_a, graph_b, metrics):
    dominates_flag = False
    for metric in metrics.keys():
        if metrics[metric]["optimize"] == "minimize":
            if graph_a[metric] > graph_b[metric]:
                return False
            if graph_a[metric] <=  graph_b[metric]:
                dominates_flag = True
        elif metrics[metric]["optimize"] == "maximize":
            if graph_a[metric] < graph_b[metric]:
                return False
            if graph_a[metric] > graph_b[metric]:
                dominates_flag = True
    return dominates_flag

# Función para calcular los frentes de Pareto
def calculate_pareto_fronts(graph_metrics):
    pareto_fronts = []
    remaining_graphs = list(graph_metrics.items())
    
    while remaining_graphs:
        current_front = []
        for i, (graph_i, metrics_i) in enumerate(remaining_graphs):
            dominated = False
            for j, (graph_j, metrics_j) in enumerate(remaining_graphs):
                if i != j and dominates(metrics_j, metrics_i, metrics_definitions):
                    dominated = True
                    break
            if not dominated:
                current_front.append((graph_i, metrics_i))
        pareto_fronts.append(current_front)
        remaining_graphs = [graph for graph in remaining_graphs if graph not in current_front]
    
    return pareto_fronts

# Función para preparar los datos de las métricas y frentes de Pareto
def prepare_parallel_coordinates_data(pareto_fronts, graph_names):
    data = []
    id_to_name = {i: graph_names[i] for i in range(len(graph_names))}

    for level, front in enumerate(pareto_fronts):
        for graph_id, metrics in front:
            row = {
                "ID": graph_id,
                "Pareto Level": level + 1,
                **metrics
            }
            data.append(row)

    return pd.DataFrame(data), id_to_name

def visualize_parallel_coordinates(df, id_to_name):
    # Configurar el estilo de Seaborn
    sns.set_theme(style="whitegrid")

    # Crear figura
    plt.figure(figsize=(15, 8))

    # Lista de colores para cada frente de Pareto
    colors = sns.color_palette("tab10", len(df["Pareto Level"].unique()))

    # Trazar líneas por cada grafo
    for idx, row in df.iterrows():
        metrics = row.drop(["ID", "Pareto Level"])
        pareto_level = int(row["Pareto Level"])  # Convertir nivel de Pareto a entero
        plt.plot(metrics.index, metrics.values, label=f"ID {row['ID']}", color=colors[pareto_level - 1], linewidth=1.5)

        # Agregar el identificador al final de cada línea
        plt.text(
            len(metrics) - 1,  # Posición X al final de las métricas
            metrics.values[-1],  # Último valor Y
            f"{row['ID']}",  # Texto del identificador
            fontsize=9,
            color=colors[pareto_level - 1],
            va="center"
        )

    # Ajustar etiquetas y leyendas
    plt.title("Parallel Coordinates Plot: Metrics Across Pareto Levels", fontsize=14)
    plt.xlabel("Metrics", fontsize=12)
    plt.ylabel("Metric Value", fontsize=12)
    plt.xticks(rotation=45)
    plt.grid(True)

    # Leyenda de niveles de Pareto
    for level in df["Pareto Level"].unique():
        plt.plot([], [], color=colors[int(level) - 1], label=f"Pareto Level {int(level)}")
    plt.legend(title="Pareto Level", loc="upper left", bbox_to_anchor=(1, 1))

    # Mostrar gráfica
    plt.tight_layout()
    plt.show()

    # Mostrar tabla de identificadores
    print("\nGraph Identifiers:")
    for graph_id, graph_name in id_to_name.items():
        print(f"ID {graph_id}: {graph_name}")

def visualize_2d_coordinates(df, metric_x, metric_y, id_to_name):
    """
    Visualiza en 2D las métricas seleccionadas para cada nivel de Pareto.

    :param df: DataFrame que contiene las métricas, niveles de Pareto e identificadores de los grafos.
    :param metric_x: Métrica para el eje X.
    :param metric_y: Métrica para el eje Y.
    :param id_to_name: Diccionario que mapea los IDs de grafos a sus nombres.
    """
    # Configurar el estilo de Seaborn
    sns.set_theme(style="whitegrid")

    # Crear figura
    plt.figure(figsize=(12, 8))

    # Lista de colores para cada frente de Pareto
    colors = sns.color_palette("tab10", len(df["Pareto Level"].unique()))

    # Graficar cada punto y conectar con líneas
    for idx, row in df.iterrows():
        pareto_level = int(row["Pareto Level"])  # Convertir nivel de Pareto a entero
        graph_id = row["ID"]
        
        x_val = row[metric_x]
        y_val = row[metric_y]

        # Trazar puntos en 2D y conectar con líneas si necesario
        plt.scatter(x_val, y_val, color=colors[pareto_level - 1], s=80, label=f"ID {graph_id}" if idx == 0 else "", alpha=0.8)

        # Agregar el identificador al final de cada punto
        plt.text(
            x_val, 
            y_val, 
            f"{graph_id}", 
            fontsize=9, 
            color=colors[pareto_level - 1], 
            va="center", 
            ha="center", 
            alpha=0.8
        )

    # Ajustar etiquetas y leyendas
    plt.title(f"2D Coordinate Visualization: {metric_x} vs {metric_y}", fontsize=14)
    plt.xlabel(metric_x, fontsize=12)
    plt.ylabel(metric_y, fontsize=12)
    plt.grid(True)

    # Leyenda de niveles de Pareto
    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    plt.legend(by_label.values(), by_label.keys(), title="Graph ID", loc="upper left", bbox_to_anchor=(1, 1))

    # Mostrar gráfica
    plt.tight_layout()
    plt.show()

    # Mostrar tabla de identificadores
    print("\nGraph Identifiers:")
    for graph_id, graph_name in id_to_name.items():
        print(f"ID {graph_id}: {graph_name}")



In [16]:
# Función para cargar grafos desde un directorio
def load_graphs_from_directory(directory):
    graphs = []
    graph_names = []
    for filename in os.listdir(directory):
        if filename.endswith(".graphml"):
            graph = ig.Graph.Read_GraphML(os.path.join(directory, filename))
            graphs.append(graph)
            graph_names.append(filename)
    return graphs, graph_names

def simple_projection(bigraph, typen=True):
    if typen:
        return bigraph.bipartite_projection(which=typen)
    else:
        return bigraph.bipartite_projection(which=typen)

# Función para calcular las métricas para cada grafo
def calculate_metrics(graph, bigraph, bipmetrics):
    numnodes = abs(graph.vcount() - bipmetrics["UN"])
    numedges = abs(graph.ecount() - bipmetrics["EN"])

    # FINAL Density (d)
    density = abs(graph.density() - bipmetrics["d"])

    # Number of components
    components = len(graph.components())

    # Minimize modularity
    modularidad = 1-graph.community_multilevel().modularity

    # Coefficient clustering difference
    coeficiente_clustering = 1-graph.transitivity_undirected()
    
    # Avg degree
    avg_degree = st.mean(bigraph.degree()) if graph.vcount() > 0 else 0
   
    # Calcular avg_path_length sobre el componente más grande si el grafo no está conectado
    if graph.is_connected():
        distancia_promedio = graph.average_path_length()
    else:
        # Encontrar el componente más grande
        componentes = graph.decompose()
        if(len(componentes)) < 1:
            distancia_promedio = 0
        else:
            componente_mas_grande = max(componentes, key=lambda c: c.vcount())
            distancia_promedio = componente_mas_grande.average_path_length()

    metrics = {
        "V": numnodes,
        "E": numedges,
        "d": density,
        #"N": components,
        "Q": modularidad,
        "CC": coeficiente_clustering,
        "K": avg_degree,
        "L": distancia_promedio
    }
    return metrics


In [4]:
# Paso 1: Leer todos los archivos .csv de un directorio
directorio = "metricas/"  # Cambia esto por la ruta de tu directorio
archivos_csv = [archivo for archivo in os.listdir(directorio) if archivo.endswith('.csv')]

# Lista para almacenar los DataFrames
dataframes = []

# Leer cada archivo y almacenarlo en la lista
for archivo in archivos_csv:
    ruta_completa = os.path.join(directorio, archivo)
    df = pd.read_csv(ruta_completa)
    dataframes.append(df)

# Paso 2: Concatenar todos los DataFrames en uno solo
df_final = pd.concat(dataframes, ignore_index=True)
df_final = df_final.drop(columns=["Unnamed: 0"])
df_final.fillna(0, inplace=True)
df_final

Unnamed: 0,|V|,|E|,d,Q,K,L,N,CC,G,DS,PB
0,3,732,0.011794,0.819059,1.297297,2.076923,71,0.754286,PM_top_master_disparity_alpha05.graphml,PM,7
1,3,659,0.023751,0.751581,2.612613,1.475000,57,0.724696,PM_top_master_disparity_alpha15.graphml,PM,7
2,114,804,0.000000,0.000000,0.000000,0.000000,0,0.000000,PM_top_vector_disparity_alpha05.graphml,PM,5
3,114,804,0.000000,0.000000,0.000000,0.000000,0,0.000000,PM_top_vector_disparity_alpha15.graphml,PM,5
4,1,0,0.127054,0.443406,14.230088,1.367663,22,0.816810,PM_top_vector_noise_alpha05.graphml,PM,6
...,...,...,...,...,...,...,...,...,...,...,...
367,2,527489,0.007202,0.701306,20.129471,3.786907,770,0.583062,HC_top_hyperbolic_disparity_alpha15.graphml,HC,9
368,0,80527,0.121416,0.488800,339.601858,2.028176,1,0.726492,HC_top_vector_noise_alpha1.graphml,HC,6
369,1344,555626,0.000004,0.750000,0.005502,1.000000,1450,0.000000,HC_top_simple_disparity_alpha05.graphml,HC,1
370,38,549992,0.001481,0.834404,4.085507,4.899659,1976,0.653652,HC_top_simple_disparity_alpha15.graphml,HC,1


In [17]:
DATASET = "AMZ"
DATASET_PATH = "01-AMZ"
TOPNODE = False

# Definición de las métricas y sus objetivos
metrics_definitions = {
    "V": {"optimize": "minimize"},
    "E": {"optimize": "maximize"},
    "d": {"optimize": "minimize"},
    #"N": {"optimize": "minimize"},
    "Q": {"optimize": "minimize"},
    "CC": {"optimize": "minimize"},
    "K": {"optimize": "minimize"},
    "L": {"optimize": "minimize"}
}

if TOPNODE:
    DIRPATH = "../00-Data/"+DATASET_PATH+"/02-Graphs/01-Top/"
else:
    DIRPATH = "../00-Data/"+DATASET_PATH+"/02-Graphs/02-Bot/"

directory = "../00-Data/"+DATASET_PATH+"/02-Graphs/02-Bot/"  # Cambia esta ruta
graphs, graph_names = load_graphs_from_directory(directory)

#bigraph = ig.read("user-movie-lens.graphml")
#bigraph = ig.read("../data/AMZ/PosBinet-AMZ.graphml")
bigraph = ig.read("../00-Data/"+DATASET_PATH+"/02-Graphs/binet-"+DATASET+"-Rw.graphml")

# Compute bipartite metrics
bip_num_nodes = len(bigraph.vs.select(type_eq=TOPNODE))
proj = simple_projection(bigraph, TOPNODE)
# Bipartite density
user_nodes = bigraph.vs.select(type=0)
res_nodes = bigraph.vs.select(type=1)
bidensity = bigraph.ecount() / (len(user_nodes) * len(res_nodes))
avg_path_bip = bigraph.average_path_length()

bipartite_metrics = {"UN": bip_num_nodes, "EN": proj.ecount(), "d":bidensity,
                     "CC": 0, "K": 0, "L": avg_path_bip}

graph_metrics = {}
for i, graph in enumerate(graphs):
    metrics = calculate_metrics(graph, bigraph, bipartite_metrics)
    graph_metrics[i] = metrics

pareto_fronts = calculate_pareto_fronts(graph_metrics)
df, id_to_name = prepare_parallel_coordinates_data(pareto_fronts, graph_names)
print(df)
print(id_to_name)

    ID  Pareto Level   V      E         d         Q        CC         K  \
0    1             1   0   3796  0.001879  0.382523  0.888625  3.736626   
1    4             1   0   3573  0.001888  0.380872  0.890181  3.736626   
2    6             1   0      2  0.002025  0.408283  0.895441  3.736626   
3    7             1   0     25  0.002024  0.413406  0.895503  3.736626   
4    8             1  74  64725  0.000455  0.039744  0.981481  3.736626   
5    9             1   1  62428  0.000367  0.193344  0.945372  3.736626   
6   13             1  74  64785  0.000457  0.033846  0.900000  3.736626   
7   14             1  74  64812  0.000458  0.036011  0.571429  3.736626   
8   16             1  74  64809  0.000458  0.043427  0.727273  3.736626   
9   17             1   1  63971  0.000426  0.092895  0.950194  3.736626   
10  18             1   1  63465  0.000406  0.183666  0.969170  3.736626   
11  19             1   0     21  0.002024  0.411507  0.895491  3.736626   
12  21             1   1 