In [31]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
import csv
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

In [32]:
# Load the node table and the metric tables from CSV.
nodes = pd.read_csv("../datos/musae_git_target.csv")

# One CSV per metric; the paths are listed in the same order as the names
# they are unpacked into.
(
    atributeDegreeCentrality,
    atributeClosenessCentrality,
    atributeBetweennessCentrality,
    atributeClusteringCoefficient,
    atributeGeneralizedDegree,
    atributeTriangles,
    atributeGreedyModularityCommunities,
    atributeCoreNumber,
    atributeAsynLpaCommunities,
) = map(
    pd.read_csv,
    (
        "../metricas/degree_centrality.csv",
        "../metricas/closeness_centrality.csv",
        "../metricas/betweenness_centrality.csv",
        "../metricas/clustering_coefficient.csv",
        "../metricas/generalized_degree.csv",
        "../metricas/triangles.csv",
        "../metricas/greedy_modularity_communities.csv",
        "../metricas/core_number.csv",
        "../metricas/asyn_lpa_communities.csv",
    ),
)


In [33]:
# Give each metric table a snake_case value column. Both community tables are
# renamed from a column called 'Community', so each one gets a distinct name
# before the tables are placed side by side.
# NOTE(review): the recorded output of this cell still shows a 'Core number'
# column, so the 'Core Number' key below does not appear to match anything
# (rename ignores missing keys by default) — confirm against the CSV header.
renames = (
    (atributeDegreeCentrality, {'Degree Centrality': 'degree_centrality'}),
    (atributeClosenessCentrality, {'Closeness Centrality': 'closeness_centrality'}),
    (atributeBetweennessCentrality, {'Betweenness Centrality': 'betweenness_centrality'}),
    (atributeClusteringCoefficient, {'Clustering Coefficient': 'clustering_coefficient'}),
    (atributeGeneralizedDegree, {'Generalized Degree': 'generalized_degree'}),
    (atributeTriangles, {'Triangles': 'triangles'}),
    (atributeGreedyModularityCommunities, {'Community': 'greedy_modularity_communities'}),
    (atributeCoreNumber, {'Core Number': 'core_number'}),
    (atributeAsynLpaCommunities, {'Community': 'asyn_lpa_communities'}),
)
for frame, column_map in renames:
    # Renamed in place: later cells reuse these frames under their new column names.
    frame.rename(columns=column_map, inplace=True)

# Place the metric tables next to the node table, column-wise.
frames = [
    nodes,
    atributeDegreeCentrality,
    atributeClosenessCentrality,
    atributeBetweennessCentrality,
    atributeClusteringCoefficient,
    atributeGeneralizedDegree,
    atributeTriangles,
    atributeGreedyModularityCommunities,
    atributeCoreNumber,
    atributeAsynLpaCommunities,
]
nodes = pd.concat(frames, axis=1)

# Keep only the first occurrence of every column name (drops repeated columns).
is_first_occurrence = ~nodes.columns.duplicated()
nodes = nodes.loc[:, is_first_occurrence]

# Separate copy of the table ordered by the 'id' column.
nodes_sorted = nodes.sort_values(by='id')

nodes.head()


Unnamed: 0,id,name,ml_target,degree_centrality,closeness_centrality,betweenness_centrality,clustering_coefficient,generalized_degree,triangles,greedy_modularity_communities,Core number,asyn_lpa_communities
0,0.0,Eiryyy,0.0,2.7e-05,0.275005,0.0,0.0,Counter({0: 1}),0.0,0,1.0,3.0
1,1.0,shawflying,0.0,0.000849,0.379327,0.0001452081,0.09879,"Counter({1: 9, 3: 6, 0: 5, 2: 5, 5: 2, 4: 1, 8...",49.0,0,18.0,3.0
2,2.0,JpMCarrilho,1.0,0.000212,0.294956,1.149733e-06,0.178571,"Counter({0: 3, 1: 2, 3: 2, 2: 1})",5.0,0,6.0,3.0
3,3.0,SuhwanCha,0.0,5.3e-05,0.287208,3.49129e-07,0.0,Counter({0: 2}),0.0,0,2.0,3.0
4,4.0,sunilangadi2,1.0,0.001061,0.375335,2.197155e-05,0.176923,"Counter({1: 5, 4: 5, 3: 4, 10: 4, 8: 3, 5: 3, ...",138.0,0,26.0,3.0


In [34]:
# Select the row(s) of the sorted table whose 'id' equals the chosen value (6 here).
id_especifico = 6
fila_id_especifico = nodes_sorted[nodes_sorted['id'].eq(id_especifico)]

# Show the matching row
print(fila_id_especifico)

    id   name  ml_target  degree_centrality  closeness_centrality  \
6  6.0  sfate        0.0            0.01297              0.403407   

   betweenness_centrality  clustering_coefficient  \
6                0.001086                0.035335   

                                  generalized_degree  triangles  \
6  Counter({3: 37, 5: 36, 8: 33, 6: 32, 2: 30, 4:...     4216.0   

   greedy_modularity_communities  Core number  asyn_lpa_communities  
6                              0         34.0                   3.0  


In [37]:
# Min-max scale a subset of the columns of `nodes`.
columns_to_normalize = [
    'clustering_coefficient',
    'triangles',
    'asyn_lpa_communities',
    'greedy_modularity_communities',
    'Core number',
]

# Create the scaler
scaler = MinMaxScaler()

# Fit on the selected columns and write the scaled values back into `nodes`.
# `nodes_sorted` was built before this point and is not rescaled here.
selected = nodes[columns_to_normalize]
nodes[columns_to_normalize] = scaler.fit_transform(selected)
nodes.head()

Unnamed: 0,id,name,ml_target,degree_centrality,closeness_centrality,betweenness_centrality,clustering_coefficient,generalized_degree,triangles,greedy_modularity_communities,Core number,asyn_lpa_communities
0,0.0,Eiryyy,0.0,2.7e-05,0.275005,0.0,0.0,Counter({0: 1}),0.0,0.0,0.0,0.0
1,1.0,shawflying,0.0,0.000849,0.379327,0.0001452081,0.09879,"Counter({1: 9, 3: 6, 0: 5, 2: 5, 5: 2, 4: 1, 8...",0.00061,0.0,0.515152,0.0
2,2.0,JpMCarrilho,1.0,0.000212,0.294956,1.149733e-06,0.178571,"Counter({0: 3, 1: 2, 3: 2, 2: 1})",6.2e-05,0.0,0.151515,0.0
3,3.0,SuhwanCha,0.0,5.3e-05,0.287208,3.49129e-07,0.0,Counter({0: 2}),0.0,0.0,0.030303,0.0
4,4.0,sunilangadi2,1.0,0.001061,0.375335,2.197155e-05,0.176923,"Counter({1: 5, 4: 5, 3: 4, 10: 4, 8: 3, 5: 3, ...",0.001719,0.0,0.757576,0.0


In [42]:
# Table with every attribute except the centrality measures.
#
# `nodes` already holds every metric column at this point, so concatenating the
# metric tables again and dropping duplicated column names would leave it
# unchanged, with the centralities still in it. The reduced table is derived by
# dropping the centrality columns instead. `nodes` itself is left intact so the
# remaining attribute subsets can start from the full table.
centrality_columns = [
    'degree_centrality',
    'closeness_centrality',
    'betweenness_centrality',
]

# Raises KeyError if a centrality column is missing, i.e. if the cell that
# builds the full `nodes` table has not been run first.
nodes_sin_centralidades = nodes.drop(columns=centrality_columns)

# Sort by 'id'; the row lookup in the next cell reads `nodes_sorted`.
nodes_sorted = nodes_sin_centralidades.sort_values(by='id')

nodes_sin_centralidades.head()


Unnamed: 0,id,name,ml_target,clustering_coefficient,generalized_degree,triangles,greedy_modularity_communities,Core number,asyn_lpa_communities
0,0.0,Eiryyy,0.0,0.0,Counter({0: 1}),0.0,0.0,0.0,0.0
1,1.0,shawflying,0.0,0.09879,"Counter({1: 9, 3: 6, 0: 5, 2: 5, 5: 2, 4: 1, 8...",0.00061,0.0,0.515152,0.0
2,2.0,JpMCarrilho,1.0,0.178571,"Counter({0: 3, 1: 2, 3: 2, 2: 1})",6.2e-05,0.0,0.151515,0.0
3,3.0,SuhwanCha,0.0,0.0,Counter({0: 2}),0.0,0.0,0.030303,0.0
4,4.0,sunilangadi2,1.0,0.176923,"Counter({1: 5, 4: 5, 3: 4, 10: 4, 8: 3, 5: 3, ...",0.001719,0.0,0.757576,0.0


In [None]:
# Select the row(s) of the sorted table whose 'id' equals the chosen value (6 here).
id_especifico = 6
coincide_id = nodes_sorted['id'].eq(id_especifico)
fila_id_especifico = nodes_sorted[coincide_id]

# Show the matching row
print(fila_id_especifico)

    id   name  ml_target  clustering_coefficient  \
6  6.0  sfate        0.0                0.035335   

                                  generalized_degree  triangles  Community  \
6  Counter({3: 37, 5: 36, 8: 33, 6: 32, 2: 30, 4:...   0.052512        0.0   

   Core number  
6          1.0  


In [None]:
#Tablas con todos los atributos sin los de agrupamiento



In [None]:
#Tablas con todos los atributos sin los de comunidades



In [None]:
#Tablas con todos los atributos sin los de núcleos

