# Notebook para convertir los CSV exportados de Gephi a una matriz de adyacencia.

In [1]:
import pandas as pd
import numpy as np 
import os

In [6]:
def edge_to_madj_matrix(in_file, out_file):
    """Convert a Gephi edge-list CSV file into an adjacency-matrix CSV file.

    Args:
        in_file: Path of the edge CSV, loaded with ``pd.read_csv``.
        out_file: Destination path handed on to ``edge_to_madj_matrix_df``.

    Returns:
        Whatever ``edge_to_madj_matrix_df`` returns for the loaded edges.
    """
    return edge_to_madj_matrix_df(pd.read_csv(in_file), out_file)

def edge_to_madj_matrix_df(df_in, out_file):
    """Build a symmetric adjacency matrix from an edge table and save it as CSV.

    Every edge is counted in both directions (Source -> Target and
    Target -> Source), so the resulting matrix is symmetric. Cell values are
    the number of edge rows joining the two nodes.

    Note:
        A self-loop row (Source == Target) is identical to its mirrored copy,
        so it contributes 2 to the diagonal cell.

    Args:
        df_in: DataFrame with at least the columns ``"Source"`` and
            ``"Target"``. Any other columns are ignored.
        out_file: Path (or buffer) passed to ``DataFrame.to_csv``.

    Returns:
        The adjacency matrix as a DataFrame (rows indexed by ``Source``,
        columns by ``Target``), i.e. the same table that is written to
        ``out_file``.

    Raises:
        KeyError: If ``df_in`` lacks a ``"Source"`` or ``"Target"`` column.
    """
    # Only the endpoints matter; dropping the other columns avoids dragging
    # unrelated (e.g. merged-in node attribute) data through the concat.
    forward = df_in[["Source", "Target"]]
    # Swapping the two column names yields every edge in reverse direction.
    backward = forward.rename(columns={"Source": "Target", "Target": "Source"})
    # ignore_index gives the stacked table unique row labels.
    undirected = pd.concat([forward, backward], ignore_index=True)

    adjacency = pd.crosstab(undirected["Source"], undirected["Target"])
    adjacency.to_csv(out_file)
    return adjacency

### Escenario 1

In [3]:
# Scenario 1: turn the full edge export into a single adjacency matrix.
input_file = os.path.join('data','gephi_edge_export_scenario1_v2.csv')
output_file = os.path.join('data','adj_mat_scenario_1_v2.csv')

edge_to_madj_matrix(in_file=input_file, out_file=output_file)

### Escenario 2

In [4]:
# Load the Scenario 2 node table together with an edge table.
# NOTE(review): the edge file read here is the Scenario 1 export
# ('gephi_edge_export_scenario1_v2.csv'); confirm this is intended and not a
# leftover from copying the Scenario 1 cell.
node_file = os.path.join('data','gephi_node_export_scenario2.csv')
nodes = pd.read_csv(node_file)
input_file = os.path.join('data','gephi_edge_export_scenario1_v2.csv')
edges = pd.read_csv(input_file)

# Show the distinct modularity classes present in the node table.
nodes['modularity_class'].unique()

array([3, 5, 4, 0, 2, 1])

In [5]:
# Write one adjacency matrix per modularity class.
# The class IDs are taken from the node table instead of being hard-coded, so
# the cell keeps working if the number of classes changes; sorting makes the
# files get written in ascending class order.
for class_id in sorted(nodes['modularity_class'].unique()):
    # Renaming 'Id' to 'Source' lets the (inner) merge keep only the edges
    # whose Source node belongs to the current class.
    nodes_in_class = nodes[nodes['modularity_class'] == class_id].rename(columns={'Id':'Source'})
    edges_in_class = edges.merge(nodes_in_class, on="Source")

    output_file = os.path.join('data','adj_mat_scenario_2_{}.csv'.format(class_id))
    edge_to_madj_matrix_df(edges_in_class, output_file)

### Escenario 3

In [5]:
# Load the clustered node table and the Scenario 3 edge export.
node_file = os.path.join('data','clustered_data.csv')
nodes = pd.read_csv(node_file)

edge_file = os.path.join('data','gephi_edge_export_scenario3.csv')
edges = pd.read_csv(edge_file)

# Distinct cluster labels found in the node table; shown as the cell output.
clusters = nodes['cluster'].unique()
clusters

array([0, 4, 1, 3, 2])

In [12]:
# Write one adjacency matrix per cluster.
# NOTE(review): edges are selected by their Target node here, whereas the
# Scenario 2 cells select by Source; confirm the difference is intentional.
for cluster_id in clusters:
    # Renaming 'Id' to 'Target' lets the (inner) merge keep only the edges
    # whose Target node belongs to the current cluster.
    nodes_in_cluster = nodes.loc[nodes['cluster'] == cluster_id].rename(columns={'Id':'Target'})
    edges_in_cluster = pd.merge(edges, nodes_in_cluster, on="Target")

    output_file = os.path.join('data','adj_mat_scenario_3_{}.csv'.format(cluster_id))
    edge_to_madj_matrix_df(edges_in_cluster, output_file)