In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the connection list; the file is semicolon-separated.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks like
# a row index that was saved into the CSV — confirm whether it should be kept.
conexoes = pd.read_csv(
    'database/conexoes_espec.csv',
    sep=';',
)
conexoes.head()

Unnamed: 0,V1,V2,grau,proximidade,prob_V1_V2
0,1,2,trabalho,visita_frequente,0.589462
1,1,3,trabalho,visita_rara,0.708465
2,2,4,trabalho,visita_casual,
3,2,5,trabalho,visita_rara,0.638842
4,3,6,amigos,mora_junto,


In [3]:
# Connections whose prob_V1_V2 is missing.
unknown_conexoes = conexoes.loc[conexoes['prob_V1_V2'].isna()]
len(unknown_conexoes)

500000

In [4]:
# Connections whose prob_V1_V2 is present.
filled_conexoes = conexoes.loc[conexoes['prob_V1_V2'].notna()]
len(filled_conexoes)

499999

In [5]:
###   INSIGHTS

#    Model the connections as a graph.
#    The graph should be directed (V1->V2 != V2->V1).
#    Edges can be weighted (which property should be the weight: prob_V1_V2, grau or proximidade? Try all of them?)


In [6]:
import networkx as nx

In [7]:
# Build the graph from the connections with a known prob_V1_V2: V1 and V2 are the
# endpoints, and edge_attr=True attaches every remaining column to the edge.
# NOTE(review): no create_using is passed, so this yields an undirected nx.Graph,
# while the insights cell calls for a directed graph — confirm which is intended.
G = nx.from_pandas_edgelist(filled_conexoes, "V1", "V2", edge_attr=True)

In [8]:
# Check which graph class was actually built (undirected Graph vs. DiGraph).
type(G)

networkx.classes.graph.Graph

In [17]:
###  This function is adapted from a post by Ani Madurkar: https://towardsdatascience.com/graph-machine-learning-with-python-pt-1-basics-metrics-and-algorithms-cc40972de113?gi=81c0db45ab01
###  I made some changes in order to better understand the graph for this problem.

def getGraphMetrics(graph):
    """Print size, degree, connectivity, distance and clustering metrics of a graph.

    Parameters
    ----------
    graph : networkx graph
        Graph to summarise. Both undirected and directed graphs are accepted;
        directed graphs report strongly and weakly connected components
        instead of plain connected components.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    # Build the degree sequence once; every degree statistic below reuses it.
    degrees = [degree for _, degree in graph.degree]

    print("Graph Summary:")
    print(f"Number of nodes : {len(graph.nodes)}")
    print(f"Number of edges : {len(graph.edges)}")
    if not degrees:
        # np.max/np.min raise on an empty sequence and the remaining metrics
        # are undefined without nodes, so stop here instead of crashing.
        print("Graph has no nodes; remaining metrics are undefined.")
        return
    print(f"Maximum degree : {np.max(degrees)}")
    print(f"Minimum degree : {np.min(degrees)}")
    print(f"Average degree : {np.mean(degrees)}")
    print(f"Median degree : {np.median(degrees)}")
    print("")

    print("Graph Connectivity")
    # Choose the component count by graph type explicitly rather than relying on
    # a bare `except` (which would also hide unrelated errors) to detect digraphs.
    if graph.is_directed():
        print(f"Strongly Connected Components : {nx.number_strongly_connected_components(graph)}")
        print(f"Weakly Connected Components : {nx.number_weakly_connected_components(graph)}")
    else:
        print(f"Connected Components : {nx.number_connected_components(graph)}")
    print("")

    print("Graph Distance")
    # Both measures raise NetworkXError when the graph is not (strongly)
    # connected; report that instead of aborting the whole summary.
    # NOTE: these are all-pairs computations and can be slow on large connected graphs.
    try:
        print(f"Average Distance : {nx.average_shortest_path_length(graph)}")
        print(f"Diameter : {nx.diameter(graph)}")
    except nx.NetworkXError as nxe:
        print(f"Exeption : {nxe}")
    print("")

    print("Graph Clustering")
    print(f"Transitivity : {nx.transitivity(graph)}")
    print(f"Average Clustering Coefficient : {nx.average_clustering(graph)}")

# Print the metrics for G, the graph built from the connections with a known prob_V1_V2.
getGraphMetrics(G)

Graph Summary:
Number of nodes : 687855
Number of edges : 499999
Maximum degree : 3
Minimum degree : 1
Average degree : 1.453791860203095
Median degree : 1.0

Graph Connectivity
Connected Components : 187856

Graph Distance
Exeption : Graph is not connected.

Graph Clustering
Transitivity : 0
Average Clustering Coefficient : 0.0
