Importamos librerías

In [None]:
import json
import networkx as nx
import matplotlib.pyplot as plt

Creamos el grafo

In [None]:
# Single undirected graph shared by the cells below. populate_graph() fills it
# with user nodes (keyed by login) and issue / pull-request nodes (keyed by number).
graph = nx.Graph()

Definimos una función para capturar los datos de las respuestas de las API de GitHub

In [None]:
def _read_item_fields(record):
    """Extract ``(login, number, attributes)`` from an issue / pull-request record.

    Returns ``None`` when the record is incomplete (a key is missing, the
    record or its ``author`` is null, or an identifier is null) so that the
    caller can skip just that record.
    """
    try:
        login = record["author"]["login"]
        number = record["number"]
        attributes = {
            "title": record["title"],
            "state": record["state"],
            "author": login,
            "url": record["url"],
        }
    except (KeyError, TypeError):
        # KeyError: a field is absent. TypeError: the record or its author is None.
        return None
    if login is None or number is None:
        # A null identifier cannot be used as a graph node.
        return None
    return login, number, attributes


def populate_graph(
    sample_name_file: str,
    node_type: str,
    number_of_files,
    start_from=1,
    target_graph=None,
) -> None:
    """Load numbered JSON responses and add their users, items and links to a graph.

    Files are opened as ``f"{sample_name_file}{i}.json"`` for every ``i`` from
    ``start_from`` to ``number_of_files`` (both inclusive). For each item found
    under ``data.repository.<collection>.nodes`` the function adds:

    * a ``user`` node for the author (keyed by login),
    * a node for the item itself (keyed by its number, ``type=node_type``),
    * an ``author`` edge between them,
    * and, for every entry of ``timelineItems.nodes`` that carries a complete
      ``source``, the referenced item, its author, their ``author`` edge and a
      ``referenced`` edge from the current item.

    Incomplete items or references are skipped individually, so one malformed
    entry no longer discards the references that follow it.

    Args:
        sample_name_file: Path prefix of the files, without number or extension.
        node_type: ``"PullRequest"`` or ``"Issue"``; selects the collection to read.
        number_of_files: Number of the last file to read (inclusive).
        start_from: Number of the first file to read.
        target_graph: Graph-like object exposing ``add_node`` / ``add_edge``.
            Defaults to the module-level ``graph``.

    Raises:
        ValueError: If ``node_type`` is not one of the supported values.
        OSError: If one of the expected files cannot be opened.
    """
    collection_keys = {"PullRequest": "pullRequests", "Issue": "issues"}
    if node_type not in collection_keys:
        raise ValueError(
            f"Unsupported node_type {node_type!r}; expected one of {sorted(collection_keys)}"
        )
    items = collection_keys[node_type]
    destination = graph if target_graph is None else target_graph

    for i in range(start_from, number_of_files + 1):
        with open(f"{sample_name_file}{i}.json", encoding="utf-8") as f:
            data = json.load(f)

        for item in data["data"]["repository"][items]["nodes"]:
            parsed_item = _read_item_fields(item)
            if parsed_item is None:
                continue
            login, number, attributes = parsed_item

            destination.add_node(login, type="user")
            destination.add_node(number, type=node_type, **attributes)
            destination.add_edge(login, number, type="author")

            timeline = item.get("timelineItems") or {}
            for reference in timeline.get("nodes") or []:
                try:
                    source = reference["source"]
                    source_type = source["__typename"]
                except (KeyError, TypeError):
                    # Timeline entry without a usable source: skip only this one.
                    continue
                parsed_source = _read_item_fields(source)
                if parsed_source is None:
                    continue
                source_login, source_number, source_attributes = parsed_source

                destination.add_node(source_login, type="user")
                destination.add_node(
                    source_number, type=source_type, **source_attributes
                )
                destination.add_edge(source_login, source_number, type="author")
                # Link the current item to the item that references it.
                destination.add_edge(number, source_number, type="referenced")

Analizamos tanto Pull-Requests como Issues y Usuarios con la función anterior

In [None]:
# Load the pull-request responses: json/prs/response_pr1.json through response_pr57.json.
populate_graph(
    sample_name_file="json/prs/response_pr",
    node_type="PullRequest",
    number_of_files=57,
)

In [None]:
# Load the issue responses; numbering starts at 0 here
# (json/issues/response_issue0.json through response_issue90.json).
populate_graph(
    sample_name_file="json/issues/response_issue",
    node_type="Issue",
    number_of_files=90,
    start_from=0,
)

Mostramos el grafo

In [None]:
import matplotlib.pyplot as plt
import networkx as nx

# Colour of each node according to its ``type`` attribute; unknown types fall back to gray.
node_colors = {
    'user': 'blue',
    'PullRequest': 'green',
    'Issue': 'red'
}

# Marker size grows linearly with the node's degree.
degrees = dict(graph.degree())
nodes_sizes = [degrees[node] * 100 for node in graph.nodes]

# nx.draw computes a randomly initialised spring layout when no positions are
# given, so the picture changes on every run. Fixing the seed makes the figure
# reproducible under "Restart & Run All".
overview_layout = nx.spring_layout(graph, seed=42)

plt.figure(figsize=(100, 100))
nx.draw(
    graph,
    pos=overview_layout,
    with_labels=True,
    node_color=[node_colors.get(graph.nodes[node]['type'], 'gray') for node in graph.nodes],
    node_size=nodes_sizes,
)
plt.show()

Centralidad de grado


In [None]:
# Degree centrality of every node, ranked from highest to lowest.
centralidad_de_grados = nx.degree_centrality(graph)
nodos_ordenados = sorted(
    centralidad_de_grados.items(),
    key=lambda par: par[1],
    reverse=True,
)

# Report only the user nodes, keeping the ranking order.
for nodo, centralidad in nodos_ordenados:
    if graph.nodes[nodo]["type"] != "user":
        continue
    tipo = graph.nodes[nodo]["type"]
    print(f"{nodo}: Tipo: {tipo} - centralidad: {centralidad}")





Centralidad de cercanía

In [None]:
# Closeness centrality of every node, ranked from highest to lowest.
centralidad_de_cercania = nx.closeness_centrality(graph)
nodos_ordenados = sorted(
    centralidad_de_cercania.items(),
    key=lambda par: par[1],
    reverse=True,
)



In [None]:
# Print the user nodes from whatever ranking ``nodos_ordenados`` currently holds.
for nodo, centralidad in nodos_ordenados:
    if graph.nodes[nodo]["type"] != "user":
        continue
    tipo = graph.nodes[nodo]["type"]
    print(f"{nodo}: Tipo: {tipo} - centralidad: {centralidad}")

Centralidad de intermediación

In [None]:
# Betweenness centrality of every node, ranked from highest to lowest.
centralidad_de_intermediacion = nx.betweenness_centrality(graph)
nodos_ordenados = sorted(
    centralidad_de_intermediacion.items(),
    key=lambda par: par[1],
    reverse=True,
)


In [None]:

# Print the Issue nodes from whatever ranking ``nodos_ordenados`` currently holds.
# NOTE(review): the degree and closeness cells filter on "user" instead - confirm
# that reporting issues here is intended.
for nodo, centralidad in nodos_ordenados:
    if graph.nodes[nodo]["type"] != "Issue":
        continue
    tipo = graph.nodes[nodo]["type"]
    print(f"{nodo}: Tipo: {tipo} - centralidad: {centralidad}")

Número de aristas del grafo

In [None]:
# number_of_edges() returns the edge count of the graph, not a degree, so the
# label now says "aristas" (edges) instead of "grados" (degrees).
print("Numero de aristas", graph.number_of_edges())

Detección de comunidades

In [None]:

import networkx as nx
import matplotlib.pyplot as plt
import community as community_louvain
# Detect communities with the Louvain algorithm. The algorithm is randomised,
# so a fixed random_state keeps the partition stable between runs.
partition = community_louvain.best_partition(graph, random_state=42)

# Group the nodes by the community id they were assigned.
communities = {}
for node, community_id in partition.items():
    communities.setdefault(community_id, []).append(node)

# One colour index per node, in the same order nx.draw iterates the nodes.
community_colors = [partition[node] for node in graph.nodes()]

# Draw the graph with nodes coloured by community; the layout is seeded so the
# figure is reproducible.
pos = nx.spring_layout(graph, seed=42)
plt.figure(figsize=(100, 100))
nx.draw(graph, pos, node_color=community_colors, with_labels=True, cmap=plt.cm.rainbow)
plt.show()

Componentes conectados


In [None]:
# Displays True when the whole graph forms a single connected component.
nx.is_connected(graph)

In [None]:
import matplotlib.pyplot as plt
import networkx as nx

def dibujar_grafo_con_base_en_centralidad(centralidad_base, proporcion, seed=42):
    """Draw the module-level ``graph`` with node sizes driven by a centrality score.

    Nodes are coloured by their ``type`` attribute (blue users, green pull
    requests, red issues, gray for anything else) and sized as
    ``centralidad_base[node] * proporcion``.

    Args:
        centralidad_base: Mapping from node to its centrality value; it must
            contain an entry for every node of ``graph``.
        proporcion: Factor applied to each centrality value to obtain the
            marker size.
        seed: Seed for the spring layout. Using the same seed places the nodes
            identically on every call, so figures for different centrality
            measures can be compared side by side.
    """
    node_colors = {
        'user': 'blue',
        'PullRequest': 'green',
        'Issue': 'red'
    }

    node_sizes = [centralidad_base[node] * proporcion for node in graph.nodes]
    node_palette = [
        node_colors.get(graph.nodes[node].get('type'), 'gray') for node in graph.nodes
    ]

    # Without explicit positions nx.draw builds a randomly initialised spring
    # layout, which makes every figure different; compute a seeded one instead.
    positions = nx.spring_layout(graph, seed=seed)

    plt.figure(figsize=(50, 50))
    nx.draw(
        graph,
        pos=positions,
        with_labels=True,
        node_color=node_palette,
        node_size=node_sizes,
        font_size=8,
    )
    plt.show()

In [None]:
# Node sizes proportional to degree centrality.
dibujar_grafo_con_base_en_centralidad(
    centralidad_base=centralidad_de_grados,
    proporcion=1_000_000,
)


In [None]:
# Node sizes proportional to closeness centrality.
dibujar_grafo_con_base_en_centralidad(
    centralidad_base=centralidad_de_cercania,
    proporcion=1_000,
)

In [None]:
# Node sizes proportional to betweenness centrality.
dibujar_grafo_con_base_en_centralidad(
    centralidad_base=centralidad_de_intermediacion,
    proporcion=100_000,
)

Componentes conectados

In [None]:
# Materialise the connected components once; each one is a set of node ids.
# (The original computed this list twice and discarded an is_connected() result.)
connected_components = list(nx.connected_components(graph))

print("Componentes conectados:")
for i, component in enumerate(connected_components, start=1):
    print(f"Componente {i}: {component}")