In [20]:
import json
import networkx as nx
import pickle

In [21]:
def build_institution_graph(
    input_json="niu_authors_cleaned.json",
    output_gpickle="niu_graph.pkl"
):
    """Build an author/topic graph from a JSON author dump and pickle it.

    The input file is expected to hold a JSON array of author objects. Each
    author becomes a node keyed by its ``author_id`` (the author's OpenAlex
    ID) and tagged ``node_type="author"``; each topic string becomes a node
    tagged ``node_type="topic"``. Edges carry a ``relation`` attribute that
    is either ``"co_author"`` (author <-> author) or ``"has_topic"``
    (author <-> topic).

    Note: co-author IDs that never appear as an ``author_id`` in the input
    are still present in the graph (``add_edge`` creates missing endpoints),
    but such nodes carry no attributes -- in particular no ``node_type``.

    Args:
        input_json: Path of the JSON file to read (UTF-8).
        output_gpickle: Path the pickled graph is written to.

    Returns:
        The constructed ``networkx.Graph``.

    Raises:
        KeyError: If an author record lacks ``author_id``, ``works_count``,
            ``cited_by_count`` or ``abstracts``.
    """
    # Load the collected authors data
    with open(input_json, "r", encoding="utf-8") as f:
        authors_data = json.load(f)

    G = nx.Graph()

    for author in authors_data:
        author_id = author["author_id"]

        # Re-adding an ID that was first created implicitly as somebody's
        # co-author simply fills in its attributes.
        G.add_node(
            author_id,
            node_type="author",
            works_count=author["works_count"],
            cited_by_count=author["cited_by_count"],
            abstracts=author["abstracts"],
        )

        # Co-author edges. Treated as optional, like `topics`: a missing or
        # null list means "no co-authors" rather than an error.
        for co_id in author.get("co_authors") or []:
            # Guard so an already-existing edge keeps its original `relation`.
            if not G.has_edge(author_id, co_id):
                G.add_edge(author_id, co_id, relation="co_author")

        # Topic nodes and author -> topic edges.
        for topic in author.get("topics") or []:
            # Guard so an existing node with this ID is left as it is.
            if not G.has_node(topic):
                G.add_node(topic, node_type="topic")
            if not G.has_edge(author_id, topic):
                G.add_edge(author_id, topic, relation="has_topic")

    # Save the constructed graph to a pickle file
    with open(output_gpickle, "wb") as f:
        pickle.dump(G, f, pickle.HIGHEST_PROTOCOL)

    print(f"Graph built and saved to {output_gpickle}.\n"
          f"Number of nodes: {G.number_of_nodes()}, edges: {G.number_of_edges()}")

    # Hand the graph back so callers do not have to unpickle it again.
    return G

In [22]:
# Build the graph from the cleaned author dump and pickle it to disk.
build_institution_graph(
    input_json="niu_authors_cleaned.json",
    output_gpickle="niu_graph.pkl",
)

Graph built and saved to niu_graph.pkl.
Number of nodes: 10927, edges: 60976


In [23]:
# Read the pickled graph back from disk and display it.
# pickle.load executes arbitrary code from the file, so only use it on
# pickles you produced yourself.
with open("niu_graph.pkl", "rb") as graph_file:
    G = pickle.load(graph_file)
G

<networkx.classes.graph.Graph at 0x177ea7b1990>

In [24]:
# Preview the first few node IDs; the full NodeView repr would print every
# node in the graph into the cell output.
list(G.nodes)[:10]

