In [1]:
import json
import re
from collections import deque

In [2]:
# Load the teacher/pupil relationship records from the JSON file.
# The encoding is passed explicitly: JSON is UTF-8 by specification, whereas open()
# otherwise falls back to a platform-dependent default that can garble or reject
# non-ASCII names.
with open('./data/nobel-relationship.json', encoding='utf-8') as f:
    relationships = json.load(f)

In [3]:
import networkx as nx

# Create an empty directed graph; the edges added later point from teacher to pupil
academic_tree = nx.DiGraph()

In [4]:
# Preview the first five relationship records
relationships[:5]

[{'teacher': 'Gustav Zeuner', 'pupil': 'Wilhelm Rontgen (P1901)'},
 {'teacher': 'August Kundt', 'pupil': 'Wilhelm Rontgen (P1901)'},
 {'teacher': 'Victor Regnault', 'pupil': 'Gustav Zeuner'},
 {'teacher': 'Julius Weisbach', 'pupil': 'Gustav Zeuner'},
 {'teacher': 'Pierre Berthier', 'pupil': 'Victor Regnault'}]

In [5]:
# Match names that carry a Nobel prize annotation: an opening parenthesis, one of the
# letters P, C, M or E (presumably Physics, Chemistry, Medicine, Economics - TODO confirm),
# a four-digit year and a closing parenthesis, e.g. "(P1901)".
# NOTE: the pattern is not anchored at the end, so the annotation may be followed by more text.
nobel_pattern = re.compile(r'^.*\([PCME]\d{4}\)')

In [6]:
# Sanity check: a name with a prize annotation should be recognised by the pattern
print(
    '"Hans von Euler-Chelpin (C1929)" matched'
    if nobel_pattern.match('Hans von Euler-Chelpin (C1929)')
    else '"Hans von Euler-Chelpin (C1929)" was not matched'
)

"Hans von Euler-Chelpin (C1929)" matched


In [7]:
# Sanity check: a name without a prize annotation should not be recognised by the pattern
print(
    '"Gustav Zeuner" matched'
    if nobel_pattern.match('Gustav Zeuner')
    else '"Gustav Zeuner" was not matched'
)

"Gustav Zeuner" was not matched


In [8]:
# Insert one teacher -> pupil edge per record; nodes are created implicitly
academic_tree.add_edges_from(
    (record['teacher'], record['pupil']) for record in relationships
)

In [9]:
# Flag every node: 'nobel' is True exactly when the name matches the Nobel pattern
for node in academic_tree.nodes:
    academic_tree.nodes[node]['nobel'] = nobel_pattern.match(node) is not None

In [10]:
# Spot check: Wilhelm Rontgen should be flagged as a laureate ('nobel' is True)
academic_tree.nodes['Wilhelm Rontgen (P1901)']

{'nobel': True}

---

In [18]:
# Collect the 'parent' nodes: people with no incoming edge, i.e. no recorded teacher
parent_nodes = [person for person, incoming in academic_tree.in_degree() if incoming == 0]

In [21]:
# Number of parent nodes (nodes without any incoming teacher -> pupil edge)
len(parent_nodes)

309

In [22]:
# Preview the first five parent nodes
parent_nodes[:5]

['Johann Schreiber',
 'Guillaume-Francoise Rouelle',
 'Jean-Etienne Guettard',
 'Johann Gmelin',
 'Olof Rudbeck']

In [23]:
# Check if the graph is strongly connected, i.e. whether every node can reach every other
# node along directed edges. A graph with more than one node can only satisfy this if it
# contains cycles, so for teacher -> pupil edges the expected answer is False.
nx.is_strongly_connected(academic_tree)

False

In [28]:
# Find all weakly connected components, i.e. groups of nodes that are linked once edge
# direction is ignored. The generator is materialised into a list so it can be reused.
connected_components = list(nx.weakly_connected_components(academic_tree))

In [29]:
# Number of weakly connected components
len(connected_components)

17

In [30]:
# Node count of each weakly connected component, in discovery order
list(map(len, connected_components))

[3476, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 6]

---

In [11]:
# Function to get descendants of a node
def get_descendants(graph, root):
    """
    Collect every node reachable from a root node, including the root itself.

    The graph is explored breadth-first along outgoing edges.

    Parameters:
    - graph: An object exposing successors(node), which yields the nodes directly
      reachable from node (e.g. a networkx DiGraph).
    - root: The node from which the traversal starts.

    Returns:
    A set containing root and every node reachable from it.
    """
    # Nodes are marked as seen when they are enqueued rather than when they are
    # dequeued. Otherwise a node with several already-visited parents is queued
    # and expanded once per parent, which grows exponentially with depth.
    descendants = {root}
    # deque gives O(1) pops from the left; list.pop(0) shifts the whole list.
    queue = deque([root])

    while queue:
        current_node = queue.popleft()
        for successor in graph.successors(current_node):
            if successor not in descendants:
                descendants.add(successor)
                queue.append(successor)

    return descendants

In [12]:
def extract_subgraph_to_d3(academic_tree, parent_node, file_path):
    """
    Export the branch of an academic tree below a given node as D3.js-style JSON.

    The branch consists of the given node plus everything get_descendants() reports
    as reachable from it. The written JSON object has two keys: "nodes", a list of
    {"id", "nobel"} entries, and "links", a list of {"source", "target"} entries.

    Parameters:
    - academic_tree: The full academic tree graph.
    - parent_node: The node at which the exported branch starts.
    - file_path: Where the JSON file is written (opened in 'w' mode).

    Returns:
    None
    """
    # Restrict the full graph to the chosen node and everything below it
    reachable = get_descendants(academic_tree, parent_node)
    branch = academic_tree.subgraph(reachable)

    # One entry per person; a missing 'nobel' attribute is treated as False
    node_records = []
    for name in branch.nodes:
        is_laureate = branch.nodes[name].get('nobel', False)
        node_records.append({"id": name, "nobel": is_laureate})

    # One entry per edge of the branch
    link_records = []
    for origin, destination in branch.edges:
        link_records.append({"source": origin, "target": destination})

    # Assemble the payload and write it out
    payload = {"nodes": node_records, "links": link_records}
    with open(file_path, 'w') as out:
        json.dump(payload, out)

In [13]:
# Export the branch that starts at 'Emmanuel Stupanus'
extract_subgraph_to_d3(academic_tree, 'Emmanuel Stupanus', 'data/nobel-tree-emmanuel-stupanus-subgraph.json')

In [31]:
# Export the branch that starts at 'John Strutt (P1904)'
extract_subgraph_to_d3(academic_tree, 'John Strutt (P1904)', 'data/nobel-tree-john-strutt-subgraph.json')

---

In [14]:
# Build the D3 node and link records for the whole graph.
# A node without a 'nobel' attribute is reported as False.
nodes = [
    {"id": name, "nobel": is_laureate}
    for name, is_laureate in academic_tree.nodes(data='nobel', default=False)
]
links = [
    {"source": origin, "target": destination}
    for origin, destination in academic_tree.edges
]

In [15]:
# Bundle nodes and links into the single object that is written to disk
d3_data = {"nodes": nodes, "links": links}

In [16]:
# Destination path of the JSON file for the full graph
d3_file_path = './data/nobel-tree-full.json'

In [17]:
# Write the full-graph node/link data to the JSON file, overwriting any existing file
with open(d3_file_path, 'w') as file:
    json.dump(d3_data, file)