In [1]:
import re
import json

In [41]:
# Location of the MATLAB script that holds the mentor/mentee tree
file_path = './data/NobelTree-edit.m'

In [42]:
# Load the whole MATLAB script into memory as text.
# The encoding is pinned so the read does not depend on the platform's locale default.
# NOTE(review): assumes the .m file is UTF-8 (or plain ASCII) — confirm.
with open(file_path, 'r', encoding='utf-8') as file:
    matlab_script = file.read()

In [44]:
# Regular expressions that capture the brace contents of the MATLAB cell arrays.
# An assignment to `s<N>` (e.g. `s0 = {...};`) lists pupils / successors.
# An assignment to `p<N>` (e.g. `p0 = {...};`) lists teachers / predecessors.
# The leading `\b` stops a match from starting in the middle of a longer
# identifier such as `names0` or `map1`.
pupil_pattern = re.compile(r'\bs\d+\s*=\s*\{([^\}]+)\};')
teacher_pattern = re.compile(r'\bp\d+\s*=\s*\{([^\}]+)\};')

In [45]:
# Collect the raw brace contents of every successor and predecessor assignment
pupils, teachers = (
    pattern.findall(matlab_script)
    for pattern in (pupil_pattern, teacher_pattern)
)

In [47]:
# Sanity check: show the first raw successor entry and the first raw predecessor entry
first_pupil_entry = pupils[0]
print(f"Pupil: {first_pupil_entry}")
first_teacher_entry = teachers[0]
print(f"Teacher: {first_teacher_entry}")

Pupil: 'Wilhelm Rontgen (P1901)' 'Wilhelm Rontgen (P1901)'
Teacher: 'Gustav Zeuner' 'August Kundt'


In this instance, **Wilhelm Rontgen** is the _pupil_ of **Gustav Zeuner** and **August Kundt**.

This is confirmed by [Wilhelm Röntgen's Wikipedia page](https://en.wikipedia.org/wiki/Wilhelm_R%C3%B6ntgen):

> he became a favourite student of Professor August Kundt...

In [57]:
# Containers for the graph extracted from the script.
# `edges` holds one entry per predecessor -> successor connection.
edges: list = list()
# `nodes` holds every distinct person that appears in the network.
nodes: set = set()

In [50]:
# Preview the pairing logic on the first four entries only.
# Each entry is a run of single-quoted names; the i-th pupil pairs with the i-th teacher.
quoted_name = re.compile(r"'(.*?)'")
for pupil_entry, teacher_entry in zip(pupils[:4], teachers[:4], strict=True):
    pupil_names = quoted_name.findall(pupil_entry)
    teacher_names = quoted_name.findall(teacher_entry)
    for pupil_name, teacher_name in zip(pupil_names, teacher_names, strict=True):
        print(f"{teacher_name} is the teacher of {pupil_name}")

Gustav Zeuner is the teacher of Wilhelm Rontgen (P1901)
August Kundt is the teacher of Wilhelm Rontgen (P1901)
Victor Regnault is the teacher of Gustav Zeuner
Julius Weisbach is the teacher of Gustav Zeuner
Pierre Berthier is the teacher of Victor Regnault
Justus von Liebich is the teacher of Victor Regnault
Claude Bertholet is the teacher of Pierre Berthier
Johann Schreiber is the teacher of Pierre Berthier


In [59]:
# Build the full graph: one edge per (teacher, pupil) pair and one node per person.
# The accumulators are reset here so the cell is idempotent: re-running it does not
# duplicate edges, and it still works after `nodes` has been rebound to a list.
edges = []
nodes = set()
# Each entry is a run of single-quoted names; compile the extractor once.
name_pattern = re.compile(r"'(.*?)'")
# strict=True raises ValueError if the pupil and teacher sequences differ in length.
for pupil_string, teacher_string in zip(pupils, teachers, strict=True):
    pupil_list = name_pattern.findall(pupil_string)
    teacher_list = name_pattern.findall(teacher_string)
    # The i-th pupil name pairs with the i-th teacher name of the same entry.
    for pupil, teacher in zip(pupil_list, teacher_list, strict=True):
        edges.append({'source': teacher, 'target': pupil})
        nodes.add(pupil)
        nodes.add(teacher)

In [62]:
# Peek at four people; `nodes` is a set, so which four appear is arbitrary
first_four_nodes = list(nodes)[:4]
print(first_four_nodes)

['Georg Wedel', 'Guiseppi Biancani', 'Francesco Grimaldi', 'Christian Hausen']


In [67]:
# Show the first two teacher -> pupil links
first_two_edges = edges[:2]
print(first_two_edges)

[{'source': 'Gustav Zeuner', 'target': 'Wilhelm Rontgen (P1901)'}, {'source': 'August Kundt', 'target': 'Wilhelm Rontgen (P1901)'}]


In [68]:
# Convert the set of names to a list of {'id': name} dictionaries.
# Names are sorted so the output order is reproducible; plain set iteration
# order for strings can differ from one kernel session to the next.
# Note: this rebinds `nodes` from a set to a list, so rebuild the set before
# running this cell a second time.
nodes = [{'id': node} for node in sorted(nodes)]

In [69]:
# Bundle the node records and the links into a single mapping with
# the two keys 'nodes' and 'links'
data = dict(nodes=nodes, links=edges)

In [70]:
# Destination of the exported graph JSON
output_file_path = './data/nobel-tree-edit.json'

In [71]:
# Serialise the graph as pretty-printed JSON (two-space indent)
with open(output_file_path, 'w') as json_file:
    json.dump(data, fp=json_file, indent=2)