In [153]:
import re
import json
import networkx as nx

In [280]:
# Location of the MATLAB source file that holds the mentor/mentee data
file_path = "./data/NobelTree.m"

In [281]:
# Load the whole MATLAB script into memory as a single string
with open(file_path, 'r') as matlab_file:
    matlab_script = matlab_file.read()

In [282]:
# Regular expressions to find the successors (mentees) and predecessors (mentors).
# An assignment of the form `s0 = {...};` lists pupils / successors.
# An assignment of the form `p0 = {...};` lists teachers / predecessors.
# The leading `\b` stops the patterns from matching the tail of a longer
# identifier (e.g. `names = {...};` or `group12 = {...};`). The two result lists
# are paired by position later, so a single stray match would misalign them.
pupil_pattern = re.compile(r'\bs\d*\s*=\s*\{([^\}]+)\};')
teacher_pattern = re.compile(r'\bp\d*\s*=\s*\{([^\}]+)\};')

Line 5470

Change: p16 = {'Sophus Jorgensen'};

To: s16 = {'Sophus Jorgensen'};

---

Line 9956

Change: s = {'Robin Warren (M2005)' 'Barry Marshall (M2005)'};

To: % = {'Robin Warren (M2005)' 'Barry Marshall (M2005)'};

Reason: this `s` line has no corresponding `p` line.

In [283]:
# Find all successors and predecessors
pupils = pupil_pattern.findall(matlab_script)
teachers = teacher_pattern.findall(matlab_script)

# The two lists are paired by position later on, so an unmatched `s` or `p`
# line would silently misalign every following entry. Fail early instead.
if len(pupils) != len(teachers):
    raise ValueError(
        f"Found {len(pupils)} pupil entries but {len(teachers)} teacher entries; "
        "every `s` line in the MATLAB file needs a matching `p` line."
    )

In [288]:
# Spot test: find the position of a known pupil entry and show the teacher
# entry stored at the same position
claude_hudson_entry = "'Claude Hudson' 'Claude Hudson' 'Claude Hudson' 'Claude Hudson'"
test_index = pupils.index(claude_hudson_entry)
teachers[test_index]

"'Elmer Loomis' 'Walther Nernst (C1920)' 'Jacobus van t Hoff (C1901)' 'William Magie'"

In [289]:
# Spot test: print the pupil and teacher entries found at the tested position
print(
    f"Pupil: {pupils[test_index]}",
    f"Teacher: {teachers[test_index]}",
    sep="\n",
)

Pupil: 'Claude Hudson' 'Claude Hudson' 'Claude Hudson' 'Claude Hudson'
Teacher: 'Elmer Loomis' 'Walther Nernst (C1920)' 'Jacobus van t Hoff (C1901)' 'William Magie'


As another example, the first relationships extracted below show that **Wilhelm Rontgen** is the _pupil_ of **Gustav Zeuner** and **August Kundt**.

This is confirmed by [Wilhelm Röntgen's Wikipedia page](https://en.wikipedia.org/wiki/Wilhelm_R%C3%B6ntgen):

> he became a favourite student of Professor August Kundt...

In [290]:
# Start with no relationships; entries are added in the next step
relationship_list = list()

In [291]:
# Extract the relationships from the matches.
# Each matched string holds single-quoted names; the n-th pupil name is paired
# with the n-th teacher name of the corresponding teacher string.
# The list is rebuilt from scratch (rather than appended to) so that re-running
# this cell does not duplicate every relationship.
quoted_name = re.compile(r"'(.*?)'")
relationship_list = [
    {'teacher': teacher, 'pupil': pupil}
    for pupil_string, teacher_string in zip(pupils, teachers)
    for pupil, teacher in zip(
        quoted_name.findall(pupil_string),
        quoted_name.findall(teacher_string),
    )
]

In [292]:
# Preview the first four extracted relationships
relationship_list[0:4]

[{'teacher': 'Gustav Zeuner', 'pupil': 'Wilhelm Rontgen (P1901)'},
 {'teacher': 'August Kundt', 'pupil': 'Wilhelm Rontgen (P1901)'},
 {'teacher': 'Victor Regnault', 'pupil': 'Gustav Zeuner'},
 {'teacher': 'Julius Weisbach', 'pupil': 'Gustav Zeuner'}]

In [295]:
# Spot test: list every relationship whose pupil is Claude Hudson.
# Corresponding lines in the MATLAB file:
# s25 = {'Claude Hudson' 'Claude Hudson' 'Claude Hudson' 'Claude Hudson'};
# p25 = {'Elmer Loomis' 'Walther Nernst (C1920)' 'Jacobus van t Hoff (C1901)' 'William Magie'};

list(filter(lambda relationship: relationship['pupil'] == 'Claude Hudson', relationship_list))

[{'teacher': 'Elmer Loomis', 'pupil': 'Claude Hudson'},
 {'teacher': 'Walther Nernst (C1920)', 'pupil': 'Claude Hudson'},
 {'teacher': 'Jacobus van t Hoff (C1901)', 'pupil': 'Claude Hudson'},
 {'teacher': 'William Magie', 'pupil': 'Claude Hudson'}]

In [296]:
# Check that each entry in the relationship list contains both a teacher and a pupil;
# any entry missing either key is printed
required_keys = ('teacher', 'pupil')
for relationship in relationship_list:
    if not all(key in relationship for key in required_keys):
        print(relationship)

In [297]:
# Destination path for the JSON export of the relationships
output_file_path = "./data/nobel-relationship.json"

In [298]:
# Serialize the relationships as indented JSON and write them to the output file
with open(output_file_path, 'w') as output_file:
    output_file.write(json.dumps(relationship_list, indent=2))