In [7]:
import json
from os import path, makedirs
import uuid

In [5]:
def initialize_dataset():
    """
    Build the empty skeleton that every new dataset starts from.

    A fresh dictionary (with a fresh 'data' list) is created on each call,
    so datasets never share state.

    :return: Dictionary with a single 'value' entry holding zeroed 'total'
        and 'size' counters, the 'en' language tag and an empty 'data' list
    """
    empty_payload = dict(total=0, size=0, language="en", data=[])
    return dict(value=empty_payload)


def append_data(dataset, to_append):
    """
    Add one entry to the end of the dataset's record list.

    Exists only to hide the nested dictionary access from calling code.

    :param dataset: Dataset whose 'value' -> 'data' list is extended in place
    :param to_append: Entry to add at the end of that list
    """
    records = dataset['value']['data']
    records.append(to_append)


def save_dataset(dataset, name, file_format):
    """
    Save the dataset given in input

    The file is written to '../datasets/<name>.<file_format>'; missing parent
    directories are created. Only the 'json' format is supported.

    :param dataset: Dataset to save
    :param name: Name of the dataset (may contain sub-directories)
    :param file_format: Format the dataset should be saved in
    :raises ValueError: If file_format is not a supported format
    """
    # Validate before touching the filesystem: opening the target in 'w' mode
    # first would leave an empty (or truncated) file for an unknown format.
    if file_format != 'json':
        raise ValueError(f'Unsupported file format: {file_format!r}')

    filename = f'../datasets/{name}.{file_format}'
    makedirs(path.dirname(filename), exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, indent=2)


def set_total_size(dataset):
    """
    Update the 'total' and 'size' counters to the current number of records.

    Both fields are set to the length of the 'data' list, in place.

    :param dataset: Dataset whose counters are refreshed
    """
    payload = dataset['value']
    record_count = len(payload['data'])
    payload['total'] = record_count
    payload['size'] = record_count


In [8]:
# Split the generated course dataset into a degree-program dataset and a
# course dataset that references degree programs by id.
with open('../datasets/generated/course.json', encoding='utf-8') as f:
    # PhD student ids are needed to decide whether a course assistant is a
    # PhD student or a professor.
    with open("../datasets/formal_modeling/phd_student.json", encoding='utf-8') as f2:
        phd_students = json.load(f2)['value']['data']
    phd_ids = {student['id'] for student in phd_students}

    old_course_data = json.load(f)['value']

degree_program = initialize_dataset()
course = initialize_dataset()
deg_prog_lookup = {}  # degree program name -> generated id

for c in old_course_data['data']:
    # Replace the degree program name with the id of a (possibly new)
    # degree program record.
    program_name = c.pop('degreeProgram')
    program_id = deg_prog_lookup.get(program_name)
    if program_id is None:
        program_id = uuid.uuid4().hex
        append_data(degree_program, {'id': program_id, 'name': program_name})
        deg_prog_lookup[program_name] = program_id
    c['degreeProgramId'] = program_id

    # Replace the single assistant id with two fields; the one that does not
    # apply is left as an empty string. Assignment order is kept per branch
    # because it determines the key order in the written JSON.
    assistant_id = c.pop('assistantId')
    if assistant_id in phd_ids:
        c['assistant_phd'] = assistant_id
        c['assistant_professor'] = ""
    else:
        c['assistant_professor'] = assistant_id
        c['assistant_phd'] = ""

    append_data(course, c)

for converted in (degree_program, course):
    set_total_size(converted)

save_dataset(degree_program, 'formal_modeling/degree_program', 'json')
save_dataset(course, 'formal_modeling/course', 'json')