In [5]:
import json
from os import path, makedirs

In [7]:
def initialize_dataset():
    """
    Build the empty skeleton used to initialize a new dataset.

    A new dictionary (with a new, empty 'data' list) is created on every
    call, so datasets never share state.

    :return: Dictionary with a single 'value' entry holding zeroed 'total'
             and 'size' counters, the language code "en" and an empty
             'data' list
    """
    empty_payload = dict(total=0, size=0, language="en", data=[])
    return {"value": empty_payload}


def append_data(dataset, to_append):
    """
    Add one record to the end of a dataset's 'data' list, in place.

    Exists only to hide the nested lookup and keep calling code readable.

    :param dataset: Dataset whose dataset['value']['data'] list is extended
    :param to_append: Record to add
    """
    records = dataset['value']['data']
    records.append(to_append)


def save_dataset(dataset, name, file_format):
    """
    Save the dataset to '../datasets/<name>.<file_format>'.

    The path is relative to the current working directory. Missing parent
    directories are created, and an existing file is overwritten.

    :param dataset: Dataset to save (must be JSON-serializable)
    :param name: Name of the dataset; may contain sub-directories,
                 e.g. 'generated/departments'
    :param file_format: Format the dataset should be saved in; only 'json'
                        is supported
    :raises ValueError: If file_format is not a supported format
    """
    # Validate before touching the filesystem: opening the file first would
    # leave an empty file behind for any format we cannot write.
    if file_format != 'json':
        raise ValueError(f"Unsupported file format: {file_format!r}")

    filename = f'../datasets/{name}.{file_format}'
    makedirs(path.dirname(filename), exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, indent=2)


def set_total_size(dataset):
    """
    Update the 'total' and 'size' counters to the current number of records.

    Both fields of dataset['value'] are set, in place, to the length of
    dataset['value']['data'].

    :param dataset: Dataset to manipulate
    """
    payload = dataset['value']
    record_count = len(payload['data'])
    payload['total'] = record_count
    payload['size'] = record_count

def extract_departments(subjects):
    """
    Collect the distinct departments referenced by the courses of a dataset.

    Departments are de-duplicated on their 'unitId' and kept in order of
    first appearance.

    :param subjects: Parsed course dataset; courses are read from
                     subjects['value']['data'], and every entry of a
                     course's 'department' list must provide 'unitId' and
                     'unitName'
    :return: A new dataset dictionary whose 'data' list holds one
             {'id': ..., 'name': ...} entry per distinct department, with
             'total' and 'size' set to the number of entries
    """
    print("Extracting all departments...")
    departments = initialize_dataset()
    already_added = set()
    for course in subjects['value']['data']:
        for department in course['department']:
            unit_id = department['unitId']
            if unit_id in already_added:
                # Same department already seen on an earlier course.
                continue
            append_data(departments, {
                'id': unit_id,
                'name': department['unitName']
            })
            already_added.add(unit_id)
    # Reuse the shared helper instead of duplicating the counter logic.
    set_total_size(departments)

    return departments

In [8]:
# Driver cell: read the course dataset, derive the department list from it and
# store the result next to the other datasets.
# Paths are relative to the current working directory.
with open("../datasets/original/course_en.json", 'r', encoding='utf-8') as fp:
    courses = json.load(fp)
departments = extract_departments(courses)
# save_dataset prefixes '../datasets/' and appends the extension, so this
# writes '../datasets/generated/departments.json'.
save_dataset(departments, 'generated/departments', 'json')

Extracting all departments...
