In [1]:
from os import makedirs, path
import json
import uuid

In [2]:
# Parse the original course JSON file into `courses`.
# A later cell reads entries through courses['value']['data'].
with open('../datasets/original/course_en.json', 'r', encoding='utf-8') as fp:
    courses = json.load(fp)

In [3]:
def clean_string(input_string):
    """
    Utility function to clean the input string.

    The filters are chosen by hand when scraping the values and are applied
    in the order listed. Surrounding whitespace is stripped last, so that
    whitespace left exposed by removing an invisible character (for example
    a zero-width space at either end) does not survive in the result.
    NOTE: Only add filters, don't remove them

    :param input_string: String to be cleaned
    :return: Cleaned string
    """
    # (old, new) pairs; order matters because the longer garbled sequence
    # must be removed before its shorter suffix.
    # NOTE(review): the first two entries look like mis-decoded zero-width
    # space sequences picked up while scraping -- confirm before touching.
    filters = (
        (u"\xa0â€‹", ""),
        (u"â€‹", ""),
        (u"\n", ""),
        (u"\t", ""),
        (u"\xa0", u" "),
        (u"\u200b", ""),
        (u"\u00e0", "à"),
    )
    cleaned = input_string
    for old, new in filters:
        cleaned = cleaned.replace(old, new)
    # Strip after filtering: str.strip() does not treat U+200B as whitespace,
    # so stripping first would leave spaces that sat next to it.
    return cleaned.strip()


def initialize_dataset():
    """
    Build the empty skeleton every new dataset starts from.

    A fresh dictionary (with a fresh, empty 'data' list) is created on each
    call, so datasets never share state.

    :return: Initial dictionary for any dataset
    """
    payload = dict(total=0, size=0, language="en", data=[])
    return {"value": payload}


def append_data(dataset, to_append):
    """
    Add one entry to the end of the dataset's 'data' list (in place).

    Exists only to keep the nested key access out of the calling code.

    :param dataset: Dataset to append data to
    :param to_append: Data to append
    """
    records = dataset['value']['data']
    records.append(to_append)


def save_dataset(dataset, name, file_format):
    """
    Save the dataset given in input under ./datasets/<name>.<file_format>

    The target directory is created when missing. Only the 'json' format is
    currently supported.

    :param dataset: Dataset to save
    :param name: Name of the dataset
    :param file_format: Format the dataset should be saved in
    :raises ValueError: If file_format is not a supported format
    """
    # Reject unknown formats before touching the disk; opening the file
    # first would leave an empty file behind without any warning.
    if file_format != 'json':
        raise ValueError(f"Unsupported file format: {file_format!r}")

    filename = f'./datasets/{name}.{file_format}'
    makedirs(path.dirname(filename), exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, indent=2)


def set_total_size(dataset):
    """
    Update the 'total' and 'size' fields to the number of stored entries.

    Both fields receive the current length of the dataset's 'data' list;
    the dataset is modified in place.

    :param dataset: Dataset to manipulate
    """
    body = dataset['value']
    entry_count = len(body['data'])
    body['total'] = entry_count
    body['size'] = entry_count


In [37]:
# scrape_esse3 is not defined in this part of the notebook; it is called with
# the first course's web site and its result is unpacked into three values.
information, partitions, teaching_units = scrape_esse3(courses['value']['data'][0]['webSite'])

# Build the partition <-> professor link table: every teacher name listed on a
# partition is matched (case-insensitively, "name surname") against the
# professors of the first course.
course = courses['value']['data'][0]
partition_professors_dataset = initialize_dataset()
for partition in partitions:
    teacher = partition['teacher']
    for i, teacher_name in enumerate(teacher['name']):
        wanted = teacher_name.lower()
        for professor in course['professor']:
            full_name = " ".join([professor['name'], professor['surname']])
            if full_name.lower() == wanted:
                append_data(partition_professors_dataset, {
                    'partitionId': partition['id'],
                    'professorId': professor['id'],
                    # 'tenured' is indexed in parallel with 'name'
                    'tenured': teacher['tenured'][i]
                })
    # Remove the teacher details from every partition once they have been
    # linked. Doing this after the loop would only affect the last partition
    # and would raise NameError when there are no partitions at all.
    del partition['teacher']
print(partition_professors_dataset)

{'value': {'total': 0, 'size': 0, 'language': 'en', 'data': [{'partitionId': '37d87fb889394df087c23e65fa01b6ca', 'professorId': '26ddd5336d198053e6cffad8ffe2862f', 'tenured': False}, {'partitionId': '37d87fb889394df087c23e65fa01b6ca', 'professorId': '7cc92da07568a577d6b2682d602350e5', 'tenured': True}]}}
