In [11]:
def get_bachelors_programs_json():
    """
    Scrape the HSE programme catalogue and describe every bachelor's programme.

    Downloads https://www.hse.ru/education/programs and walks every ``div``
    whose class matches ``education_bachelor``; each such section carries a
    field-of-study heading and a list of programme items.

    Returns:
        dict: maps the programme name to a dict with the keys ``campus``,
        ``faculty``, ``duration``, ``form``, ``link``, ``field`` and
        ``openings``.  ``openings`` always holds ``free`` and ``paid_rus``
        and, when the item lists more than one paid entry, ``paid_foreign``.
        Programmes sharing the same name overwrite each other, so the last
        one found on the page wins.

    Raises:
        requests.RequestException: if the page cannot be downloaded; this
            includes a timeout and an HTTP error status.
        ValueError: if a places entry does not start with an integer.
    """
    import re
    import requests
    from bs4 import BeautifulSoup

    def leading_int(tag):
        # The count is the first whitespace-separated token of the entry;
        # a blank entry is treated as zero places.
        words = tag.text.strip().replace("\xa0", " ").split()
        return int(words[0]) if words else 0

    url = "https://www.hse.ru/education/programs"
    # Bound the wait and fail loudly instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    page = BeautifulSoup(response.text, 'html.parser')
    data_programs_bac_json = {}

    sections = page.body.find_all("div", class_=re.compile("education_bachelor"))
    for section in sections:
        field = section.h3.text  # field-of-study heading of the section

        for item in section.find_all("div", class_="edu-programm__item small"):
            anchor = item.a
            program = str(anchor.text)  # programme name, used as the dict key
            edu_text = item.find("div", class_="edu-programm__edu").text

            # Free places: first and last entry are added together.
            # NOTE(review): with more than two entries the middle ones are
            # ignored, as before -- confirm this is intended.
            free_tags = item.find_all("div", class_="edu-programm__place_free")
            if len(free_tags) > 1:
                openings_free = leading_int(free_tags[0]) + leading_int(free_tags[-1])
            elif free_tags:
                openings_free = leading_int(free_tags[0])
            else:
                openings_free = 0

            # Paid places: the first entry is stored as "paid_rus", the last
            # one (when there are several) as "paid_foreign".  An item with
            # no paid entry used to raise IndexError; it now records 0.
            paid_tags = item.find_all("div", class_="edu-programm__place_paid")
            openings = {
                "free": openings_free,
                "paid_rus": leading_int(paid_tags[0]) if paid_tags else 0,
            }
            if len(paid_tags) > 1:
                openings["paid_foreign"] = leading_int(paid_tags[-1])

            data_programs_bac_json[program] = {
                "campus": item.find("div", class_="edu-programm__campus").text,
                "faculty": item.find("div", class_="edu-programm__unit").text,
                # first token of the "edu" block is kept as the duration
                "duration": edu_text.split()[0],
                "form": item.find("span", class_="edu-programm__edu_offline").text,
                "link": anchor.get("href"),
                "field": field,
                "openings": openings,
            }

    return data_programs_bac_json


In [13]:
def get_masters_programs_json():
    """
    Scrape the HSE programme catalogue and describe every master's programme.

    Downloads https://www.hse.ru/education/programs and walks every ``div``
    whose class matches ``education_magister``; each such section carries a
    field-of-study heading and a list of programme items.

    Returns:
        dict: maps the programme name to a dict with the keys ``campus``,
        ``faculty``, ``duration``, ``form``, ``link``, ``field`` and
        ``openings``.  ``openings`` always holds ``free`` and ``paid_rus``
        and, when the item lists more than one paid entry, ``paid_foreign``.
        Programmes sharing the same name overwrite each other, so the last
        one found on the page wins.

    Raises:
        requests.RequestException: if the page cannot be downloaded; this
            includes a timeout and an HTTP error status.
        ValueError: if a places entry does not start with an integer.
    """
    import re
    import requests
    from bs4 import BeautifulSoup

    def leading_int(tag):
        # The count is the first whitespace-separated token of the entry;
        # a blank entry is treated as zero places.
        words = tag.text.strip().replace("\xa0", " ").split()
        return int(words[0]) if words else 0

    url = "https://www.hse.ru/education/programs"
    # Bound the wait and fail loudly instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    page = BeautifulSoup(response.text, 'html.parser')
    data_programs_mag_json = {}

    sections = page.body.find_all("div", class_=re.compile("education_magister"))
    for section in sections:
        field = section.h3.text  # field-of-study heading of the section

        for item in section.find_all("div", class_="edu-programm__item small"):
            anchor = item.a
            program = str(anchor.text)  # programme name, used as the dict key
            edu_text = item.find("div", class_="edu-programm__edu").text

            # Free places: first and last entry are added together.
            # NOTE(review): with more than two entries the middle ones are
            # ignored, as before -- confirm this is intended.
            free_tags = item.find_all("div", class_="edu-programm__place_free")
            if len(free_tags) > 1:
                openings_free = leading_int(free_tags[0]) + leading_int(free_tags[-1])
            elif free_tags:
                openings_free = leading_int(free_tags[0])
            else:
                openings_free = 0

            # Paid places: the first entry is stored as "paid_rus", the last
            # one (when there are several) as "paid_foreign".  An item with
            # no paid entry used to raise IndexError; it now records 0.
            paid_tags = item.find_all("div", class_="edu-programm__place_paid")
            openings = {
                "free": openings_free,
                "paid_rus": leading_int(paid_tags[0]) if paid_tags else 0,
            }
            if len(paid_tags) > 1:
                openings["paid_foreign"] = leading_int(paid_tags[-1])

            data_programs_mag_json[program] = {
                "campus": item.find("div", class_="edu-programm__campus").text,
                "faculty": item.find("div", class_="edu-programm__unit").text,
                # first token of the "edu" block is kept as the duration
                "duration": edu_text.split()[0],
                "form": item.find("span", class_="edu-programm__edu_offline").text,
                "link": anchor.get("href"),
                "field": field,
                "openings": openings,
            }

    return data_programs_mag_json
