In [1]:
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from professions_list import ALL_PROFESSIONS, PROFESSIONS_LIST
from hh_resume_parser import HHResumeParser
from hh_vacancy_parser import HHVacancyParser

In [2]:
def get_unloaded_position():
    """Return the 'it_tech' professions that have no saved CSV file yet.

    The current working directory is scanned for ``resumes_*.csv`` and
    ``vacancies_*.csv`` files; the profession name is taken from the part of
    the file name between the first and the second underscore (or the end of
    the name when there is no second underscore).

    Returns:
        list[str]: Entries of ``PROFESSIONS_LIST['it_tech']`` for which no
        such file was found, sorted ascending.
    """
    def get_loaded_position(prefix=None):
        """Collect profession names encoded in ``<prefix>_<profession>[_...].csv`` files."""
        loaded_position = []
        for file in Path('.').glob(f'{prefix}_*.csv'):
            # Split the stem rather than the full name: otherwise a file
            # called '<prefix>_<profession>.csv' would yield '<profession>.csv'
            # and never match an entry of PROFESSIONS_LIST.
            position = file.stem.split('_')[1]
            loaded_position.append(position)
        return loaded_position

    loaded_resumes = get_loaded_position('resumes')
    loaded_vacancies = get_loaded_position('vacancies')
    # NOTE(review): a profession counts as loaded as soon as EITHER file
    # exists, so a run interrupted between the two saves will not re-fetch the
    # missing half. Confirm this is intended.
    loaded_position = set(loaded_resumes) | set(loaded_vacancies)
    return sorted(set(PROFESSIONS_LIST['it_tech']) - loaded_position)

In [3]:
# Professions without a saved CSV yet; this list drives the scraping loop below.
unloaded_position = get_unloaded_position()

In [4]:
# Both parsers are created with the same request settings.
parser_settings = {'timeout': 60, 'max_retries': 5}
resume_parser = HHResumeParser(**parser_settings)
vacancy_parser = HHVacancyParser(**parser_settings)

2025-10-15 18:51:40,833 - HHResumeParser - INFO - Парсер инициализирован: timeout=60s, max_retries=5, max_404_errors=5
2025-10-15 18:51:40,833 - HHVacancyParser - INFO - Парсер инициализирован: timeout=60s, max_retries=5, max_404_errors=5


In [None]:
# For every profession that is still missing, fetch resumes and then
# vacancies for Moscow and persist each result under the profession's name.
for prof in tqdm(unloaded_position):
    # The two parsers name their page-size argument differently
    # (items_on_page vs per_page), so the queries are built separately.
    resume_query = {
        'search_terms': [prof],
        'areas': ['Москва'],
        'pages': 250,
        'items_on_page': 20,
        'delay': 2,
    }
    df_resumes = resume_parser.load_resumes(**resume_query)
    resume_parser.save_to_files(df_resumes, resume_name=prof)

    vacancy_query = {
        'search_terms': [prof],
        'areas': ['Москва'],
        'pages': 250,
        'per_page': 20,
        'delay': 2,
    }
    df_vacancies = vacancy_parser.load_vacancies(**vacancy_query)
    vacancy_parser.save_to_files(df_vacancies, vacancy_name=prof)

  0%|                                                      | 0/31 [00:00<?, ?it/s]2025-10-15 18:51:40,847 - HHResumeParser - INFO - Поиск по запросу: 'Game Developer'
2025-10-15 18:51:40,848 - HHResumeParser - INFO - Страница 1/250
2025-10-15 18:51:40,848 - HHResumeParser - INFO - Запрос поиска резюме: https://hh.ru/search/resume
2025-10-15 18:51:41,517 - HHResumeParser - INFO - Страница поиска резюме успешно загружена
2025-10-15 18:51:41,592 - HHResumeParser - INFO - Найдено элементов резюме по data-qa: 20
2025-10-15 18:51:41,592 - HHResumeParser - INFO - Загрузка детальной страницы (попытка 1): https://hh.ru/resume/3f21fcef00027c9ca80039ed1f435876353051?query=Game+Developer&searchRid=17605435009436815e454f46398e7029&hhtmFrom=resume_search_result
2025-10-15 18:51:42,115 - HHResumeParser - INFO - Успешно собрано деталей: 10
2025-10-15 18:51:43,122 - HHResumeParser - INFO - Загрузка детальной страницы (попытка 1): https://hh.ru/resume/094fdd9b0006f596340039ed1f714c77456d4c?query=Game+De

In [None]:
# Holds only the most recently processed profession: the loop above
# reassigns df_resumes on every iteration.
df_resumes

In [None]:
def concat_df(prefix=None):
    """Combine every ``<prefix>_*.csv`` file of the working directory into one DataFrame.

    Args:
        prefix: File-name prefix to match, e.g. ``'resumes'`` or ``'vacancies'``.

    Returns:
        pandas.DataFrame: Rows of all matching files stacked in file-name
        order under a fresh 0..n-1 index, or an empty DataFrame when no file
        matches the prefix.
    """
    # Sort the matches: glob order depends on the filesystem, and the row
    # order of the result should be reproducible between runs.
    files = sorted(Path('.').glob(f'{prefix}_*.csv'))
    if not files:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame instead so callers can still inspect .shape.
        return pd.DataFrame()

    return pd.concat((pd.read_csv(file) for file in files), ignore_index=True)

In [None]:
# Merge all resumes_*.csv files from the working directory into one table.
resumes_df = concat_df('resumes')

In [None]:
# (rows, columns) of the combined resumes table.
resumes_df.shape

In [None]:
# Merge all vacancies_*.csv files from the working directory into one table.
vacancies_df = concat_df('vacancies')

In [None]:
# (rows, columns) of the combined vacancies table.
vacancies_df.shape

### 