In [1]:
import os
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from dotenv import load_dotenv
from professions_list import ALL_PROFESSIONS, PROFESSIONS_LIST
from hh_resume_parser_db import HHResumeParser
from clickhouse_driver import Client

In [2]:
# Load settings from a local .env file (python-dotenv) into the process
# environment so the os.getenv lookups below can see them.
load_dotenv()

# ClickHouse connection settings, read from the environment.
# NOTE: os.getenv returns None for an unset variable, so any of these may be
# None at runtime despite the `str` annotation.
CLICKHOUSE_HOST: str = os.getenv("CLICKHOUSE_HOST")
CLICKHOUSE_USER: str = os.getenv("CLICKHOUSE_USER")
CLICKHOUSE_PASSWORD: str = os.getenv("CLICKHOUSE_PASSWORD")
CLICKHOUSE_DATABASE: str = os.getenv("CLICKHOUSE_DATABASE")

In [3]:
# Single ClickHouse client shared by the rest of the notebook, configured from
# the CLICKHOUSE_* settings read from the environment.
clickhouse = Client(
    host=CLICKHOUSE_HOST,
    port=9000,  # integer, as the driver's API documents (was the string '9000')
    user=CLICKHOUSE_USER,
    password=CLICKHOUSE_PASSWORD,
    database=CLICKHOUSE_DATABASE,
    # use_numpy makes the driver read columns through NumPy; kept because the
    # notebook fetches results with query_dataframe.
    settings={'use_numpy': True},
)

In [4]:
def get_unloaded_position(clickhouse, professions=None):
    """Return the candidate professions that have no rows in ``hh_resumes`` yet.

    Args:
        clickhouse: Client object exposing ``query_dataframe(sql)``; the
            returned frame is expected to carry a ``search_query`` column.
        professions: Optional iterable of candidate search queries. When
            omitted, ``PROFESSIONS_LIST['it_tech']`` is used.

    Returns:
        Sorted list of candidates whose value does not appear in the
        ``search_query`` column of ``hh_resumes``.
    """
    if professions is None:
        professions = PROFESSIONS_LIST['it_tech']

    # DISTINCT lets the server collapse duplicates instead of shipping one row
    # per stored resume just to deduplicate them client-side.
    loaded = clickhouse.query_dataframe("""
    select distinct search_query
    from hh_resumes
    """)

    # An empty table can come back as a frame without any columns (this seems
    # to depend on the driver version -- TODO confirm), so avoid a KeyError.
    if 'search_query' in loaded.columns:
        loaded_position = set(loaded['search_query'].tolist())
    else:
        loaded_position = set()

    return sorted(set(professions) - loaded_position)

In [5]:
# Work queue for this run: the sorted 'it_tech' professions whose search query
# is not yet present in hh_resumes.
unloaded_position = get_unloaded_position(clickhouse)

In [6]:
# Parser used both to fetch resumes and to write them to ClickHouse.
# NOTE(review): timeout is presumably in seconds and max_retries presumably
# applies per request -- confirm against HHResumeParser.
resume_parser = HHResumeParser(timeout=60, max_retries=5)

In [None]:
# Process one profession at a time: fetch its resumes, then save them right
# away. Saving per profession presumably makes the run resumable, since
# get_unloaded_position skips search queries already stored -- TODO confirm
# that save_to_clickhouse writes search_query into hh_resumes.
for prof in tqdm(unloaded_position):
    df_resumes = resume_parser.load_resumes(
        search_terms=[prof],
        areas=['Москва'],  # 'Москва' = Moscow
        pages=250,  # presumably the maximum number of result pages -- TODO confirm
        items_on_page=20,
        delay=2  # presumably seconds between requests -- TODO confirm
    )

    resume_parser.save_to_clickhouse(df_resumes, clickhouse)

  0%|                                                                                                                                                                                                                                                                          | 0/63 [00:00<?, ?it/s]
Сбор резюме:   0%|                                                                                                                                                                                                                                                            | 0/250 [00:00<?, ?it/s][A
Сбор резюме:   0%|▊                                                                                                                                                                                                | 1/250 [00:00<00:00, 1933.75it/s, Резюме=0, Запрос=1C-программист, Страница=1/250][A

### 