**О программе**
> парсинг вакансий по специализации "Аналитик по данным" любой квалификации<br>

**В программе реализовано**<br>
✅ проксирование запросов<br>
✅ логирование парсинга<br>
✅ обработка ошибок<br>
✅ автоскроллинг вниз<br>
✅ ожидание загрузки элементов<br>
✅ сохранение вакансий в CSV / SQLite / PostgreSQL / MongoDB<br>

In [None]:
!pip install undetected-chromedriver
!pip install pandas

In [None]:
import time
import pandas as pd
import logging
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [None]:
# Configure the root logger once: INFO and above, formatted as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

def init_driver(proxy=None):
    """Start a headless Chrome WebDriver.

    Args:
        proxy: Optional value for Chrome's ``--proxy-server`` switch.
            When falsy, no proxy switch is added.

    Returns:
        The started driver, or ``None`` when startup raised (the error
        is logged instead of propagated).
    """
    chrome_switches = [
        "--headless",
        "--disable-blink-features=AutomationControlled",
    ]
    if proxy:
        chrome_switches.append(f'--proxy-server={proxy}')

    chrome_options = Options()
    for switch in chrome_switches:
        chrome_options.add_argument(switch)

    try:
        browser = webdriver.Chrome(options=chrome_options)
        logging.info("Драйвер успешно запущен")
        return browser
    except Exception as err:
        logging.error(f"Ошибка запуска драйвера: {err}")
        return None


def scroll_to_bottom(driver, pause=2, max_scrolls=None):
    """Scroll the page down until its height stops growing.

    Args:
        driver: Object exposing ``execute_script(js)`` (a Selenium
            WebDriver in this notebook).
        pause: Seconds to sleep after each scroll so lazily loaded
            content has time to appear.
        max_scrolls: Optional upper bound on scroll attempts. ``None``
            (the default) scrolls until the height stabilises, which
            never terminates on a page that keeps growing.

    Returns:
        None. Completion is reported through the log.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0

    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        scrolls_done += 1

        new_height = driver.execute_script("return document.body.scrollHeight")
        # An unchanged height means no new content was loaded by the scroll.
        if new_height == last_height:
            break
        last_height = new_height
    logging.info("Прокрутка завершена")


def wait_for_cards(driver, timeout=10):
    """Wait until an element with class ``vacancy-card`` is present.

    Args:
        driver: Selenium WebDriver on which the page was requested.
        timeout: Maximum number of seconds to wait (default 10).

    Returns:
        ``True`` if the wait succeeded, ``False`` if it raised. The
        failure is logged rather than propagated, so the caller decides
        whether to continue.
    """
    card_locator = (By.CLASS_NAME, "vacancy-card")
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(card_locator)
        )
    except Exception as e:
        logging.error(f"Вакансии не найдены: {e}")
        return False
    return True

def get_vacancies(driver, url):
    """Open *url* and collect title, company and tags from each vacancy card.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the vacancy listing page.

    Returns:
        A list of dicts with the keys ``"title"``, ``"company_name"``
        and ``"tags"`` (tag texts joined with ``", "``). Cards that fail
        to parse are skipped with a warning; a page-level error is
        logged and whatever was collected so far is returned.
    """

    def parse_card(card):
        # Lookup failures propagate to the caller, which skips the card.
        heading = card.find_element(By.CLASS_NAME, "vacancy-card__title").text.strip()
        employer = card.find_element(By.CLASS_NAME, "vacancy-card__company-title").text.strip()
        labels = [
            element.text.strip()
            for element in card.find_elements(By.CLASS_NAME, "inline-list__item")
        ]
        return {
            "title": heading,
            "company_name": employer,
            "tags": ", ".join(labels),
        }

    collected = []
    try:
        driver.get(url)
        wait_for_cards(driver)
        scroll_to_bottom(driver)

        found_cards = driver.find_elements(By.CLASS_NAME, "vacancy-card")
        logging.info(f"Найдено карточек: {len(found_cards)}")

        for found in found_cards:
            try:
                collected.append(parse_card(found))
            except Exception as card_error:
                logging.warning(f"Ошибка при обработке карточки: {card_error}")
    except Exception as page_error:
        logging.error(f"Ошибка загрузки страницы: {page_error}")
    return collected


if __name__ == "__main__":
    # Script entry point: start the browser, scrape one listing page and
    # write the result to a CSV file in the working directory.
    proxy_ip = None  # optionally a "host:port" string for --proxy-server
    driver = init_driver(proxy=proxy_ip)

    if driver:
        # try/finally guarantees the browser is shut down even when
        # building or saving the DataFrame raises.
        try:
            url = "https://career.habr.com/vacancies?skills[]=Python"
            vacancies = get_vacancies(driver, url)

            if vacancies:
                output_path = "habr_vacancies.csv"
                df = pd.DataFrame(vacancies)
                df.to_csv(output_path, index=False)
                # Log the file that was actually written (the old message
                # named a non-existent .xlsx file).
                logging.info(f"Данные сохранены в {output_path}")
            else:
                logging.warning("Данные не были получены")
        finally:
            driver.quit()


**сохранение вакансий в SQLite**

In [None]:
import sqlite3
import logging

def save_to_sqlite(data, db_path="habr_vacancies.db"):
    """Append vacancies to the ``da_vacancies`` table of a SQLite database.

    The table is created on first use. All rows are written in a single
    transaction that is rolled back if any insert fails.

    Args:
        data: Iterable of dicts. ``title``, ``company_name`` and ``tags``
            are required keys. ``date_posted`` and ``scrapped_at`` (or
            ``scraped_at``) are optional and stored as NULL when absent.
        db_path: Path of the database file (default ``habr_vacancies.db``
            in the working directory).

    Raises:
        KeyError: If an item lacks one of the required keys.
        sqlite3.Error: If the database cannot be opened or written.
    """
    # Build all rows first so a malformed item fails before the DB is touched.
    rows = [
        (
            item['title'],
            item['company_name'],
            item['tags'],
            item.get('date_posted'),
            # Accept both spellings of the timestamp key.
            item.get('scrapped_at', item.get('scraped_at')),
        )
        for item in data
    ]

    conn = sqlite3.connect(db_path)
    try:
        # The connection context manager commits on success and rolls
        # back on error; it does not close the connection.
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS da_vacancies(
                    title TEXT,
                    company_name TEXT,
                    tags TEXT,
                    date_posted TEXT,
                    scrapped_at TEXT)
            """)
            conn.executemany(
                """INSERT INTO da_vacancies(title, company_name, tags, date_posted, scrapped_at)
                   VALUES (?, ?, ?, ?, ?)""",
                rows,
            )
    finally:
        conn.close()
    logging.info("Data successfully saved to SQLite database")


**сохранение вакансий в postgresql**

In [None]:
import psycopg2
import logging

def save_to_postgresql(data, user, password):
    """Append vacancies to ``da_vacancies`` in a local PostgreSQL database.

    Connects to the database ``habr_career_vacancies_da`` on ``localhost``,
    creates the table if needed and inserts all rows in one transaction.
    Any failure is logged and swallowed (best-effort save).

    Args:
        data: Iterable of dicts. ``title``, ``company_name`` and ``tags``
            are required keys. ``date_posted`` and ``scraped_at`` are
            optional and stored as NULL when absent.
        user: PostgreSQL user name.
        password: Password for ``user``. (The parameter used to be named
            ``pass``, a reserved word, which made the function a
            SyntaxError.)
    """
    try:
        rows = [
            (
                item['title'],
                item['company_name'],
                item['tags'],
                item.get('date_posted'),
                item.get('scraped_at'),
            )
            for item in data
        ]

        conn = psycopg2.connect(dbname="habr_career_vacancies_da",
                                user=user,
                                password=password,
                                host="localhost"
                                )
        try:
            with conn.cursor() as cursor:
                cursor.execute("""CREATE TABLE IF NOT EXISTS da_vacancies(
                            id SERIAL PRIMARY KEY,
                            title TEXT,
                            company_name TEXT,
                            tags TEXT,
                            date_posted TEXT,
                            parsed_at TEXT)
                            """)
                cursor.executemany(
                    "INSERT INTO da_vacancies(title, company_name, tags, date_posted, parsed_at) "
                    "VALUES (%s, %s, %s, %s, %s)",
                    rows,
                )
            conn.commit()
        finally:
            # Closing without a commit discards the open transaction, so
            # a failed insert leaves no partial data behind.
            conn.close()
        logging.info(msg='Saved into PostgreSQL')

    except Exception as e:
        logging.error(msg=f"Ошибка {e}")

In [None]:
#  Save vacancies to MongoDB

def save_to_mongodb(data):
    """Insert vacancies into the ``vacancies`` collection of database ``habr``.

    Connects to ``mongodb://localhost:27017/``. Any failure is logged and
    swallowed (best-effort save).

    Args:
        data: Iterable of dicts, one document per vacancy. The dicts are
            copied before insertion so the caller's objects do not gain
            an ``_id`` field.
    """
    # NOTE(review): MongoClient (pymongo) is not imported in the visible
    # cells; confirm it is in scope, otherwise every call logs a NameError.
    try:
        documents = [dict(item) for item in data]
        if not documents:
            # insert_many rejects an empty list, so skip the round trip.
            logging.warning("No vacancies to save into MongoDB")
            return

        client = MongoClient('mongodb://localhost:27017/')
        try:
            db = client['habr']
            collection = db['vacancies']
            collection.insert_many(documents)
        finally:
            client.close()
        logging.info(msg="Saved into MongoDB")
    except Exception as e:
        logging.error(f"MongoDB Error {e}")