# KOLESA.KZ parser (requests)
Короткий рабочий вариант. Запустите первую ячейку (импорты и функция `scrape_page`), затем последовательно выполните остальные ячейки.

In [8]:
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd

# HTTP headers passed to requests.get() by scrape_page: a browser-style
# User-Agent, a Russian Accept-Language preference and the site root as Referer.
# NOTE(review): presumably needed so the site serves the regular HTML listing
# rather than rejecting the default client — confirm.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Accept-Language": "ru-RU,ru;q=0.9",
    "Referer": "https://kolesa.kz/"
}

def _tag_text(tag):
    """Return the whitespace-stripped text of *tag*, or None if it was not found."""
    return tag.get_text(strip=True) if tag is not None else None


def scrape_page(url):
    """Fetch one listing page and return the ads found on it.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list with one dict per ``div.a-card`` element, holding the keys
        ``title``, ``price``, ``description``, ``city``, ``views`` and
        ``url``. A field is None when its element is missing from the card.
        An empty list is returned when the request fails, the response
        status is not 200, or the page cannot be parsed; the reason is
        printed. The function never raises.
    """
    from urllib.parse import urljoin

    try:
        r = requests.get(url, headers=HEADERS, timeout=(10, 20))
        if r.status_code != 200:
            # Say why the page is empty so the caller's "stop on empty page"
            # rule is not mistaken for the real end of the listing.
            print(f"Ошибка: HTTP {r.status_code} — {url}")
            return []

        soup = BeautifulSoup(r.text, "html.parser")
        results = []

        for card in soup.find_all("div", class_="a-card"):
            price = _tag_text(card.find("span", class_="a-card__price"))
            if price is not None:
                # Prices use non-breaking spaces as thousands separators.
                price = price.replace("\xa0", " ")

            # .get() instead of ["href"]: one anchor without an href must not
            # raise KeyError and discard every ad on the page.
            link_tag = card.find("a", class_="a-card__link")
            href = link_tag.get("href") if link_tag is not None else None
            # urljoin also copes with hrefs that are already absolute or
            # lack a leading slash, where plain concatenation would not.
            link = urljoin("https://kolesa.kz", href) if href else None

            results.append({
                "title": _tag_text(card.find("h5", class_="a-card__title")),
                "price": price,
                "description": _tag_text(
                    card.find("p", class_="a-card__description")
                ),
                "city": _tag_text(card.find("span", {"data-test": "region"})),
                "views": _tag_text(card.find("span", class_="nb-views")),
                "url": link,
            })

        return results

    except Exception as e:
        # Deliberately broad: callers rely on getting [] rather than an
        # exception, so any network or parsing failure is reported here.
        print("Ошибка:", e)
        return []


In [None]:
# Crawl listing pages 1..500 in order and collect every ad into all_ads.
# The crawl ends early at the first page that yields no ads (scrape_page
# also returns an empty list on errors, so a failed request stops it too).
all_ads = []

for page in range(1, 501):
    # The first page has no query string; later pages are addressed by ?page=N.
    url = "https://kolesa.kz/cars/"
    if page > 1:
        url = f"https://kolesa.kz/cars/?page={page}"

    data = scrape_page(url)
    print(f"Page {page}: {len(data)} ads")

    if data:
        all_ads += data
        # Pause between requests to avoid hammering the site.
        time.sleep(2)
        continue

    print("⛔ пусто — стоп")
    break

print("Всего объявлений:", len(all_ads))


Ошибка: HTTPSConnectionPool(host='kolesa.kz', port=443): Max retries exceeded with url: /cars/ (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x12bca6c60>, 'Connection to kolesa.kz timed out. (connect timeout=10)'))
Page 1: 0 ads
⛔ пусто — стоп
Всего объявлений: 0


In [10]:
# Turn the collected ad dicts into a table (one row per ad, one column per key)
# and show its first five rows as the cell output.
df = pd.DataFrame(data=all_ads)
df.head(n=5)

Unnamed: 0,title,price,description,city,views,url
0,ВАЗ (Lada) Lada 2121,2 000 000₸,"2008 г., Б/у внедорожник, 1.7 л, бензин, КПП м...",Алматы,0,https://kolesa.kz/a/show/207117446?search_id=9...
1,Mitsubishi Delica,8 500 000₸,"1995 г., Б/у минивэн, 3 л, бензин, Правый руль...",Алматы,0,https://kolesa.kz/a/show/205868127?search_id=9...
2,BMW 528,6 500 000₸,"2012 г., Б/у седан, 2 л, бензин, КПП автомат, ...",Караганда,0,https://kolesa.kz/a/show/206593170?search_id=9...
3,Toyota Camry,6 050 000₸,"2011 г., Б/у седан, 2.5 л, бензин, КПП автомат...",Актау,0,https://kolesa.kz/a/show/207108864?search_id=9...
4,Mazda 626,1 800 000₸,"1998 г., Б/у универсал, 1.8 л, бензин, КПП мех...",Шымкент,0,https://kolesa.kz/a/show/207105826?search_id=9...


In [13]:
# Report the number of rows, then print the per-column non-null counts and dtypes.
print("Объявлений:", df.shape[0])
df.info()


Объявлений: 2020
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2020 entries, 0 to 2019
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   title        2020 non-null   object
 1   price        2020 non-null   object
 2   description  2020 non-null   object
 3   city         2020 non-null   object
 4   views        2020 non-null   object
 5   url          2020 non-null   object
dtypes: object(6)
memory usage: 94.8+ KB


In [None]:
from pathlib import Path

# Save the scraped ads as CSV and Excel under ../data/raw.
# The directory is created first: to_csv / to_excel fail if it does not exist.
output_dir = Path("../data/raw")
output_dir.mkdir(parents=True, exist_ok=True)

# utf-8-sig adds a BOM at the start of the CSV file.
df.to_csv(output_dir / "kolesa_ads.csv", index=False, encoding="utf-8-sig")
df.to_excel(output_dir / "kolesa_ads.xlsx", index=False)

print("✅ Файлы сохранены")

✅ Файлы сохранены
