In [7]:
from bs4 import BeautifulSoup as BS4
import requests
import pandas as pd
import numpy as np
import seaborn as sns

In [8]:
# Request headers sent with every fetch: a desktop-Chrome User-Agent string
# and a catch-all Accept value.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/89.0.4389.82 Safari/537.36"
    ),
    "Accept": "*/*",
}

# Search URL on auto.ria.com, one query filter per line.
# NOTE(review): brand.id 24 / model.id 239 presumably select Ford Fiesta (the
# sample output lists only Ford Fiesta rows) and size=100 presumably sets the
# results per page; confirm against the site's search form.
URL = (
    'https://auto.ria.com/uk/search/'
    '?indexName=auto'
    '&brand.id[0]=24'
    '&model.id[0]=239'
    '&country.import.usa.not=1'
    '&price.currency=1'
    '&abroad.not=0'
    '&custom.not=1'
    '&size=100'
)

In [9]:
def get_html(url, params="", timeout=30):
    """Fetch ``url`` with the shared ``HEADERS`` and return the response.

    Args:
        url: Address to request.
        params: Query parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits forever on a stalled connection, which would
            hang the notebook.

    Returns:
        The ``requests.Response`` object. The status code is not checked
        here; callers must check it themselves.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    r = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    # Echo the URL that was actually requested (query parameters included)
    # so every fetch is visible in the cell output.
    print(r.request.url)
    return r

def _attr_of(tag, name):
    """Return attribute ``name`` of ``tag``, or None when the tag was not found."""
    return tag.get(name) if tag is not None else None


def _node_after(tag):
    """Return the node following ``tag`` in parse order, or None when the tag was not found."""
    return tag.next_element if tag is not None else None


def get_content(html):
    """Extract one record per ``<section class="ticket-item">`` in ``html``.

    Args:
        html: Markup of a search-results page.

    Returns:
        A list of dicts with the keys ``brand``, ``model``, ``year``, ``url``,
        ``price_USD``, ``km``, ``location``, ``fuel`` and ``transmission``.
        A field is None when the tag or attribute it is read from is absent
        from that listing, so one incomplete card does not abort the page.
    """
    soup = BS4(html, 'html.parser')
    cars = []

    for item in soup.find_all('section', class_='ticket-item'):
        # The hidden div carries brand, model and year as data-* attributes;
        # look it up once instead of once per field.
        meta = item.find('div', attrs={"class": 'hide'})
        link = item.find('a', attrs={"class": 'address'})
        price = item.find('div', attrs={"class": 'price-ticket'})

        cars.append({
            "brand": _attr_of(meta, 'data-mark-name'),
            "model": _attr_of(meta, 'data-model-name'),
            "year": _attr_of(meta, 'data-year'),
            "url": _attr_of(link, 'href'),
            "price_USD": _attr_of(price, 'data-main-price'),
            # These values are taken from the node that follows an icon tag.
            "km": _node_after(item.find('i', attrs={"title": 'Пробіг'})),
            "location": _node_after(item.find('i', attrs={"class": 'icon-location'})),
            "fuel": _node_after(item.find('i', attrs={"title": 'Тип палива'})),
            "transmission": _node_after(item.find('i', attrs={"title": 'Тип коробки передач'})),
        })

    return cars

def parser(pages=None):
    """Scrape search-result pages 0 through ``pages`` (inclusive).

    Args:
        pages: Index of the last page to fetch. When None (the default) the
            value is requested interactively via ``input``; passing it
            explicitly lets the notebook run without a prompt.

    Returns:
        A list with the records ``get_content`` extracted from every page
        that answered with HTTP 200. Pages that answer with any other status
        are reported and skipped, so the list may be empty.

    Raises:
        ValueError: If the page number cannot be converted to an integer.
    """
    if pages is None:
        pages = input("До какой страницы парсить? ")
    last_page = int(str(pages).strip())

    cars = []
    for page in range(0, last_page + 1):
        print(f'Парсим страницу: {page}')
        html = get_html(URL, params={'page': page})
        # Check every response: parsing an error page would silently yield
        # zero listings for that page.
        if html.status_code != 200:
            print(f"Ошибка получения страницы {page}: HTTP {html.status_code}")
            continue
        cars.extend(get_content(html.text))
    return cars

# Run the scraper and tabulate its records with an explicit column order.
cars = parser()
cars_df = pd.DataFrame(
    data=cars,
    columns=[
        'brand', 'model', 'year',
        'url', 'price_USD', 'km',
        'location', 'fuel', 'transmission',
    ],
)
# Keep this expression last so the notebook renders the preview.
cars_df.head()

До какой страницы парсить? 6
https://auto.ria.com/uk/search/?indexName=auto&brand.id%5B0%5D=24&model.id%5B0%5D=239&country.import.usa.not=1&price.currency=1&abroad.not=0&custom.not=1&size=100
Парсим страницу: 0
https://auto.ria.com/uk/search/?indexName=auto&brand.id%5B0%5D=24&model.id%5B0%5D=239&country.import.usa.not=1&price.currency=1&abroad.not=0&custom.not=1&size=100&page=0
Парсим страницу: 1
https://auto.ria.com/uk/search/?indexName=auto&brand.id%5B0%5D=24&model.id%5B0%5D=239&country.import.usa.not=1&price.currency=1&abroad.not=0&custom.not=1&size=100&page=1
Парсим страницу: 2
https://auto.ria.com/uk/search/?indexName=auto&brand.id%5B0%5D=24&model.id%5B0%5D=239&country.import.usa.not=1&price.currency=1&abroad.not=0&custom.not=1&size=100&page=2
Парсим страницу: 3
https://auto.ria.com/uk/search/?indexName=auto&brand.id%5B0%5D=24&model.id%5B0%5D=239&country.import.usa.not=1&price.currency=1&abroad.not=0&custom.not=1&size=100&page=3
Парсим страницу: 4
https://auto.ria.com/uk/search/?i

Unnamed: 0,brand,model,year,url,price_USD,km,location,fuel,transmission
0,Ford,Fiesta,2012,https://auto.ria.com/uk/auto_ford_fiesta_30603...,222000,98 тис. км,Київ,"Бензин, 1.6 л.",Автомат
1,Ford,Fiesta,2012,https://auto.ria.com/uk/auto_ford_fiesta_30523...,7500,121 тис. км,Дніпро (Дніпропетровськ),"Газ / Бензин, 1.2 л.",Ручна / Механіка
2,Ford,Fiesta,2012,https://auto.ria.com/uk/auto_ford_fiesta_30571...,8499,48 тис. км,Дніпро (Дніпропетровськ),"Бензин, 1.25 л.",Ручна / Механіка
3,Ford,Fiesta,2016,https://auto.ria.com/uk/auto_ford_fiesta_30528...,9700,77 тис. км,Одеса,"Бензин, 1.25 л.",Ручна / Механіка
4,Ford,Fiesta,2018,https://auto.ria.com/uk/auto_ford_fiesta_30629...,7600,90 тис. км,Одеса,"Бензин, 1.6 л.",Автомат


In [10]:
# Persist the scraped table next to the notebook (default to_csv options,
# so the row index is written as well).
cars_df.to_csv(path_or_buf="ford_cars.csv")