In [1]:
pip install beautifulsoup4 

Note: you may need to restart the kernel to use updated packages.


In [28]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re


def scrape_olx(merek, yearstart, yearend, jenis_kendaraan):
    base_url = 'https://www.carsome.id/beli-mobil-bekas'
    search_params = f'/{merek}/{jenis_kendaraan}?year={yearstart},{yearend}'
    url = base_url + search_params

    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        car_listings = soup.find_all('a', class_='mod-b-card__title')
        
        data = []
        for car in car_listings:
            info = car.find_all('p')
            year_model = info[0].get_text(strip=True)
            
            # Menggunakan regex untuk memisahkan tahun dan merek
            match = re.match(r'(\d{4})\s*(\w+)', year_model)
            if match:
                year = match.group(1)
                brand = match.group(2)
            else:
                year = None
                brand = None
            
            model = info[1].get_text(strip=True)
            
            # Menemukan informasi km, tipe mobil, dan lokasi
            other_info = car.find_next_sibling('div', class_='mod-b-card__car-other').find_all('span')
            if len(other_info) >= 3:
                mileage = other_info[0].get_text(strip=True)
                car_type = other_info[1].get_text(strip=True)
                location = other_info[2].get_text(strip=True)
            else:
                mileage = None
                car_type = None
                location = None
            
            # Menemukan harga
            price_tag = car.find_next_sibling('div', class_='mod-card__price-cash')
            if price_tag:
                price = price_tag.get_text(strip=True)
            else:
                price = None
            
            data.append([year, brand, model, mileage, car_type, location, price])

        # Menyimpan data ke dalam DataFrame pandas
        df = pd.DataFrame(data, columns=['Year', 'Brand', 'Model', 'Mileage(KM)', 'Car Type', 'Location', 'Price'])

        # Menyimpan DataFrame ke dalam file Excel
        excel_file = 'C:/Users/ilham/OneDrive/Documents/car_data_fix.xlsx'
        df.to_excel(excel_file, index=False)
        print(f"Data telah disimpan ke dalam file Excel '{excel_file}'")
    else:
        print("Failed to retrieve page.")

merek = input("Masukkan merek kendaraan: ")
yearstart = input("Masukkan tahun awal kendaraan: ")
yearend = input("Masukkan tahun akhir kendaraan: ")
jenis_kendaraan = input("Masukkan jenis kendaraan :")

scrape_olx(merek, yearstart, yearend, jenis_kendaraan)

Masukkan merek kendaraan: honda
Masukkan tahun awal kendaraan: brio
Masukkan tahun akhir kendaraan: 2018
Masukkan jenis kendaraan :2022
Data telah disimpan ke dalam file Excel 'C:/Users/ilham/OneDrive/Documents/car_data1.xlsx'
