In [90]:
import re
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Column order of the DataFrame returned by scrape_carsome (also used for empty results).
_CARSOME_COLUMNS = ['Tahun', 'Brand', 'Model', 'Jenis', 'Odometer', 'Transmisi', 'Lokasi', 'Harga']


def _parse_card(car, jenis):
    """Extract the fields of one listing from a ``mod-b-card__footer`` element.

    Args:
        car: Parsed element for a single listing card.
        jenis: Body type requested by the caller; copied (upper-cased) into
            the ``Jenis`` field because it is not read from the card itself.

    Returns:
        A dict keyed by ``_CARSOME_COLUMNS``, or ``None`` when the card does
        not contain the elements this parser expects.
    """
    paragraphs = car.find_all('p')
    other = car.find("div", class_="mod-b-card__car-other")
    price = car.find("div", class_="mod-card__price-cash")
    if len(paragraphs) < 2 or other is None or price is None:
        return None

    # Look the spans up once: [0] odometer, [1] transmission, [2] location.
    spans = other.find_all('span')
    title = paragraphs[0].get_text(strip=True).split(' ', 1)
    if len(spans) < 3 or len(title) != 2:
        return None

    tahun_mobil, brand_mobil = title
    model = paragraphs[1].get_text(strip=True).replace('\n', '')
    model = re.sub(' +', ' ', model)  # collapse runs of spaces
    # Keep only the text before the first space (drops whatever follows the number).
    odometer = spans[0].get_text(strip=True).split(' ', 1)[0]
    harga = price.get_text(strip=True).replace('Rp ', '').replace(' (Cash)', '')

    return {
        'Tahun': tahun_mobil.replace('\n', ''),
        'Brand': brand_mobil,
        'Model': model,
        'Jenis': jenis.upper(),
        'Odometer': odometer,
        'Transmisi': spans[1].get_text(strip=True),
        'Lokasi': spans[2].get_text(strip=True),
        'Harga': harga,
    }


def scrape_carsome(brand, tahun, jenis, timeout=10):
    """Scrape used-car listings from carsome.id for a brand, year and body type.

    Args:
        brand: Brand slug placed in the URL path (percent-encoded here).
        tahun: Year sent as the ``year`` query parameter.
        jenis: Body type sent as the ``bodyType`` query parameter; also
            written, upper-cased, into the ``Jenis`` column of every row.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame with the columns in ``_CARSOME_COLUMNS``; all values are
        the strings extracted from the page. The frame is empty (but keeps
        the same columns) when the request fails or no card can be parsed.
        Cards whose markup is not recognised are skipped and counted in a
        printed message instead of aborting the whole scrape.

    Note:
        Rows are returned as the page lists them; they are not re-filtered
        locally against ``tahun`` or ``jenis``.
    """
    # Encode the path segment so characters such as '/', '?' or spaces in the
    # brand cannot alter the request URL.
    url = f"https://www.carsome.id/beli-mobil-bekas/{quote(brand, safe='')}"
    try:
        # Passing the query via params lets requests encode the values; the
        # timeout prevents a stalled connection from blocking forever.
        response = requests.get(
            url,
            params={"bodyType": jenis, "year": tahun},
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("Gagal mengakses halaman:", e)
        return pd.DataFrame(columns=_CARSOME_COLUMNS)

    soup = BeautifulSoup(response.content, 'html.parser')
    cars = soup.find_all("div", class_="mod-b-card__footer")

    parsed = [_parse_card(car, jenis) for car in cars]
    scrape_list = [spec for spec in parsed if spec is not None]
    skipped = len(parsed) - len(scrape_list)
    if skipped:
        print(f"Melewati {skipped} kartu dengan format yang tidak dikenali.")

    return pd.DataFrame(scrape_list, columns=_CARSOME_COLUMNS)

# Read the brand, year and body type to scrape. Surrounding whitespace is
# stripped so it cannot end up in the request URL or in the 'Jenis' column.
brand = input("Masukkan brand mobil: ").strip().lower()
tahun = input("Masukkan tahun mobil: ").strip()
jenis = input("Masukkan jenis kendaraan: ").strip().lower()

# Run the scrape
result_df = scrape_carsome(brand, tahun, jenis)

# Show the scraped listings, or a notice when nothing came back
if not result_df.empty:
    print("Hasil pencarian:")
    print(result_df)
else:
    print("Tidak ada hasil yang ditemukan untuk kriteria yang dimasukkan.")


Masukkan brand mobil:  mazda
Masukkan tahun mobil:  2015
Masukkan jenis kendaraan:  suv


Hasil pencarian:
  Tahun         Brand             Model Jenis Odometer  Transmisi  \
0  2015         Mazda  2 R SKYACTIV 1.5   SUV  117.682  Automatic   
1  2019         Mazda    CX-5 ELITE 2.5   SUV   29.440  Automatic   
2  2017         Mazda    CX-5 ELITE 2.5   SUV   44.889  Automatic   
3  2018         Mazda  CX-3 TOURING 2.0   SUV   73.975  Automatic   
4  2016         Mazda          2 GT 1.5   SUV   82.940  Automatic   
5  2015         Mazda       CX-5 GT 2.5   SUV  134.010  Automatic   
6  2021         Mazda          CX-3 1.5   SUV   34.284  Automatic   

              Lokasi        Harga  
0  Tangerang Selatan  165.500.000  
1             Bekasi  424.500.000  
2  Tangerang Selatan  372.500.000  
3  Tangerang Selatan  273.500.000  
4             Bekasi  192.000.000  
5  Tangerang Selatan  256.500.000  
6  Tangerang Selatan  312.000.000  


In [91]:
result_df

Unnamed: 0,Tahun,Brand,Model,Jenis,Odometer,Transmisi,Lokasi,Harga
0,2015,Mazda,2 R SKYACTIV 1.5,SUV,117.682,Automatic,Tangerang Selatan,165.500.000
1,2019,Mazda,CX-5 ELITE 2.5,SUV,29.44,Automatic,Bekasi,424.500.000
2,2017,Mazda,CX-5 ELITE 2.5,SUV,44.889,Automatic,Tangerang Selatan,372.500.000
3,2018,Mazda,CX-3 TOURING 2.0,SUV,73.975,Automatic,Tangerang Selatan,273.500.000
4,2016,Mazda,2 GT 1.5,SUV,82.94,Automatic,Bekasi,192.000.000
5,2015,Mazda,CX-5 GT 2.5,SUV,134.01,Automatic,Tangerang Selatan,256.500.000
6,2021,Mazda,CX-3 1.5,SUV,34.284,Automatic,Tangerang Selatan,312.000.000


In [92]:
# Export the scraped results to a comma-separated CSV file
# (the DataFrame index is written as the first column).
result_df.to_csv(
    path_or_buf='scraping_sample.csv',
    sep=',',
)