In [13]:
import pandas as pd
import numpy as np
from datetime import datetime
import sqlite3
import requests
from bs4 import BeautifulSoup
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Database setup
def init_db(db_path='cars.db'):
    """Open the SQLite database and make sure the ``listings`` table exists.

    Args:
        db_path: Database location passed straight to ``sqlite3.connect``.
            Defaults to ``'cars.db'`` (the file this notebook has always
            used); pass ``':memory:'`` for a throwaway database.

    Returns:
        An open ``sqlite3.Connection``. The caller owns it and is
        responsible for closing it.

    Raises:
        sqlite3.Error: If the table cannot be created. The connection is
            closed before the error propagates so it is not leaked.
    """
    conn = sqlite3.connect(db_path)
    try:
        # IF NOT EXISTS keeps this safe to call on every notebook re-run.
        conn.execute('''CREATE TABLE IF NOT EXISTS listings
                 (vin TEXT, price INTEGER, mileage INTEGER, 
                  location TEXT, date_listed DATE, 
                  last_updated DATE, dealer TEXT)''')
        conn.commit()
    except Exception:
        # Don't hand back (or leak) a half-initialised connection.
        conn.close()
        raise
    return conn

def setup_driver():
    """Create a Chrome WebDriver that runs with the ``--headless`` argument.

    The value returned by ``ChromeDriverManager().install()`` is wrapped in a
    selenium ``Service`` and handed to ``webdriver.Chrome`` together with the
    configured options.

    Returns:
        The ``webdriver.Chrome`` instance. This function never quits it.
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument('--headless')

    driver_binary = ChromeDriverManager().install()
    chrome_service = Service(driver_binary)

    return webdriver.Chrome(service=chrome_service, options=chrome_opts)

def fetch_cars(make, model, zip_code):
    """Load a cars.com search-results page in a browser and return its HTML.

    A new driver is created via ``setup_driver()`` for every call and is
    always shut down before returning, even if navigation fails.

    Args:
        make: Value interpolated into the ``makes[]`` query parameter.
        model: Value interpolated into the ``models[]`` query parameter.
        zip_code: Value interpolated into the ``zip`` query parameter.

    Returns:
        ``driver.page_source`` captured after a fixed 3-second wait.

    NOTE(review): make/model/zip_code are inserted into the URL verbatim
    (no URL-encoding or normalisation) — confirm the site accepts the
    values in the form callers pass them.
    """
    driver = setup_driver()
    try:
        url = f"https://www.cars.com/shopping/results/?stock_type=all&makes[]={make}&models[]={model}&zip={zip_code}"
        driver.get(url)
        # Fixed pause before snapshotting the page; presumably to let
        # client-side rendering finish — TODO confirm 3 s is sufficient.
        time.sleep(3)
        return driver.page_source
    finally:
        # Without this every call leaves a headless Chrome process running.
        driver.quit()

# Open the database via init_db() (which also ensures the `listings` table
# exists) and keep the connection in the notebook-level variable `conn`.
conn = init_db()

In [15]:
# Search parameters for a single manual fetch.
make = "BMW"
model = "m240i"
zip_code = "11375"

# Fetch the results page and preview only the first 500 characters of the
# HTML to sanity-check that a page came back.
html = fetch_cars(make, model, zip_code)
print(html[:500])

<html lang="en" class="ep-theme-spark"><head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title data-suffix=" | Cars.com">New and Used Vehicles for Sale Near Forest Hills, NY | Cars.com</title>
    <meta name="description" content="Shop new and used vehicles for sale at Cars.com. Research, compare, and save listings, or contact sellers directly from 4,680 vehicles nationwide.">

<


In [17]:
def parse_listings(html):
    """Extract price, title and mileage from every vehicle card in the HTML.

    Cards are the elements matching
    ``div[data-tracking-type="srp-vehicle-card"]``. A card is skipped when
    any of the three fields (``.primary-price``, ``h2``, ``.mileage``) is
    missing, so the result only contains complete records.

    Args:
        html: HTML text of a search-results page.

    Returns:
        A list of dicts with keys ``'price'``, ``'title'``, ``'mileage'``
        (whitespace-stripped element text, not converted to numbers) and
        ``'date_checked'`` (``datetime.now()`` at parse time).
    """
    soup = BeautifulSoup(html, 'html.parser')
    cars = []

    for listing in soup.select('div[data-tracking-type="srp-vehicle-card"]'):
        price_el = listing.select_one('.primary-price')
        title_el = listing.select_one('h2')
        mileage_el = listing.select_one('.mileage')

        # select_one returns None when nothing matches. Checking explicitly
        # (instead of a bare `except:`) still skips incomplete cards but no
        # longer hides unrelated errors or swallows KeyboardInterrupt.
        if price_el is None or title_el is None or mileage_el is None:
            continue

        cars.append({
            'price': price_el.text.strip(),
            'title': title_el.text.strip(),
            'mileage': mileage_el.text.strip(),
            'date_checked': datetime.now()
        })

    return cars

# Smoke test: fetch one results page and run it through parse_listings.
# This starts a browser and hits the live site, so the output changes from
# run to run.
# NOTE(review): the recorded output below lists non-Camry models (Matrix,
# Supra, Avalon, ...), so the models[] filter may not be taking effect —
# confirm the query-parameter values the site expects.
html = fetch_cars('Toyota', 'Camry', '11375')
cars = parse_listings(html)
print(cars)

[{'price': '$13,997', 'title': '2010 Toyota Matrix Base', 'mileage': '22,616 mi.', 'date_checked': datetime.datetime(2025, 2, 1, 14, 27, 49, 838338)}, {'price': '$42,800', 'title': '2022 Toyota GR Supra 3.0 Premium', 'mileage': '46,618 mi.', 'date_checked': datetime.datetime(2025, 2, 1, 14, 27, 49, 838601)}, {'price': '$28,295', 'title': '2021 Toyota Avalon Hybrid Limited', 'mileage': '48,191 mi.', 'date_checked': datetime.datetime(2025, 2, 1, 14, 27, 49, 838801)}, {'price': '$41,998', 'title': '2025 Toyota Crown Signia XLE', 'mileage': '2,604 mi.', 'date_checked': datetime.datetime(2025, 2, 1, 14, 27, 49, 839022)}, {'price': '$56,341', 'title': '2020 Toyota Supra 3.0 Premium Launch Edition', 'mileage': '1,780 mi.', 'date_checked': datetime.datetime(2025, 2, 1, 14, 27, 49, 839218)}, {'price': '$10,698', 'title': '2017 Toyota Yaris iA Base', 'mileage': '56,384 mi.', 'date_checked': datetime.datetime(2025, 2, 1, 14, 27, 49, 839410)}, {'price': '$25,990', 'title': '2010 Toyota FJ Cruiser'