### Let's Start

# Step 1: Importing Libraries

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time
import re

# Step 2: Prompt the user for input

In [2]:
brand = input("Enter the brand name (e.g., Toyota): ").strip()
city = input("Enter the city or location (e.g., Mumbai): ").strip()

# Step 3: Update the URL based on the user input

In [3]:

base_url = f'https://www.cars24.com/buy-used-{brand.lower()}-cars-'
city_slug = city.lower().replace(' ', '-')  # Convert city to a URL-friendly slug
cars24_url = f'{base_url}{city_slug}/'


# Step 4: Set up Selenium WebDriver

In [4]:

options = Options()
options.add_argument("--headless")  # Run in headless mode for efficiency
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Open the page
driver.get(cars24_url)


# Step 5: Scroll to load dynamic content

In [5]:
last_height = driver.execute_script("return document.body.scrollHeight")

# Start an infinite loop to keep scrolling until no new content is loaded
while True:
    # Scroll down by 10,000 pixels
    driver.execute_script("window.scrollBy(0, 10000);")
    
    # Wait for 5 seconds to allow the page to load new content
    time.sleep(5)
    
    # Get the new height of the page after scrolling
    new_height = driver.execute_script("return document.body.scrollHeight")
    
    # Check if the new height is the same as the last height
    if new_height == last_height:
        # If heights are the same, no new content has been loaded, so exit the loop
        break
    
    # Update last_height with the new height for the next iteration
    last_height = new_height

# Get page source after scrolling
page_source = driver.page_source
driver.quit()

# Step 6: Extract car data

In [6]:
soup = BeautifulSoup(page_source, "html.parser")

cars_data = []

# Locate the car elements
car_elements = soup.find_all('div', class_='_2YB7p')

for car_element in car_elements:
    # Extract car model and clean it
    car_model_tag = car_element.find('h3', class_='_11dVb')
    if car_model_tag:
        car_model = car_model_tag.text
        year = car_model.split()[0]
        model = ' '.join(car_model.split()[1:]).replace(f'{brand} ', '')
    else:
        year = model = 'Unknown'

    # Extract car details (km run, fuel type, transmission type)
    details_tag = car_element.find('ul', class_='_3J2G-')
    if details_tag:
        details = details_tag.find_all('li')
        km_run = details[0].text.strip() 
        fuel_type = details[2].text.strip() 
        transmission = details[4].text.strip() 
    else:
        km_run = fuel_type = transmission = 'Unknown'

    # Extract car price
    price_tag = car_element.find('strong', class_='_3RL-I')
    price = price_tag.text if price_tag else 'Unknown'

    # Set location based on user input
    location = city

    cars_data.append({
        'Brand': brand,
        'Manufacturing Year': year,
        'Model': model,
        'Kilometers Driven': km_run,
        'Fuel Type': fuel_type,
        'Transmission Type': transmission,
        'Price': price,
        'Location': location
    })


# Step 7: Get the dataframe

In [7]:
# Create DataFrame
cars24_df = pd.DataFrame(cars_data)
cars24_df

Unnamed: 0,Brand,Manufacturing Year,Model,Kilometers Driven,Fuel Type,Transmission Type,Price,Location
0,toyota,2018,Toyota YARIS VX MT,"30,509 km",Petrol,Manual,₹7.83 Lakh,Mumbai
1,toyota,2021,Toyota URBAN CRUISER PREMIUM GRADE MT,"18,001 km",Petrol,Manual,₹8.95 Lakh,Mumbai
2,toyota,2011,Toyota Etios Liva G,"79,645 km",Petrol,Manual,₹2.28 Lakh,Mumbai
3,toyota,2022,Toyota URBAN CRUISER PREMIUM GRADE AT,"34,486 km",Petrol,Automatic,₹9.89 Lakh,Mumbai
4,toyota,2011,Toyota Etios Liva G,"77,595 km",Petrol,Manual,₹2.37 Lakh,Mumbai
5,toyota,2018,Toyota Corolla Altis VL CVT PETROL,"74,502 km",Petrol,Automatic,₹9.58 Lakh,Mumbai
6,toyota,2017,Toyota Corolla Altis VL CVT PETROL,"56,595 km",Petrol,Automatic,₹10.12 Lakh,Mumbai
7,toyota,2019,Toyota Glanza G CVT,"16,870 km",Petrol,Automatic,₹6.02 Lakh,Mumbai
8,toyota,2018,Toyota Corolla Altis VL CVT PETROL,"77,581 km",Petrol,Automatic,₹9.80 Lakh,Mumbai
9,toyota,2020,Toyota Glanza V CVT,"57,023 km",Petrol,Automatic,₹6.83 Lakh,Mumbai


# Step 8: Data Cleaning

In [8]:
def clean_kilometers(km_str):
    # Remove 'km' and any extra spaces
    str = km_str.replace(' km', '').strip()
    return int(str.replace(',', ''))

def convert_price(price_str):
    # Remove the '₹' symbol and 'Lakh' suffix
    price_str = price_str.replace('₹', '').replace(' Lakh', '').strip()
    
    # Convert the price to float and scale it up
    return int(float(price_str.replace(',', '')) * 100)*1000


# Apply cleaning functions to the DataFrame
cars24_df['Kilometers Driven'] = cars24_df['Kilometers Driven'].apply(clean_kilometers)
cars24_df['Price'] = cars24_df['Price'].apply(convert_price)

cars24_df


Unnamed: 0,Brand,Manufacturing Year,Model,Kilometers Driven,Fuel Type,Transmission Type,Price,Location
0,toyota,2018,Toyota YARIS VX MT,30509,Petrol,Manual,783000,Mumbai
1,toyota,2021,Toyota URBAN CRUISER PREMIUM GRADE MT,18001,Petrol,Manual,894000,Mumbai
2,toyota,2011,Toyota Etios Liva G,79645,Petrol,Manual,227000,Mumbai
3,toyota,2022,Toyota URBAN CRUISER PREMIUM GRADE AT,34486,Petrol,Automatic,989000,Mumbai
4,toyota,2011,Toyota Etios Liva G,77595,Petrol,Manual,237000,Mumbai
5,toyota,2018,Toyota Corolla Altis VL CVT PETROL,74502,Petrol,Automatic,958000,Mumbai
6,toyota,2017,Toyota Corolla Altis VL CVT PETROL,56595,Petrol,Automatic,1011000,Mumbai
7,toyota,2019,Toyota Glanza G CVT,16870,Petrol,Automatic,602000,Mumbai
8,toyota,2018,Toyota Corolla Altis VL CVT PETROL,77581,Petrol,Automatic,980000,Mumbai
9,toyota,2020,Toyota Glanza V CVT,57023,Petrol,Automatic,683000,Mumbai


# Step 9: Save the data

In [9]:
#Save the DataFrame to a CSV file
file_name = f'{brand.lower()}-{city.lower()}.csv'
cars24_df.to_csv(file_name, index=False)

# Display the DataFrame
cars24_df
print(f'Data saved to {file_name}')


Data saved to toyota-mumbai.csv
