<a href="https://colab.research.google.com/github/dilanka-heshan/Vehicle-Scraper/blob/main/vehicle_scraper_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Search endpoint prefix; the page number is appended to it for each request.
base_url = "https://riyasewana.com/search?page="

# Number of search-result pages to walk through (raise it to scrape more).
max_pages = 2

# Collected records: one dict per scraped vehicle listing.
vehicle_data = []

# Request headers that make the scraper look like a regular desktop browser.
headers = {}
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

# Walk the search-result pages one by one. For every listing found, read the
# preview fields (title, price, location, date), then fetch the listing's
# detail page and merge its contact number and specification table into a
# single dict that is appended to `vehicle_data`.
for page in range(1, max_pages + 1):
    print(f"Scraping page {page}...")
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(base_url + str(page), headers=headers, timeout=30)
        # Report HTTP errors explicitly instead of parsing an error page and
        # printing a misleading "Found 0 listings".
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Find all vehicle listings on the page
        listings = soup.find_all("li", class_="item")
        print(f"Found {len(listings)} listings on page {page}")

        for listing in listings:
            try:
                # Extract the link to the individual listing; skip entries without one
                link_element = listing.find("a")
                if not (link_element and link_element.has_attr("href")):
                    continue

                href = link_element["href"]
                # Site-relative links are prefixed with the site origin
                link = href if href.startswith("http") else "https://riyasewana.com" + href
                title = link_element.get_text(strip=True)

                # Extract basic info from the listing preview
                price_element = listing.select_one("div.boxintxt.b")
                price = price_element.get_text(strip=True) if price_element else "N/A"

                location_element = listing.select_one("div.boxintxt:not(.b):not(.s)")
                location = location_element.get_text(strip=True) if location_element else "N/A"

                date_element = listing.select_one("div.boxintxt.s")
                ad_date = date_element.get_text(strip=True) if date_element else "N/A"

                print(f"Scraping details from: {link}")

                # Go to the detailed listing page
                detail_response = requests.get(link, headers=headers, timeout=30)
                detail_soup = BeautifulSoup(detail_response.content, "html.parser")

                # Initialize a dictionary with the data we already have
                car_data = {
                    "Title": title,
                    "Price": price,
                    "Location": location,
                    "Ad Date": ad_date,
                    "Link": link
                }

                # Extract contact number
                contact_element = detail_soup.find("span", class_="moreph")
                if contact_element:
                    car_data["Contact"] = contact_element.get_text(strip=True)

                # Extract data from the specification table: each <p class="moreh">
                # is a field label whose value sits in the next sibling <td>.
                for row in detail_soup.select("table.moret tr"):
                    # NOTE: do not name this `headers` -- that would overwrite the
                    # request-headers dict used by every later requests.get() call.
                    for label in row.select("p.moreh"):
                        label_text = label.get_text(strip=True)

                        # Find the parent td and then the next td (which holds the value)
                        label_td = label.parent
                        if not label_td:
                            continue
                        value_td = label_td.find_next_sibling("td")
                        if value_td:
                            car_data[label_text] = value_td.get_text(strip=True)

                # The "Details" field is free text spread over several lines.
                # Join its fragments with a space so that text separated only by
                # line-break tags does not get glued together.
                details_header = detail_soup.find("p", class_="moreh", string="Details")
                if details_header:
                    details_parent = details_header.parent
                    if details_parent:
                        details_td = details_parent.find_next_sibling("td")
                        if details_td:
                            details_text = details_td.get_text(" ", strip=True).replace("\n", " ")
                            car_data["Details"] = details_text

                # Print extracted data for debugging
                print("Car Details:")
                for key, value in car_data.items():
                    print(f"{key}: {value}")
                print("-------------------")

                # Add to our dataset
                vehicle_data.append(car_data)

                # Delay between requests to avoid being blocked
                time.sleep(1)

            except Exception as e:
                # Best effort: a broken listing must not abort the rest of the page
                print(f"Error scraping listing: {e}")

        # Delay between page requests to avoid rate-limiting
        time.sleep(2)

    except Exception as e:
        # Best effort: a failed page must not abort the remaining pages
        print(f"Error processing page {page}: {e}")

# Persist the collected records as CSV, or report that nothing was scraped.
if not vehicle_data:
    print("No data was collected. Please check the website structure or your internet connection.")
else:
    # One row per listing; columns are the union of all keys seen across records.
    df = pd.DataFrame(vehicle_data)
    df.to_csv("riyasewana_vehicle_data.csv", index=False)
    print("✅ Scraping complete! Data saved to 'riyasewana_vehicle_data.csv'")
    print(f"Total vehicles scraped: {len(vehicle_data)}")

Scraping page 1...
Found 44 listings on page 1
Scraping details from: https://riyasewana.com/buy/mitsubishi-montero-sale-marawila-9523354
Car Details:
Title: Mitsubishi Montero 2006 SUV
Price: Negotiable
Location: Marawila
Ad Date: 2025-03-15
Link: https://riyasewana.com/buy/mitsubishi-montero-sale-marawila-9523354
Contact: 077 785 5806
Get Leasing: Central Finance වෙතින් මාසික වාරිකය රු.1852 සිට ලීසිං පහසුකම්.
Make: Mitsubishi
Model: Montero
YOM: 2006
Mileage (km): 180000
Gear: Automatic
Fuel Type: Diesel
Options: AIR CONDITION, POWER STEERING, POWER MIRROR, POWER WINDOW
Engine (cc): 2800
Details: 🚗 Mitsubishi Montero Exceed 🚗~ Year Of Manufacture - 2004~ Year Of Register- 2006~ Diesel Converted (4M40)~ Original Book~ Original Interior~ Black Fabric Interior~ Teak Interior~ Line AC~ 7 Seater~ New Tyres~ Superb Condition Jeep~Hand-3,500,000/=~Speed Draft-9,500,000/=(Monthly Interest- 114,000/=)~ Price Can Be Negotiable After Inspection~ Location - Marawila~ 0777212406~0777855806~077446



Car Details:
Title: Honda Insight Ze2 2009 Car
Price: Rs. 7,450,000
Location: Galle
Ad Date: 2025-03-15
Link: https://riyasewana.com/buy/honda-insight-ze2-sale-galle-9524685
Contact: 076 413 3998
Get Leasing: Central Finance වෙතින් මාසික වාරිකය රු.1852 සිට ලීසිං පහසුකම්.
Make: Honda
Model: Insight Ze2
YOM: 2009
Mileage (km): 218000
Gear: Automatic
Fuel Type: Hybrid
Options: AIR CONDITION,POWER STEERING,POWER MIRROR,POWER WINDOW
Engine (cc): 13000
Details: අවම මූලික මුදල 👉 𝐑𝐬, 𝟏,𝟎𝟎𝟎,𝟎𝟎𝟎/-ඉක්මනින් විකිණීමට ඇත." 𝐀𝐯𝐚𝐢𝐥𝐚𝐛𝐥𝐞 𝐟𝐨𝐫 𝐐𝐮𝐢𝐜𝐤 𝐒𝐚𝐥𝐞 "𝐇𝐨𝐧𝐝𝐚 " 𝐙𝐄𝟐 " 𝐈𝐧𝐬𝐢𝐠𝐡𝐭 𝟐𝟎𝟎𝟗 𝐑𝐄𝐆 𝟐𝟎𝟏𝟏ලීසිං පහසුකම් සමගින් පමණි.🔴 076413399803rd Owner, 218,000 Km Done.𝐒𝐞𝐫𝐯𝐢𝐜𝐞 𝐑𝐞𝐜𝐨𝐫𝐝𝐬 𝐀𝐯𝐚𝐢𝐥𝐚𝐛𝐥𝐞.JBL Sound System,Auto AC, Auto Wiper,Retractable Winker Mirrors,Power Steering, Power Mirrors,Power Shutters, Central Locking,New Tyres,Alloy Wheels, Fog Lights,With many more Options.-------------------------------------𝟓𝟎% සිට 𝟗𝟎% දක්වා, ලීසිං පහසුකම්.𝐖𝐡𝐚𝐭𝐬𝐀𝐩𝐩 𝐮𝐬 𝐟𝐨𝐫 𝐦𝐨𝐫𝐞 𝐝𝐞𝐭𝐚𝐢𝐥𝐬.#honda #HondaInsight #insightforsale #Insight #in