In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re
import random
from datetime import datetime

# List of user agents
# Pool of desktop browser User-Agent strings; get_random_user_agent() draws
# one of these at random.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36"
]

# List of colors for random selection (English)
# Canonical English color names. extract_color() matches text against these
# and falls back to a random entry when no color can be found.
color_options = ["White", "Black", "Silver", "Gray", "Red", "Blue", "Brown", "Bronze", "Green", "Gold", "Yellow", "Orange"]

# Malay to English color mapping, including variations like "Grey"
# Keys are lowercase; every value is one of the names in color_options.
# NOTE(review): "perak" is also the name of a Malaysian state (it appears in
# the location list used by the scraping loop), so location text may be read
# as the color Silver -- confirm whether that is intended.
color_map = {
    "putih": "White",
    "hitam": "Black",
    "perak": "Silver",
    "kelabu": "Gray",
    "grey": "Gray",
    "abu-abu": "Gray",
    "merah": "Red",
    "biru": "Blue",
    "coklat": "Brown",
    "gangsa": "Bronze",
    "hijau": "Green",
    "emas": "Gold",
    "kuning": "Yellow",
    "jingga": "Orange"
}

# Function to get a random user agent
def get_random_user_agent():
    """Return one User-Agent string picked at random from ``user_agents``."""
    agent = random.choice(user_agents)
    return agent

# Function to clean text
def clean_text(text):
    """Normalize scraped text: drop ellipses and collapse whitespace.

    Args:
        text: Raw text; may be ``None`` or empty.

    Returns:
        The cleaned string, or ``""`` when ``text`` is falsy or nothing
        is left after cleaning.
    """
    if not text:
        return ""
    # Remove runs of three or more dots *before* collapsing whitespace.
    # Doing it afterwards leaves stray or doubled spaces behind
    # (e.g. "Low mileage ..." would keep its trailing space).
    without_ellipses = re.sub(r'\.{3,}', '', text)
    return re.sub(r'\s+', ' ', without_ellipses).strip()

# Function to extract color
def extract_color(car, car_title, description):
    """Work out a listing's color, falling back to a random one.

    Sources are tried in order: the title, the description, then every
    ``div.listing__details div`` element (first through an explicit
    "color"/"warna" label, then through any color word in its text).
    When nothing matches, a random entry of ``color_options`` is returned.

    Args:
        car: Parsed listing element; only ``select`` is called on it.
        car_title: Listing title text.
        description: Free-text description of the listing.

    Returns:
        A name from ``color_options`` or a value of ``color_map``.
    """

    def find_color_in_text(text):
        """Return the first color mentioned in ``text``, else ``None``."""
        text_lower = text.lower()
        # English names first, tolerating "-en"/"-ish" suffixes.
        english_hit = next(
            (
                eng_color
                for eng_color in color_options
                if re.search(rf'\b{eng_color.lower()}(en|ish)?\b', text_lower)
            ),
            None,
        )
        if english_hit is not None:
            return english_hit
        # Then the keys of color_map (Malay words and alternate spellings).
        for malay_color, eng_color in color_map.items():
            if re.search(rf'\b{malay_color}\b', text_lower):
                return eng_color
        return None

    # Step 1: the title wins over every other source.
    color = find_color_in_text(car_title)
    if color:
        print(f"   🔍 Found color in title: {color}")
        return color

    # Step 2: the free-text description.
    color = find_color_in_text(description)
    if color:
        print(f"   🔍 Found color in description: {color}")
        return color

    # Step 3: walk the details section element by element.
    for elem in car.select('div.listing__details div'):
        text = elem.get_text(strip=True).lower()
        has_label = "color" in text or "warna" in text  # "warna" is Malay for color
        if has_label:
            color = None
            sibling = elem.find_next_sibling()
            if sibling:
                # Label and value live in adjacent elements.
                color = clean_text(sibling.get_text())
            else:
                # Label and value share one element, e.g. "color: red".
                labelled = re.search(r'(?:color|warna)\s*[:|-]?\s*(\w+)', text, re.IGNORECASE)
                if labelled:
                    color = labelled.group(1)
            if color:
                # Map the raw value onto a canonical English name.
                color_lower = color.lower()
                canonical = next(
                    (c for c in color_options if c.lower() == color_lower),
                    None,
                )
                if canonical is not None:
                    color = canonical
                elif color_lower in color_map:
                    color = color_map[color_lower]
                if color in color_options:
                    print(f"   🔍 Found color in details (labeled): {color}")
                    return color
        # No usable label: accept any color word in this element's text.
        color = find_color_in_text(text)
        if color:
            print(f"   🔍 Found color in details (unlabeled): {color}")
            return color

    # Step 4: nothing found anywhere.
    # NOTE(review): this value is invented, not scraped -- downstream data
    # cannot tell it apart from a real color.
    color = random.choice(color_options)
    print(f"   🔍 No color found, randomly assigned: {color}")
    return color

# Function to format car title
def format_car_title(title, year):
    """Return ``title`` normalized so that it begins with ``year``.

    Args:
        title: Listing title, with or without a year in it.
        year: Year for the listing, e.g. ``"2016"`` or ``"2015/2016"``
            (an ``int`` is accepted and converted to text).

    Returns:
        ``"<year> <rest of title>"``, or ``""`` when either argument is
        empty.
    """
    if not title or not year:
        return ""
    year = str(year)
    if title.startswith(year):
        return title
    # Drop a leading year (or year range) that disagrees with ``year``.
    remainder = re.sub(r'^(19|20)\d{2}(/(19|20)\d{2})?\s+', '', title)
    # Drop repeats of this listing's own year elsewhere in the title. Other
    # four-digit numbers are kept: they can be model names such as
    # "Peugeot 2008", which a blanket 19xx/20xx-stripping regex would eat.
    remainder = re.sub(rf'(?<![\w/]){re.escape(year)}(?![\w/])', ' ', remainder)
    remainder = re.sub(r'\s+', ' ', remainder).strip()
    return f"{year} {remainder}"

# Function to validate car title
def is_valid_car_title(title):
    """Decide whether ``title`` looks like a real car listing title.

    A title is accepted when it matches none of the generic page-chrome
    patterns, starts with a year (or year range), has at least three
    words, and names a known brand as a whole word.

    Args:
        title: Candidate title text; ``None`` or empty is rejected.

    Returns:
        ``True`` if the title passes every check, else ``False``.
    """
    if not title:
        return False
    generic_patterns = [
        r"Used Cars for sale in Malaysia", r"Financial calculator", r"^Used$",
        r"^Featured$", r"^Compare", r"^Save$", r"^Registration Card$",
        r"^Contact$", r"^WhatsApp$", r"CARSOME Certified", r"^CompareSave$"
    ]
    if any(re.search(pattern, title, re.IGNORECASE) for pattern in generic_patterns):
        return False
    if not re.match(r'^(19|20)\d{2}(/(19|20)\d{2})?\s', title):
        return False
    if len(title.split()) < 3:
        return False
    car_brands = [
        'Toyota', 'Honda', 'Proton', 'Perodua', 'BMW', 'Mercedes', 'Audi', 'Volkswagen',
        'Mazda', 'Mitsubishi', 'Nissan', 'Subaru', 'Ford', 'Hyundai', 'Kia', 'Suzuki',
        'Peugeot', 'Volvo', 'Lexus', 'Porsche', 'Ferrari', 'Lamborghini', 'Maserati',
        'Jaguar', 'Land Rover', 'Mini', 'Chevrolet', 'Jeep', 'Isuzu', 'Daihatsu'
    ]
    # Whole-word match: a plain substring test accepts "Saudi" (Audi),
    # "Aluminium" (Mini) or "Affordable" (Ford) as brand mentions.
    return any(
        re.search(rf'\b{re.escape(brand)}\b', title, re.IGNORECASE)
        for brand in car_brands
    )

# Helper functions that derive listing attributes (brand, model, condition, ...)
def extract_brand(car_title):
    """Return the first known brand named in ``car_title``.

    Brands are tested in the order of the list below and must appear as
    whole words (case-insensitive).

    Args:
        car_title: Listing title text; ``None`` or empty yields ``""``.

    Returns:
        The brand as spelled in the list, or ``""`` when none is present.
    """
    car_brands = [
        'Toyota', 'Honda', 'Proton', 'Perodua', 'BMW', 'Mercedes', 'Audi', 'Volkswagen',
        'Mazda', 'Mitsubishi', 'Nissan', 'Subaru', 'Ford', 'Hyundai', 'Kia', 'Suzuki',
        'Peugeot', 'Volvo', 'Lexus', 'Porsche', 'Ferrari', 'Lamborghini', 'Maserati',
        'Jaguar', 'Land Rover', 'Mini', 'Chevrolet', 'Jeep', 'Isuzu', 'Daihatsu'
    ]
    if not car_title:
        return ""
    for brand in car_brands:
        # Whole-word match: a substring test reports "Ford" for
        # "Affordable ..." and "Audi" for "Saudi ...".
        if re.search(rf'\b{re.escape(brand)}\b', car_title, re.IGNORECASE):
            return brand
    return ""

def extract_model(car_title, brand):
    """Return the model/variant part of ``car_title``.

    The leading year (or year range) and the leading ``brand`` are removed;
    whatever follows is returned after ``clean_text`` normalization.

    Args:
        car_title: Listing title, typically ``"<year> <brand> <model ...>"``.
        brand: Brand name to strip; when empty, no model is derived.

    Returns:
        The remaining title text, or ``""`` when ``brand`` is empty.
    """
    if not brand:
        return ""
    year_prefix = r'^(19|20)\d{2}(/(19|20)\d{2})?\s+'
    # The year must go first: the brand pattern is anchored at the start, so
    # on a year-prefixed title ("2016 Toyota Hilux") stripping the brand
    # before the year never matches and the brand stays in the model.
    model_part = re.sub(year_prefix, '', car_title)
    model_part = re.sub(rf'^{re.escape(brand)}\s+', '', model_part, flags=re.IGNORECASE)
    # Also covers the "<brand> <year> <model>" ordering.
    model_part = re.sub(year_prefix, '', model_part)
    return clean_text(model_part)

def determine_condition(car_title, description):
    """Classify a listing as ``"New"`` or ``"Used"`` from its text.

    Args:
        car_title: Listing title; ``None`` is treated as empty.
        description: Listing description; ``None`` is treated as empty.

    Returns:
        ``"New"`` when the combined text contains the word "new" or
        "unregistered", otherwise ``"Used"``.
    """
    text = f"{car_title or ''} {description or ''}".lower()
    # Whole words only: a substring test flags "renewed" (as in
    # "road tax renewed") as a new car.
    if re.search(r'\b(new|unregistered)\b', text):
        return "New"
    return "Used"

def determine_seating_capacity(body_type):
    """Return the assumed seat count for ``body_type`` (5 when unknown)."""
    # Body types grouped by the seat count assumed for them.
    seats_to_body_types = (
        (5, ("Sedan", "Hatchback", "Wagon")),
        (7, ("SUV", "MPV")),
        (4, ("Coupe", "Convertible")),
        (2, ("Truck", "Pickup", "Tipper", "Lorry")),
    )
    lookup = {
        name: seats
        for seats, names in seats_to_body_types
        for name in names
    }
    return lookup.get(body_type, 5)

def extract_fuel_type(description):
    """Infer the fuel type from free text.

    Keywords are checked in this order: petrol/gasoline, diesel, hybrid,
    electric/EV. The first hit wins.

    Args:
        description: Listing text; ``None`` is treated as empty.

    Returns:
        ``"Petrol"``, ``"Diesel"``, ``"Hybrid"``, ``"Electric"``, or
        ``None`` when no keyword is present.
    """
    description_lower = (description or "").lower()
    if "petrol" in description_lower or "gasoline" in description_lower:
        return "Petrol"
    if "diesel" in description_lower:
        return "Diesel"
    if "hybrid" in description_lower:
        return "Hybrid"
    # "ev" must not be part of a longer word: a plain substring test labels
    # "Chevrolet", "seven" or "review" as Electric. Digits may follow so that
    # model names like "EV6" still count.
    if "electric" in description_lower or re.search(r'(?<![a-z])ev(?![a-z])', description_lower):
        return "Electric"
    return None

def determine_sales_channel(description, seller_type):
    """Map seller/description keywords to a sales channel.

    The seller type is consulted first ("private", then "dealer" or
    "sales agent"); only afterwards is the description checked for
    "online" / "e-commerce".

    Returns:
        ``"Private"``, ``"Dealership"``, ``"Online"``, or ``None`` when no
        keyword matches.
    """
    description_text = description.lower()
    seller_text = seller_type.lower()
    # (channel, text to search, keywords) -- evaluated top to bottom.
    rules = (
        ("Private", seller_text, ("private",)),
        ("Dealership", seller_text, ("dealer", "sales agent")),
        ("Online", description_text, ("online", "e-commerce")),
    )
    for channel, haystack, keywords in rules:
        if any(keyword in haystack for keyword in keywords):
            return channel
    return None

# Main scraping loop
# Walks result pages start_page..end_page, turns every listing <article> into a
# flat dict and appends it to all_cars; stops once max_cars rows are collected.
all_cars = []
start_page = 3457
end_page = 5182
max_cars = 100  # upper bound on collected listings
base_url = "https://www.carlist.my/used-cars-for-sale/malaysia"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")  # reused in the CSV filename

for page in range(start_page, end_page + 1):
    if len(all_cars) >= max_cars:
        print(f"📈 Reached {len(all_cars)} cars. Stopping.")
        break

    url = f"{base_url}?page_number={page}&page_size=25"
    print(f"\n🔎 Extracting page {page} - {url}")

    # Browser-like request headers with a User-Agent picked per request.
    headers = {
        "User-Agent": get_random_user_agent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Cache-Control": "max-age=0"
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            car_listings = soup.find_all('article', class_=lambda c: c and 'listing' in c.lower())

            # Fallback: locate listings through their heading links.
            if not car_listings:
                car_headings = soup.select('h2 a, h3 a, h4 a')
                for heading in car_headings:
                    container = heading.find_parent('article', class_=lambda c: c and 'listing' in c.lower())
                    if container and container not in car_listings:
                        car_listings.append(container)

            print(f"✅ Found {len(car_listings)} potential car listings on page {page}")

            valid_cars = 0
            skipped_cars = 0
            for idx, car in enumerate(car_listings):
                if len(all_cars) >= max_cars:
                    break

                try:
                    # Extract car title and URL
                    car_title = ""
                    car_url = ""
                    title_link = car.select_one('h2 a, h3 a, h4 a')
                    if title_link:
                        car_title = clean_text(title_link.get_text())
                        if 'href' in title_link.attrs:
                            car_url = title_link['href']
                            if not car_url.startswith('http'):
                                car_url = "https://www.carlist.my" + car_url
                    else:
                        # No heading link: take the first anchor whose text
                        # already looks like a valid car title.
                        links = car.select('a')
                        for link in links:
                            link_text = clean_text(link.get_text())
                            if len(link_text) > 15 and is_valid_car_title(link_text):
                                car_title = link_text
                                if 'href' in link.attrs:
                                    car_url = link['href']
                                    if not car_url.startswith('http'):
                                        car_url = "https://www.carlist.my" + car_url
                                break

                    if not car_title or not car_url:
                        print(f"   ⚠️ Skipping car {idx+1}: No valid title or URL")
                        skipped_cars += 1
                        continue

                    # Extract year: first text node in the listing that
                    # contains a 19xx/20xx year (or year range).
                    year = ""
                    year_pattern = re.compile(r'((19|20)\d{2}(/(19|20)\d{2})?)')
                    year_elements = car.find_all(string=lambda s: bool(s and year_pattern.search(str(s))))
                    if year_elements:
                        for elem in year_elements:
                            year_match = year_pattern.search(elem)
                            if year_match:
                                year = year_match.group(1)
                                break

                    if not year:
                        skipped_cars += 1
                        print(f"   ⚠️ Skipping car {idx+1}: No year found")
                        continue

                    car_title = format_car_title(car_title, year)

                    if not is_valid_car_title(car_title):
                        skipped_cars += 1
                        print(f"   ⚠️ Skipping generic or invalid title: {car_title}")
                        continue

                    # Extract price and monthly payment. Among all "RM <n>"
                    # text nodes, the one with the largest amount is the price.
                    price = ""
                    monthly_payment = ""
                    price_elements = car.find_all(string=lambda s: bool(s and re.search(r'RM\s*[\d,]+(?!\s*/\s*month)', str(s))))
                    if price_elements:
                        prices = []
                        for price_text in price_elements:
                            match = re.search(r'RM\s*([\d,]+)', price_text)
                            if match:
                                try:
                                    price_val = int(match.group(1).replace(',', ''))
                                    prices.append((price_val, price_text))
                                except ValueError:
                                    continue
                        if prices:
                            prices.sort(reverse=True)
                            price = clean_text(prices[0][1])

                    monthly_elements = car.find_all(string=lambda s: bool(s and re.search(r'RM\s*[\d,]+\s*/\s*month', str(s))))
                    if monthly_elements:
                        monthly_payment = clean_text(monthly_elements[0])

                    # Extract details section
                    details_section = car.select_one('div.listing__details')
                    details_text = details_section.get_text(" ", strip=True) if details_section else ""
                    full_description = f"{car_title} {price} {monthly_payment} {details_text}"
                    # Selected once; the mileage, transmission, seller and
                    # location scans below all walk the same elements.
                    detail_divs = car.select('div.listing__details div')

                    # Extract brand and model
                    brand = extract_brand(car_title)
                    model = extract_model(car_title, brand)

                    # Extract body type: title first, then the full description.
                    body_type = ""
                    body_patterns = [
                        "Sedan", "Hatchback", "SUV", "Truck", "Pickup", "MPV",
                        "Coupe", "Convertible", "Wagon", "Tipper", "Lorry"
                    ]
                    for pattern in body_patterns:
                        if pattern.lower() in car_title.lower():
                            body_type = pattern
                            break
                    if not body_type:
                        for pattern in body_patterns:
                            if pattern.lower() in full_description.lower():
                                body_type = pattern
                                break
                    print(f"   🔍 Found body type: {body_type}")

                    # Extract fuel type; when the text names none, assume
                    # Diesel for commercial bodies and Petrol otherwise.
                    fuel_type = extract_fuel_type(full_description)
                    if fuel_type is None:
                        fuel_type = "Diesel" if body_type in ["Truck", "Tipper", "Lorry"] else "Petrol"

                    # Extract mileage
                    mileage = ""
                    for elem in detail_divs:
                        text = elem.get_text(strip=True)
                        if re.search(r'(\d{1,3}(,\d{3})*\s*KM|\d+\s*-\s*\d+K\s*KM)', text):
                            mileage = clean_text(text)
                            print(f"   🔍 Found mileage: {mileage}")
                            break

                    if not mileage:
                        mileage_pattern = re.compile(r'(\d{1,3}(,\d{3})*\s*KM|\d+\s*-\s*\d+K\s*KM)')
                        mileage_elements = car.find_all(string=lambda s: bool(s and mileage_pattern.search(str(s))))
                        if mileage_elements:
                            mileage = clean_text(mileage_elements[0])
                            print(f"   🔍 Found mileage via regex: {mileage}")

                    # Extract transmission
                    transmission = ""
                    for elem in detail_divs:
                        text = elem.get_text(strip=True).lower()
                        if "automatic" in text or "manual" in text or "auto" in text or "cvt" in text:
                            transmission = clean_text(elem.get_text())
                            print(f"   🔍 Found transmission: {transmission}")
                            break

                    if not transmission:
                        trans_patterns = ['Automatic', 'Manual', 'Auto', 'CVT']
                        for pattern in trans_patterns:
                            trans_pattern = re.compile(rf'\b{pattern}\b', re.IGNORECASE)
                            trans_elements = car.find_all(string=lambda s: bool(s and trans_pattern.search(str(s))))
                            if trans_elements:
                                transmission = pattern
                                print(f"   🔍 Found transmission via regex: {transmission}")
                                break

                    # Extract seller type
                    seller_type = ""
                    for elem in detail_divs:
                        text = elem.get_text(strip=True).lower()
                        if "dealer" in text or "sales agent" in text or "private" in text:
                            seller_type = clean_text(elem.get_text())
                            print(f"   🔍 Found seller type: {seller_type}")
                            break

                    if not seller_type:
                        seller_patterns = ['Dealer', 'Private', 'Sales Agent']
                        for pattern in seller_patterns:
                            seller_pattern = re.compile(rf'\b{pattern}\b', re.IGNORECASE)
                            seller_elements = car.find_all(string=lambda s: bool(s and seller_pattern.search(str(s))))
                            if seller_elements:
                                seller_type = pattern
                                print(f"   🔍 Found seller type via regex: {seller_type}")
                                break

                    # Extract location
                    location = ""
                    for elem in detail_divs:
                        text = elem.get_text(strip=True)
                        if ',' in text and any(loc in text for loc in ['Johor', 'Kedah', 'Selangor', 'Penang', 'Kuala Lumpur']):
                            location = clean_text(text)
                            print(f"   🔍 Found location: {location}")
                            break

                    if not location:
                        malaysian_locations = [
                            'Johor', 'Kedah', 'Kelantan', 'Kuala Lumpur', 'Labuan', 'Malacca', 'Negeri Sembilan',
                            'Pahang', 'Penang', 'Perak', 'Perlis', 'Putrajaya', 'Sabah', 'Sarawak', 'Selangor',
                            'Terengganu', 'Melaka', 'Seremban', 'Ipoh', 'Petaling Jaya', 'Shah Alam', 'Johor Bahru'
                        ]
                        for loc in malaysian_locations:
                            loc_pattern = re.compile(rf'\b{re.escape(loc)}\b', re.IGNORECASE)
                            loc_elements = car.find_all(string=lambda s: bool(s and loc_pattern.search(str(s))))
                            if loc_elements:
                                # Only the first matching text node is used.
                                for elem in loc_elements:
                                    fuller_location = elem.strip()
                                    if ',' in fuller_location:
                                        location = clean_text(fuller_location)
                                    else:
                                        location = loc
                                    print(f"   🔍 Found location via regex: {location}")
                                    break
                                break

                    # Extract color
                    color = extract_color(car, car_title, full_description)

                    # Determine condition
                    condition = determine_condition(car_title, full_description)

                    # Determine seating capacity
                    seating_capacity = determine_seating_capacity(body_type)

                    # Extract manufacture year
                    manufacture_year = year

                    # Determine sales channel (Dealership when nothing matches)
                    sales_channel = determine_sales_channel(full_description, seller_type)
                    if sales_channel is None:
                        sales_channel = "Dealership"

                    # Create car data dictionary
                    car_data = {
                        "Car Name": car_title,
                        "Car Brand": brand,
                        "Car Model": model,
                        "Manufacture Year": manufacture_year,
                        "Body Type": body_type,
                        "Fuel Type": fuel_type,
                        "Mileage": mileage,
                        "Transmission": transmission,
                        "Color": color,
                        "Price": price,
                        "Installment": monthly_payment,
                        "Condition": condition,
                        "Seat Capacity": seating_capacity,
                        "Location": location,
                        "Sales Channel": sales_channel,
                        "URL": car_url
                    }

                    all_cars.append(car_data)
                    valid_cars += 1
                    print(f"   ✓ [{valid_cars}] {car_title} - {price} ({body_type}, {color}) - URL: {car_url}")
                    print("\n")

                except Exception as e:
                    # One broken listing must not abort the rest of the page.
                    print(f"   ⚠️ Error processing car listing {idx+1}: {e}")
                    skipped_cars += 1

            print(f"   ✅ Added {valid_cars} valid cars from page {page}")
            print(f"   ⚠️ Skipped {skipped_cars} invalid entries")

        else:
            print(f"❌ Failed to fetch the page. Status code: {response.status_code}")

    except requests.exceptions.RequestException as e:
        # No `continue` here: fall through to the delay below so that a
        # failing or rate-limiting server is not hit again immediately.
        print(f"⚠️ Network error fetching page {page}: {e}")
    except Exception as e:
        print(f"⚠️ General error fetching page {page}: {e}")

    # Randomized pause between page requests (2-5 seconds).
    delay = 2 + random.random() * 3
    print(f"Waiting {delay:.2f} seconds before next page...")
    time.sleep(delay)

# Save to CSV
# Writes the collected rows to a timestamped CSV with a fixed column order.
if not all_cars:
    print("\n❌ No cars were scraped.")
else:
    required_columns = [
        "Car Name",
        "Car Brand",
        "Car Model",
        "Manufacture Year",
        "Body Type",
        "Fuel Type",
        "Mileage",
        "Transmission",
        "Color",
        "Price",
        "Installment",
        "Condition",
        "Seat Capacity",
        "Location",
        "Sales Channel",
        "URL",
    ]
    df = pd.DataFrame(all_cars)
    # Backfill any column no scraped row provided, then fix the column order.
    for col in required_columns:
        if col not in df.columns:
            df[col] = ""
    df = df[required_columns]

    filename = f"carlist_used_cars_{timestamp}.csv"
    df.to_csv(filename, index=False)

    print(f"\n✅ All done! Scraped {len(all_cars)} cars. Saved to '{filename}'")
    print(f"   ✓ Added all required attributes: {', '.join(required_columns)}")


🔎 Extracting page 3457 - https://www.carlist.my/used-cars-for-sale/malaysia?page_number=3457&page_size=25
✅ Found 25 potential car listings on page 3457
   🔍 Found body type: Truck
   🔍 Found mileage via regex: 80 - 85K KM
   🔍 Found transmission via regex: Automatic
   🔍 Found seller type via regex: Dealer
   🔍 Found location via regex: Selangor
   🔍 No color found, randomly assigned: Bronze
   ✓ [1] 2016 Toyota Hilux 2.8 G Dual Cab Pickup Truck - RM 82,800 (Truck, Bronze) - URL: https://www.carlist.my/used-cars/2016-toyota-hilux-2-8-g-dual-cab-pickup-truck/15642210


   🔍 Found body type: SUV
   🔍 Found mileage via regex: 90 - 95K KM
   🔍 Found transmission via regex: Automatic
   🔍 Found seller type via regex: Dealer
   🔍 Found location via regex: Selangor
   🔍 No color found, randomly assigned: White
   ✓ [2] 2015 Honda HR-V 1.8 i-VTEC V SUV - RM 52,800 (SUV, White) - URL: https://www.carlist.my/used-cars/2015-honda-hr-v-1-8-i-vtec-v-suv/15642178


   🔍 Found body type: SUV
   🔍 F

In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import json
import time
import random

# Configuration
base_url = "https://www.carlist.my/cars-for-sale/malaysia?page_number={}&page_size=25"
start_page = 3471
end_page = 5205
car_listings = []  # one dict per scraped car, in scrape order
output_file = 'car_listings.csv'
max_retries = 3  # fetch attempts per page before it is skipped

# Start time
start_time = time.time()

# Main scraping loop
# For every page: fetch it (with retries), pair each <article class="listing">
# with the JSON-LD list entry at the same position, and merge fields from both
# into one row. Ctrl-C stops the loop but keeps the rows collected so far.
try:
    for page_num in range(start_page, end_page + 1):
        url = base_url.format(page_num)
        print(f"\n🔎 Extracting page {page_num} - {url}")

        # Retry mechanism for network errors
        retries = 0
        while retries < max_retries:
            try:
                headers = {"User-Agent": "Mozilla/5.0"}
                response = requests.get(url, headers=headers, timeout=10)
                response.raise_for_status()
                soup = BeautifulSoup(response.content, 'html.parser')
                break
            except requests.exceptions.RequestException as e:
                retries += 1
                print(f"⚠️ Network error: {e}. Retrying ({retries}/{max_retries})...")
                time.sleep(2)
        else:
            # while/else: reached only when every attempt failed.
            print(f"❌ Failed to fetch page {page_num} after {max_retries} retries. Skipping...")
            continue

        # Find all articles (car listings) and extract JSON-LD data
        articles = soup.find_all('article', class_='listing')
        ld_json = None
        for script in soup.find_all('script', type='application/ld+json'):
            try:
                data = json.loads(script.string)
            except (TypeError, ValueError) as e:
                # TypeError: empty <script> (string is None); ValueError: bad JSON.
                print(f"⚠️ Error parsing JSON-LD: {e}")
                continue
            # JSON-LD may be a single object or a list of objects.
            candidates = data if isinstance(data, list) else [data]
            for d in candidates:
                elements = d.get('itemListElement') if isinstance(d, dict) else None
                if isinstance(elements, list):
                    ld_json = elements
                    break

        if not ld_json:
            print(f"⚠️ No JSON-LD found on page {page_num}. Skipping...")
            # The page was still fetched, so keep the polite delay before
            # requesting the next one.
            time.sleep(random.uniform(2, 5))
            continue

        if len(articles) != len(ld_json):
            # Rows are paired by position; zip() silently drops the surplus.
            print(f"⚠️ {len(articles)} articles vs {len(ld_json)} JSON-LD entries on page {page_num}; extra entries are ignored.")

        page_count = 0
        for article, item in zip(articles, ld_json):
            car = item.get('item') if isinstance(item, dict) else None
            if not isinstance(car, dict):
                # An uncaught KeyError here would end the whole run before
                # the CSV is written; skip the single bad entry instead.
                print(f"⚠️ Skipping malformed JSON-LD entry on page {page_num}")
                continue

            # Extract fields directly without utility functions
            name = article.get('data-title', '')
            brand = article.get('data-make', '')
            model = article.get('data-model', '')
            body = article.get('data-body-type', '')
            transmission = article.get('data-transmission', '')
            installment = article.get('data-installment', '')
            mileage = ""
            location = ""
            for icon in article.find_all('i'):
                icon_classes = icon.get('class', [])
                if 'icon--meter' in icon_classes:
                    # Mileage is the node right after the meter icon.
                    sibling = icon.next_sibling
                    mileage = str(sibling).strip() if sibling is not None else ""
                elif 'icon--location' in icon_classes:
                    # Location is spread over every node after the icon.
                    location_text = []
                    for sib in icon.next_siblings:
                        text = sib.get_text(strip=True) if hasattr(sib, 'get_text') else str(sib).strip()
                        if text:
                            location_text.append(text)
                    location = ' '.join(location_text)

            # Extract additional fields from the JSON-LD data
            year = car.get('vehicleModelDate', '')
            fuel = car.get('fuelType', '')
            color = car.get('color', '')
            # "offers" may be an object, a list of objects, or missing.
            offers = car.get('offers') or {}
            if isinstance(offers, list):
                offers = offers[0] if offers else {}
            price = offers.get('price', '') if isinstance(offers, dict) else ''
            condition = str(car.get('itemCondition') or '').lower()
            condition = "New" if "new" in condition else "Used"
            seats = car.get('seatingCapacity', '')
            sales_channel = ""
            dealer_div = article.find('div', class_='listing__spec--dealer')
            if dealer_div:
                sales_channel = dealer_div.get_text(strip=True)

            # Append data to the car_listings list
            car_listings.append({
                'Car Name': name,
                'Car Brand': brand,
                'Car Model': model,
                'Manufacture Year': year,
                'Body Type': body,
                'Fuel Type': fuel,
                'Mileage': mileage,
                'Transmission': transmission,
                'Color': color,
                'Price': price,
                'Installment': installment,
                'Condition': condition,
                'Seat Capacity': seats,
                'Location': location,
                'Sales Channel': sales_channel,
                'URL': car.get('url', '')
            })
            page_count += 1

        print(f"✅ Found {page_count} cars on page {page_num}")
        print(f"📄 Total scraped: {len(car_listings)}")

        # Randomized delay to prevent IP blocking
        delay = random.uniform(2, 5)
        print(f"⏳ Waiting {delay:.2f} seconds before next page...")
        time.sleep(delay)

except KeyboardInterrupt:
    print("\n🚨 Script interrupted by the user. Saving progress...")

# Save the extracted data to a CSV file
# Column order follows the key order of the first scraped row.
if not car_listings:
    print("\n⚠️ No car listings found.")
else:
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=car_listings[0].keys())
        writer.writeheader()
        for row in car_listings:
            writer.writerow(row)
    print(f"\n✅ Saved {len(car_listings)} cars to '{output_file}'")

# End time and elapsed time calculation
# Wall-clock duration since start_time, covering scraping and the save.
end_time = time.time()
execution_time = end_time - start_time
print(f"\n🕒 Total execution time: {execution_time:.2f} seconds")


🔎 Extracting page 3471 - https://www.carlist.my/cars-for-sale/malaysia?page_number=3471&page_size=25
✅ Found 25 cars on page 3471
📄 Total scraped: 25
⏳ Waiting 2.69 seconds before next page...

🔎 Extracting page 3472 - https://www.carlist.my/cars-for-sale/malaysia?page_number=3472&page_size=25
✅ Found 25 cars on page 3472
📄 Total scraped: 50
⏳ Waiting 2.83 seconds before next page...

🔎 Extracting page 3473 - https://www.carlist.my/cars-for-sale/malaysia?page_number=3473&page_size=25
✅ Found 25 cars on page 3473
📄 Total scraped: 75
⏳ Waiting 4.43 seconds before next page...

🔎 Extracting page 3474 - https://www.carlist.my/cars-for-sale/malaysia?page_number=3474&page_size=25
✅ Found 25 cars on page 3474
📄 Total scraped: 100
⏳ Waiting 2.02 seconds before next page...

🔎 Extracting page 3475 - https://www.carlist.my/cars-for-sale/malaysia?page_number=3475&page_size=25
✅ Found 25 cars on page 3475
📄 Total scraped: 125
⏳ Waiting 4.92 seconds before next page...

🔎 Extracting page 3476 - ht