# In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import re

# In [10]:
# Scrape configuration. Each ad page URL is formed as <base_url><ad_id>.
base_url = "https://www.exchangeandmart.co.uk/ad/"

# First and last ad IDs to visit.
start_ad_id, end_ad_id = 30_880_000, 30_900_000

# In [11]:
# Scrape every ad page in [start_ad_id, end_ad_id] and append one CSV row per
# ad that has both a title block and an "adDetails" block.

# Seconds to wait on a single request before giving up, so one stalled
# connection cannot hang the whole crawl.
REQUEST_TIMEOUT_SECONDS = 10

CSV_FIELDNAMES = [
    'Make', 'Model', 'Price', 'Year', 'Engine Size', 'Mileage', 'Fuel Type',
    'Transmission', 'Color', 'Body Type', 'Wheel Drive', 'Doors', 'Seats',
    'Engine Power', 'Top Speed', 'Acceleration', 'CO2 Rating',
]

# CSV columns filled from the "adDetsItem" divs, matched purely by position.
# NOTE(review): assumes the site always lists the items in this order - confirm.
DETAIL_FIELDS = [
    'Year', 'Engine Size', 'Mileage', 'Fuel Type',
    'Transmission', 'Color', 'Body Type',
]

# CSV column -> exact label text of the <span> that precedes the value in adSpec.
SPEC_LABELS = {
    'Wheel Drive': 'Wheel drive:',
    'Doors': 'Doors:',
    'Seats': 'Seats:',
    'Engine Power': 'Engine power:',
    'Top Speed': 'Top speed:',
    'Acceleration': 'Acceleration (0-62 mph):',
    'CO2 Rating': 'CO2 rating:',
}


def _parse_title(car_listing):
    """Return ``(make, model)`` from the title span; missing parts are None."""
    title = car_listing.find('span', class_='ttl')
    if title is None:
        return None, None
    # The first word is the make, the remainder is the model. Pad with None so
    # a one-word (or empty) title no longer raises on unpacking.
    words = title.get_text(strip=True).split(maxsplit=1)
    make, model = (words + [None, None])[:2]
    return make, model


def _parse_details(ad_details):
    """Return the DETAIL_FIELDS columns read positionally from ``ad_details``."""
    details = dict.fromkeys(DETAIL_FIELDS)
    # One find_all instead of one per field; zip stops at the shorter
    # sequence, so a page with fewer items leaves the trailing columns None.
    items = ad_details.find_all('div', class_='adDetsItem')
    for field, item in zip(DETAIL_FIELDS, items):
        span = item.find('span')
        details[field] = span.text if span is not None else None
    return details


def _parse_spec(ad_spec):
    """Return the SPEC_LABELS columns; all None when ``ad_spec`` is None."""
    spec = dict.fromkeys(SPEC_LABELS)
    if ad_spec is None:
        return spec
    for field, label in SPEC_LABELS.items():
        label_span = ad_spec.find('span', string=label)
        if label_span is None:
            continue
        # The value is taken from the next <span> after the label span.
        value_span = label_span.find_next('span')
        if value_span is not None:
            spec[field] = value_span.get_text(strip=True)
    return spec


def _parse_listing(html):
    """Build a CSV row dict from an ad page.

    Returns None when the page lacks the title block or the "adDetails"
    block, in which case no row is written for the ad.
    """
    soup = BeautifulSoup(html, 'html.parser')
    car_listing = soup.find('div', class_='car-name clearfix')
    if car_listing is None:
        return None
    ad_details = soup.find('div', id='adDetails')
    if ad_details is None:
        return None

    make, model = _parse_title(car_listing)
    price_span = car_listing.find('span', itemprop='price')
    price = price_span.get_text(strip=True) if price_span is not None else None

    row = {'Make': make, 'Model': model, 'Price': price}
    row.update(_parse_details(ad_details))
    row.update(_parse_spec(soup.find('div', {'id': 'adSpec', 'expand': 'adSpec'})))
    return row


with open('car_listings_6.csv', 'a', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=CSV_FIELDNAMES)
    # The file is opened in append mode: only emit the header when it is empty.
    if csv_file.tell() == 0:
        writer.writeheader()

    # A single Session reuses the underlying connection across requests.
    with requests.Session() as session:
        for ad_id in range(start_ad_id, end_ad_id + 1):
            url = f'{base_url}{ad_id}'
            try:
                # The request itself is inside the try so that a network
                # failure skips this ad instead of aborting the whole run.
                response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
                response.raise_for_status()

                row = _parse_listing(response.text)
                if row is not None:
                    writer.writerow(row)
                    print(f"Data for ad ID {ad_id} has been written to CSV.")

            except requests.HTTPError as e:
                if e.response.status_code == 404:
                    print(f"Error 404: Page not found for ad ID {ad_id}")
                else:
                    print(f"Error {e.response.status_code} for ad ID {ad_id}")

            except requests.RequestException as e:
                # Timeouts, DNS failures, dropped connections, etc.
                print(f"Request failed for ad ID {ad_id}: {e}")

            except Exception as e:
                # Loop-level boundary: report and move on to the next ad.
                print(f"An unexpected error occurred for ad ID {ad_id}: {e}")


# Output: KeyboardInterrupt