Daihatsu Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Scrape a single car-listing page into a flat record (best effort).

    Any field whose element is not found keeps its default (``None`` or an
    empty list). Network failures, non-200 responses and parsing errors are
    reported with ``print`` and never raised, so one bad listing cannot stop
    a long crawl.

    Args:
        url: URL of the listing detail page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled connection blocks the crawl forever.

    Returns:
        dict: keys "Name", "Price", "Year", "Mileage", "Fuel Type",
        "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity" and "Body Type" (each ``str`` or ``None``), plus
        "Features" and "Pictures" (lists of ``str``).
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    # Index of the <li> holding each value inside the "ul-featured" list.
    featured_positions = {
        "Registered In": 1,
        "Color": 3,
        "Assembly": 5,
        "Engine Capacity": 7,
        "Body Type": 9,
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Previously this case was silent and indistinguishable from an empty page.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            # Only trust the table when all four leading cells are present.
            if len(td_tags) >= 4:
                for key, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[key] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            featured_li_elements = featured_list.find_all('li')
            for key, position in featured_positions.items():
                if len(featured_li_elements) > position:
                    car_info[key] = featured_li_elements[position].text.strip()
            # Every other entry of this list is kept as a free-form feature.
            # NOTE(review): those entries look like the labels of the values
            # above, so they end up in "Features" - confirm this is intended.
            value_positions = set(featured_positions.values())
            car_info["Features"].extend(
                li_element.text.strip()
                for i, li_element in enumerate(featured_li_elements)
                if i not in value_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(
                li_element.text.strip() for li_element in car_feature_list.find_all('li')
            )

        # Image URLs are read from the data-src attribute of the gallery <li> items.
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                image['data-src'] for image in container.find_all('li') if image.has_attr('data-src')
            ]

    except Exception as e:
        # Deliberately broad: the scrape is best-effort and returns what it has.
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_daihatsu_car_links(base_url, num_pages, timeout=30):
    """Collect Daihatsu listing URLs from paginated result pages.

    Pages ``1..num_pages`` are requested as ``{base_url}?page={n}``. Every
    anchor whose ``href`` contains both "daihatsu" and "for-sale-in"
    (case-insensitive) is kept and resolved against ``base_url``.

    Args:
        base_url: URL of the first result page, without a ``page`` query.
        num_pages: Number of result pages to walk.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot hang the crawl.

    Returns:
        list[str]: Absolute listing URLs in first-seen order. Duplicates are
        removed so that a listing linked several times is scraped only once.
    """
    # Loop-invariant: urljoin needs a base that carries a scheme.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    unique_links = {}  # dict used as an insertion-ordered set

    for page in range(1, num_pages + 1):
        url = f'{base_url}?page={page}'

        # Failures are handled per page so one bad request does not discard
        # all the remaining pages.
        try:
            response = requests.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'daihatsu' in lowered and 'for-sale-in' in lowered:
                    unique_links.setdefault(urljoin(base_url_with_scheme, href), None)
        except Exception as e:
            print(f"An error occurred: {e}")

    return list(unique_links)


# Crawl configuration: first result page of the Daihatsu listings and the
# number of result pages to walk.
base_url = 'https://www.pakwheels.com/used-cars/daihatsu/12'
num_pages = 101

daihatsu_car_links = get_daihatsu_car_links(base_url, num_pages)

# One record per collected link; a failed scrape still yields a record whose
# fields keep their defaults.
all_car_info = []
for link in daihatsu_car_links:
    all_car_info.append(scrape_car_info(link))

# Dump everything to a CSV file
csv_file_path = 'AllDaihatsuCarInfo.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column order of the file; the last two columns hold list-valued fields
    header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
    csv_writer.writerow(header)

    for car_info in all_car_info:
        # Scalar fields go in unchanged; each list field is flattened into one cell
        row = [car_info[column] for column in header[:-2]]
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

CSV File Created: AllDaihatsuCarInfo.csv


Nissan Cars CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Scrape a single car-listing page into a flat record (best effort).

    Any field whose element is not found keeps its default (``None`` or an
    empty list). Network failures, non-200 responses and parsing errors are
    reported with ``print`` and never raised, so one bad listing cannot stop
    a long crawl.

    Args:
        url: URL of the listing detail page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled connection blocks the crawl forever.

    Returns:
        dict: keys "Name", "Price", "Year", "Mileage", "Fuel Type",
        "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity" and "Body Type" (each ``str`` or ``None``), plus
        "Features" and "Pictures" (lists of ``str``).
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    # Index of the <li> holding each value inside the "ul-featured" list.
    featured_positions = {
        "Registered In": 1,
        "Color": 3,
        "Assembly": 5,
        "Engine Capacity": 7,
        "Body Type": 9,
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Previously this case was silent and indistinguishable from an empty page.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            # Only trust the table when all four leading cells are present.
            if len(td_tags) >= 4:
                for key, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[key] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            featured_li_elements = featured_list.find_all('li')
            for key, position in featured_positions.items():
                if len(featured_li_elements) > position:
                    car_info[key] = featured_li_elements[position].text.strip()
            # Every other entry of this list is kept as a free-form feature.
            # NOTE(review): those entries look like the labels of the values
            # above, so they end up in "Features" - confirm this is intended.
            value_positions = set(featured_positions.values())
            car_info["Features"].extend(
                li_element.text.strip()
                for i, li_element in enumerate(featured_li_elements)
                if i not in value_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(
                li_element.text.strip() for li_element in car_feature_list.find_all('li')
            )

        # Image URLs are read from the data-src attribute of the gallery <li> items.
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                image['data-src'] for image in container.find_all('li') if image.has_attr('data-src')
            ]

    except Exception as e:
        # Deliberately broad: the scrape is best-effort and returns what it has.
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_Nissan_links(base_url, num_pages, timeout=30):
    """Collect Nissan listing URLs from paginated result pages.

    Pages ``1..num_pages`` are requested as ``{base_url}?page={n}``. Every
    anchor whose ``href`` contains both "nissan" and "for-sale-in"
    (case-insensitive) is kept and resolved against ``base_url``.

    Args:
        base_url: URL of the first result page, without a ``page`` query.
        num_pages: Number of result pages to walk.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot hang the crawl.

    Returns:
        list[str]: Absolute listing URLs in first-seen order. Duplicates are
        removed so that a listing linked several times is scraped only once.
    """
    # Loop-invariant: urljoin needs a base that carries a scheme.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    unique_links = {}  # dict used as an insertion-ordered set

    for page in range(1, num_pages + 1):
        url = f'{base_url}?page={page}'

        # Failures are handled per page so one bad request does not discard
        # all the remaining pages.
        try:
            response = requests.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'nissan' in lowered and 'for-sale-in' in lowered:
                    unique_links.setdefault(urljoin(base_url_with_scheme, href), None)
        except Exception as e:
            print(f"An error occurred: {e}")

    return list(unique_links)


# Crawl the Nissan used-car listings (num_pages result pages) and export them to CSV
base_url = 'https://www.pakwheels.com/used-cars/nissan/26'
num_pages = 57

# Fixed: this cell used to call get_nissan_car_links, which is not defined in
# this notebook (NameError on a fresh kernel). The helper is get_Nissan_links.
nissan_car_links = get_Nissan_links(base_url, num_pages)

all_car_info = []

for link in nissan_car_links:
    car_info = scrape_car_info(link)
    all_car_info.append(car_info)

# Write the information to a CSV file
csv_file_path = 'AllNissanCarInfo.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Header row; the last two columns hold list-valued fields
    header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
    csv_writer.writerow(header)

    # Data rows
    for car_info in all_car_info:
        # Scalar fields go in unchanged; each list field is flattened into one cell
        row = [car_info[column] for column in header[:-2]]
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

CSV File Created: AllNissanCarInfo.csv


Audi Cars CSV


In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Scrape a single car-listing page into a flat record (best effort).

    Any field whose element is not found keeps its default (``None`` or an
    empty list). Network failures, non-200 responses and parsing errors are
    reported with ``print`` and never raised, so one bad listing cannot stop
    a long crawl.

    Args:
        url: URL of the listing detail page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled connection blocks the crawl forever.

    Returns:
        dict: keys "Name", "Price", "Year", "Mileage", "Fuel Type",
        "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity" and "Body Type" (each ``str`` or ``None``), plus
        "Features" and "Pictures" (lists of ``str``).
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    # Index of the <li> holding each value inside the "ul-featured" list.
    featured_positions = {
        "Registered In": 1,
        "Color": 3,
        "Assembly": 5,
        "Engine Capacity": 7,
        "Body Type": 9,
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Previously this case was silent and indistinguishable from an empty page.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            # Only trust the table when all four leading cells are present.
            if len(td_tags) >= 4:
                for key, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[key] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            featured_li_elements = featured_list.find_all('li')
            for key, position in featured_positions.items():
                if len(featured_li_elements) > position:
                    car_info[key] = featured_li_elements[position].text.strip()
            # Every other entry of this list is kept as a free-form feature.
            # NOTE(review): those entries look like the labels of the values
            # above, so they end up in "Features" - confirm this is intended.
            value_positions = set(featured_positions.values())
            car_info["Features"].extend(
                li_element.text.strip()
                for i, li_element in enumerate(featured_li_elements)
                if i not in value_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(
                li_element.text.strip() for li_element in car_feature_list.find_all('li')
            )

        # Image URLs are read from the data-src attribute of the gallery <li> items.
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                image['data-src'] for image in container.find_all('li') if image.has_attr('data-src')
            ]

    except Exception as e:
        # Deliberately broad: the scrape is best-effort and returns what it has.
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_audi_links(base_url, num_pages, timeout=30):
    """Collect Audi listing URLs from paginated result pages.

    Pages ``1..num_pages`` are requested as ``{base_url}?page={n}``, so
    ``base_url`` must not already carry a query string. Every anchor whose
    ``href`` contains both "audi" and "for-sale-in" (case-insensitive) is
    kept and resolved against ``base_url``.

    Args:
        base_url: URL of the first result page, without a ``page`` query.
        num_pages: Number of result pages to walk.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot hang the crawl.

    Returns:
        list[str]: Absolute listing URLs in first-seen order. Duplicates are
        removed so that a listing linked several times is scraped only once.
    """
    # Loop-invariant: urljoin needs a base that carries a scheme.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    unique_links = {}  # dict used as an insertion-ordered set

    for page in range(1, num_pages + 1):
        url = f'{base_url}?page={page}'

        # Failures are handled per page so one bad request does not discard
        # all the remaining pages.
        try:
            response = requests.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'audi' in lowered and 'for-sale-in' in lowered:
                    unique_links.setdefault(urljoin(base_url_with_scheme, href), None)
        except Exception as e:
            print(f"An error occurred: {e}")

    return list(unique_links)


# Crawl the Audi used-car search results (num_pages result pages) and export them to CSV.
# Fixed: base_url used to end in '?page=10'. get_audi_links appends '?page={n}'
# itself, so every request went to a malformed '...?page=10?page=n' URL.
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_audi/'
num_pages = 10

# Fixed: this cell used to call get_audi_car_links, which is not defined in
# this notebook (NameError on a fresh kernel). The helper is get_audi_links.
audi_car_links = get_audi_links(base_url, num_pages)

all_car_info = []

for link in audi_car_links:
    car_info = scrape_car_info(link)
    all_car_info.append(car_info)

# Write the information to a CSV file
csv_file_path = 'AllAudiCarInfo.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Header row; the last two columns hold list-valued fields
    header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
    csv_writer.writerow(header)

    # Data rows
    for car_info in all_car_info:
        # Scalar fields go in unchanged; each list field is flattened into one cell
        row = [car_info[column] for column in header[:-2]]
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

CSV File Created: AllAudiCarInfo.csv


BMW Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Scrape a single car-listing page into a flat record (best effort).

    Any field whose element is not found keeps its default (``None`` or an
    empty list). Network failures, non-200 responses and parsing errors are
    reported with ``print`` and never raised, so one bad listing cannot stop
    a long crawl.

    Args:
        url: URL of the listing detail page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled connection blocks the crawl forever.

    Returns:
        dict: keys "Name", "Price", "Year", "Mileage", "Fuel Type",
        "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity" and "Body Type" (each ``str`` or ``None``), plus
        "Features" and "Pictures" (lists of ``str``).
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    # Index of the <li> holding each value inside the "ul-featured" list.
    featured_positions = {
        "Registered In": 1,
        "Color": 3,
        "Assembly": 5,
        "Engine Capacity": 7,
        "Body Type": 9,
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Previously this case was silent and indistinguishable from an empty page.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            # Only trust the table when all four leading cells are present.
            if len(td_tags) >= 4:
                for key, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[key] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            featured_li_elements = featured_list.find_all('li')
            for key, position in featured_positions.items():
                if len(featured_li_elements) > position:
                    car_info[key] = featured_li_elements[position].text.strip()
            # Every other entry of this list is kept as a free-form feature.
            # NOTE(review): those entries look like the labels of the values
            # above, so they end up in "Features" - confirm this is intended.
            value_positions = set(featured_positions.values())
            car_info["Features"].extend(
                li_element.text.strip()
                for i, li_element in enumerate(featured_li_elements)
                if i not in value_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(
                li_element.text.strip() for li_element in car_feature_list.find_all('li')
            )

        # Image URLs are read from the data-src attribute of the gallery <li> items.
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                image['data-src'] for image in container.find_all('li') if image.has_attr('data-src')
            ]

    except Exception as e:
        # Deliberately broad: the scrape is best-effort and returns what it has.
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_bmw_links(base_url, num_pages, timeout=30):
    """Collect BMW listing URLs from paginated result pages.

    Pages ``1..num_pages`` are requested as ``{base_url}?page={n}``. Every
    anchor whose ``href`` contains both "bmw" and "for-sale-in"
    (case-insensitive) is kept and resolved against ``base_url``.

    Args:
        base_url: URL of the first result page, without a ``page`` query.
        num_pages: Number of result pages to walk.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot hang the crawl.

    Returns:
        list[str]: Absolute listing URLs in first-seen order. Duplicates are
        removed so that a listing linked several times is scraped only once.
    """
    # Loop-invariant: urljoin needs a base that carries a scheme.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    unique_links = {}  # dict used as an insertion-ordered set

    for page in range(1, num_pages + 1):
        url = f'{base_url}?page={page}'

        # Failures are handled per page so one bad request does not discard
        # all the remaining pages.
        try:
            response = requests.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'bmw' in lowered and 'for-sale-in' in lowered:
                    unique_links.setdefault(urljoin(base_url_with_scheme, href), None)
        except Exception as e:
            print(f"An error occurred: {e}")

    return list(unique_links)


# Crawl the BMW used-car search results (num_pages result pages) and export them to CSV
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_bmw/'
num_pages = 7

# Fixed: this cell used to call get_bmw_car_links, which is not defined in
# this notebook (NameError on a fresh kernel). The helper is get_bmw_links.
bmw_car_links = get_bmw_links(base_url, num_pages)

all_car_info = []

for link in bmw_car_links:
    car_info = scrape_car_info(link)
    all_car_info.append(car_info)

# Write the information to a CSV file
csv_file_path = 'AllBmwCarInfo.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Header row; the last two columns hold list-valued fields
    header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
    csv_writer.writerow(header)

    # Data rows
    for car_info in all_car_info:
        # Scalar fields go in unchanged; each list field is flattened into one cell
        row = [car_info[column] for column in header[:-2]]
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

CSV File Created: AllBmwCarInfo.csv


Buick Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Scrape a single car-listing page into a flat record (best effort).

    Any field whose element is not found keeps its default (``None`` or an
    empty list). Network failures, non-200 responses and parsing errors are
    reported with ``print`` and never raised, so one bad listing cannot stop
    a long crawl.

    Args:
        url: URL of the listing detail page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled connection blocks the crawl forever.

    Returns:
        dict: keys "Name", "Price", "Year", "Mileage", "Fuel Type",
        "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity" and "Body Type" (each ``str`` or ``None``), plus
        "Features" and "Pictures" (lists of ``str``).
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    # Index of the <li> holding each value inside the "ul-featured" list.
    featured_positions = {
        "Registered In": 1,
        "Color": 3,
        "Assembly": 5,
        "Engine Capacity": 7,
        "Body Type": 9,
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Previously this case was silent and indistinguishable from an empty page.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            # Only trust the table when all four leading cells are present.
            if len(td_tags) >= 4:
                for key, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[key] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            featured_li_elements = featured_list.find_all('li')
            for key, position in featured_positions.items():
                if len(featured_li_elements) > position:
                    car_info[key] = featured_li_elements[position].text.strip()
            # Every other entry of this list is kept as a free-form feature.
            # NOTE(review): those entries look like the labels of the values
            # above, so they end up in "Features" - confirm this is intended.
            value_positions = set(featured_positions.values())
            car_info["Features"].extend(
                li_element.text.strip()
                for i, li_element in enumerate(featured_li_elements)
                if i not in value_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(
                li_element.text.strip() for li_element in car_feature_list.find_all('li')
            )

        # Image URLs are read from the data-src attribute of the gallery <li> items.
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                image['data-src'] for image in container.find_all('li') if image.has_attr('data-src')
            ]

    except Exception as e:
        # Deliberately broad: the scrape is best-effort and returns what it has.
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_buick_links(base_url, num_pages, timeout=30):
    """Collect Buick listing URLs from paginated result pages.

    Pages ``1..num_pages`` are requested as ``{base_url}?page={n}``. Every
    anchor whose ``href`` contains both "buick" and "for-sale-in"
    (case-insensitive) is kept and resolved against ``base_url``.

    Args:
        base_url: URL of the first result page, without a ``page`` query.
        num_pages: Number of result pages to walk.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot hang the crawl.

    Returns:
        list[str]: Absolute listing URLs in first-seen order. Duplicates are
        removed so that a listing linked several times is scraped only once.
    """
    # Loop-invariant: urljoin needs a base that carries a scheme.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    unique_links = {}  # dict used as an insertion-ordered set

    for page in range(1, num_pages + 1):
        url = f'{base_url}?page={page}'

        # Failures are handled per page so one bad request does not discard
        # all the remaining pages.
        try:
            response = requests.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'buick' in lowered and 'for-sale-in' in lowered:
                    unique_links.setdefault(urljoin(base_url_with_scheme, href), None)
        except Exception as e:
            print(f"An error occurred: {e}")

    return list(unique_links)


# Crawl the Buick used-car search results (num_pages result pages) and export them to CSV
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_buick/'
num_pages = 1

# Fixed: this cell used to call get_buick_car_links, which is not defined in
# this notebook (NameError on a fresh kernel). The helper is get_buick_links.
buick_car_links = get_buick_links(base_url, num_pages)

all_car_info = []

for link in buick_car_links:
    car_info = scrape_car_info(link)
    all_car_info.append(car_info)

# Write the information to a CSV file
csv_file_path = 'AllBuickCarInfo.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Header row; the last two columns hold list-valued fields
    header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
    csv_writer.writerow(header)

    # Data rows
    for car_info in all_car_info:
        # Scalar fields go in unchanged; each list field is flattened into one cell
        row = [car_info[column] for column in header[:-2]]
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

CSV File Created: AllBuickCarInfo.csv


Changan Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Scrape a single car-listing page into a flat record (best effort).

    Any field whose element is not found keeps its default (``None`` or an
    empty list). Network failures, non-200 responses and parsing errors are
    reported with ``print`` and never raised, so one bad listing cannot stop
    a long crawl.

    Args:
        url: URL of the listing detail page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled connection blocks the crawl forever.

    Returns:
        dict: keys "Name", "Price", "Year", "Mileage", "Fuel Type",
        "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity" and "Body Type" (each ``str`` or ``None``), plus
        "Features" and "Pictures" (lists of ``str``).
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    # Index of the <li> holding each value inside the "ul-featured" list.
    featured_positions = {
        "Registered In": 1,
        "Color": 3,
        "Assembly": 5,
        "Engine Capacity": 7,
        "Body Type": 9,
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Previously this case was silent and indistinguishable from an empty page.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            # Only trust the table when all four leading cells are present.
            if len(td_tags) >= 4:
                for key, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[key] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            featured_li_elements = featured_list.find_all('li')
            for key, position in featured_positions.items():
                if len(featured_li_elements) > position:
                    car_info[key] = featured_li_elements[position].text.strip()
            # Every other entry of this list is kept as a free-form feature.
            # NOTE(review): those entries look like the labels of the values
            # above, so they end up in "Features" - confirm this is intended.
            value_positions = set(featured_positions.values())
            car_info["Features"].extend(
                li_element.text.strip()
                for i, li_element in enumerate(featured_li_elements)
                if i not in value_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(
                li_element.text.strip() for li_element in car_feature_list.find_all('li')
            )

        # Image URLs are read from the data-src attribute of the gallery <li> items.
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                image['data-src'] for image in container.find_all('li') if image.has_attr('data-src')
            ]

    except Exception as e:
        # Deliberately broad: the scrape is best-effort and returns what it has.
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_changan_links(base_url, num_pages, timeout=30):
    """Collect Changan listing URLs from the paginated search results.

    Pages 1..num_pages of ``base_url`` are requested with a ``?page=N``
    query string. Every anchor whose href contains both ``changan`` and
    ``for-sale-in`` (case-insensitive) is kept and resolved to an absolute
    URL against ``base_url``.

    Args:
        base_url: Search-results URL. ``https://`` is prepended when the
            scheme is missing.
        num_pages: Number of result pages to walk, starting from page 1.
        timeout: Seconds to wait for each page request.

    Returns:
        List of absolute listing URLs in the order they were found.
        Errors are printed, not raised; links gathered so far are kept.
    """
    all_links = []

    try:
        # Normalise once, up front, so that the request itself (and not only
        # urljoin) sees a URL with a scheme.
        if not base_url.startswith(('http://', 'https://')):
            base_url = 'https://' + base_url

        for page in range(1, num_pages + 1):
            url = f'{base_url}?page={page}'

            try:
                response = requests.get(url, timeout=timeout)
            except requests.RequestException as e:
                # A network failure on one page should not discard the
                # remaining pages.
                print(f"An error occurred: {e}")
                continue

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                href_lower = href.lower()
                if 'changan' in href_lower and 'for-sale-in' in href_lower:
                    # urljoin turns relative hrefs into absolute URLs.
                    all_links.append(urljoin(base_url, href))
    except Exception as e:
        print(f"An error occurred: {e}")

    return all_links


# Scrape the first `num_pages` search-result pages of Changan listings
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_changan/'
num_pages = 22

# The link collector defined in this cell is get_changan_links
changan_car_links = get_changan_links(base_url, num_pages)

all_car_info = [scrape_car_info(link) for link in changan_car_links]

# Write the information to a CSV file
csv_file_path = 'AllChanganCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
# These columns hold lists and are flattened into one comma-separated cell
list_columns = ("Features", "Pictures")

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write header row
    csv_writer.writerow(header)

    # Write data rows, one per scraped listing, in header order
    for car_info in all_car_info:
        row = [
            ', '.join(car_info[column]) if column in list_columns else car_info[column]
            for column in header
        ]
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

CSV File Created: AllChanganCarInfo.csv


Chevrolet Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one car listing page and extract its details into a dict.

    Args:
        url: URL of the listing page to download.
        timeout: Seconds to wait for the server before the request is
            abandoned, so a stalled connection cannot block the whole run.

    Returns:
        A dict with the keys "Name", "Price", "Year", "Mileage",
        "Fuel Type", "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity", "Body Type" (strings, or None when not found),
        plus "Features" and "Pictures" (lists of strings, possibly empty).
        Failures are printed rather than raised; the record is then
        returned empty or partially filled.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Report the failure instead of silently returning a blank record.
            print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        # The first four cells of the engine-detail table are read
        # positionally as year, mileage, fuel type and transmission.
        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            if len(td_tags) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[field] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            item_texts = [li.text.strip() for li in featured_list.find_all('li')]
            # Items at these positions are stored under named fields.
            field_positions = {
                "Registered In": 1,
                "Color": 3,
                "Assembly": 5,
                "Engine Capacity": 7,
                "Body Type": 9,
            }
            for field, position in field_positions.items():
                car_info[field] = item_texts[position] if len(item_texts) > position else None
            # Every other item goes to Features, as before.
            # NOTE(review): these are presumably the label cells plus any
            # trailing extras; verify against the page markup.
            named_positions = set(field_positions.values())
            car_info["Features"].extend(
                text for index, text in enumerate(item_texts) if index not in named_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(li.text.strip() for li in car_feature_list.find_all('li'))

        # Image URLs are taken from the data-src attribute of each gallery item
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                li['data-src'] for li in container.find_all('li') if li.has_attr('data-src')
            ]
        else:
            car_info["Pictures"] = []

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_chevrolet_links(base_url, num_pages, timeout=30):
    """Collect Chevrolet listing URLs from the paginated search results.

    Pages 1..num_pages of ``base_url`` are requested with a ``?page=N``
    query string. Every anchor whose href contains both ``chevrolet`` and
    ``for-sale-in`` (case-insensitive) is kept and resolved to an absolute
    URL against ``base_url``.

    Args:
        base_url: Search-results URL. ``https://`` is prepended when the
            scheme is missing.
        num_pages: Number of result pages to walk, starting from page 1.
        timeout: Seconds to wait for each page request.

    Returns:
        List of absolute listing URLs in the order they were found.
        Errors are printed, not raised; links gathered so far are kept.
    """
    all_links = []

    try:
        # Normalise once, up front, so that the request itself (and not only
        # urljoin) sees a URL with a scheme.
        if not base_url.startswith(('http://', 'https://')):
            base_url = 'https://' + base_url

        for page in range(1, num_pages + 1):
            url = f'{base_url}?page={page}'

            try:
                response = requests.get(url, timeout=timeout)
            except requests.RequestException as e:
                # A network failure on one page should not discard the
                # remaining pages.
                print(f"An error occurred: {e}")
                continue

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                href_lower = href.lower()
                if 'chevrolet' in href_lower and 'for-sale-in' in href_lower:
                    # urljoin turns relative hrefs into absolute URLs.
                    all_links.append(urljoin(base_url, href))
    except Exception as e:
        print(f"An error occurred: {e}")

    return all_links


# Scrape the first `num_pages` search-result pages of Chevrolet listings
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_chevrolet/'
num_pages = 4

# The link collector defined in this cell is get_chevrolet_links
chevrolet_car_links = get_chevrolet_links(base_url, num_pages)

all_car_info = [scrape_car_info(link) for link in chevrolet_car_links]

# Write the information to a CSV file
csv_file_path = 'AllChevroletCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
# These columns hold lists and are flattened into one comma-separated cell
list_columns = ("Features", "Pictures")

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write header row
    csv_writer.writerow(header)

    # Write data rows, one per scraped listing, in header order
    for car_info in all_car_info:
        row = [
            ', '.join(car_info[column]) if column in list_columns else car_info[column]
            for column in header
        ]
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

CSV File Created: AllChevroletCarInfo.csv


Classic Cars CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one car listing page and extract its details into a dict.

    Args:
        url: URL of the listing page to download.
        timeout: Seconds to wait for the server before the request is
            abandoned, so a stalled connection cannot block the whole run.

    Returns:
        A dict with the keys "Name", "Price", "Year", "Mileage",
        "Fuel Type", "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity", "Body Type" (strings, or None when not found),
        plus "Features" and "Pictures" (lists of strings, possibly empty).
        Failures are printed rather than raised; the record is then
        returned empty or partially filled.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Report the failure instead of silently returning a blank record.
            print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        # The first four cells of the engine-detail table are read
        # positionally as year, mileage, fuel type and transmission.
        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            if len(td_tags) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[field] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            item_texts = [li.text.strip() for li in featured_list.find_all('li')]
            # Items at these positions are stored under named fields.
            field_positions = {
                "Registered In": 1,
                "Color": 3,
                "Assembly": 5,
                "Engine Capacity": 7,
                "Body Type": 9,
            }
            for field, position in field_positions.items():
                car_info[field] = item_texts[position] if len(item_texts) > position else None
            # Every other item goes to Features, as before.
            # NOTE(review): these are presumably the label cells plus any
            # trailing extras; verify against the page markup.
            named_positions = set(field_positions.values())
            car_info["Features"].extend(
                text for index, text in enumerate(item_texts) if index not in named_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(li.text.strip() for li in car_feature_list.find_all('li'))

        # Image URLs are taken from the data-src attribute of each gallery item
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                li['data-src'] for li in container.find_all('li') if li.has_attr('data-src')
            ]
        else:
            car_info["Pictures"] = []

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_classiccars_links(base_url, num_pages, timeout=30):
    """Collect classic-car listing URLs from the paginated search results.

    Pages 1..num_pages of ``base_url`` are requested with a ``?page=N``
    query string. Every anchor whose href contains both ``classic-cars``
    and ``for-sale-in`` (case-insensitive) is kept and resolved to an
    absolute URL against ``base_url``.

    Args:
        base_url: Search-results URL. ``https://`` is prepended when the
            scheme is missing.
        num_pages: Number of result pages to walk, starting from page 1.
        timeout: Seconds to wait for each page request.

    Returns:
        List of absolute listing URLs in the order they were found.
        Errors are printed, not raised; links gathered so far are kept.
    """
    all_links = []

    try:
        # Normalise once, up front, so that the request itself (and not only
        # urljoin) sees a URL with a scheme.
        if not base_url.startswith(('http://', 'https://')):
            base_url = 'https://' + base_url

        for page in range(1, num_pages + 1):
            url = f'{base_url}?page={page}'

            try:
                response = requests.get(url, timeout=timeout)
            except requests.RequestException as e:
                # A network failure on one page should not discard the
                # remaining pages.
                print(f"An error occurred: {e}")
                continue

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                href_lower = href.lower()
                if 'classic-cars' in href_lower and 'for-sale-in' in href_lower:
                    # urljoin turns relative hrefs into absolute URLs.
                    all_links.append(urljoin(base_url, href))
    except Exception as e:
        print(f"An error occurred: {e}")

    return all_links


# Scrape the first `num_pages` search-result pages of classic-car listings
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_classic-cars/'
num_pages = 1

# The link collector defined in this cell is get_classiccars_links
classiccars_car_links = get_classiccars_links(base_url, num_pages)

all_car_info = [scrape_car_info(link) for link in classiccars_car_links]

# Write the information to a CSV file
csv_file_path = 'AllClassic-carsCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
# These columns hold lists and are flattened into one comma-separated cell
list_columns = ("Features", "Pictures")

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write header row
    csv_writer.writerow(header)

    # Write data rows, one per scraped listing, in header order
    for car_info in all_car_info:
        row = [
            ', '.join(car_info[column]) if column in list_columns else car_info[column]
            for column in header
        ]
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

CSV File Created: AllClassic-carsCarInfo.csv


Daehan Cars CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one car listing page and extract its details into a dict.

    Args:
        url: URL of the listing page to download.
        timeout: Seconds to wait for the server before the request is
            abandoned, so a stalled connection cannot block the whole run.

    Returns:
        A dict with the keys "Name", "Price", "Year", "Mileage",
        "Fuel Type", "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity", "Body Type" (strings, or None when not found),
        plus "Features" and "Pictures" (lists of strings, possibly empty).
        Failures are printed rather than raised; the record is then
        returned empty or partially filled.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Report the failure instead of silently returning a blank record.
            print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        # The first four cells of the engine-detail table are read
        # positionally as year, mileage, fuel type and transmission.
        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            if len(td_tags) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[field] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            item_texts = [li.text.strip() for li in featured_list.find_all('li')]
            # Items at these positions are stored under named fields.
            field_positions = {
                "Registered In": 1,
                "Color": 3,
                "Assembly": 5,
                "Engine Capacity": 7,
                "Body Type": 9,
            }
            for field, position in field_positions.items():
                car_info[field] = item_texts[position] if len(item_texts) > position else None
            # Every other item goes to Features, as before.
            # NOTE(review): these are presumably the label cells plus any
            # trailing extras; verify against the page markup.
            named_positions = set(field_positions.values())
            car_info["Features"].extend(
                text for index, text in enumerate(item_texts) if index not in named_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(li.text.strip() for li in car_feature_list.find_all('li'))

        # Image URLs are taken from the data-src attribute of each gallery item
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                li['data-src'] for li in container.find_all('li') if li.has_attr('data-src')
            ]
        else:
            car_info["Pictures"] = []

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_daehan_links(base_url, num_pages, timeout=30):
    """Collect Daehan listing URLs from the paginated search results.

    Pages 1..num_pages of ``base_url`` are requested with a ``?page=N``
    query string. Every anchor whose href contains both ``daehan`` and
    ``for-sale-in`` (case-insensitive) is kept and resolved to an absolute
    URL against ``base_url``.

    Args:
        base_url: Search-results URL. ``https://`` is prepended when the
            scheme is missing.
        num_pages: Number of result pages to walk, starting from page 1.
        timeout: Seconds to wait for each page request.

    Returns:
        List of absolute listing URLs in the order they were found.
        Errors are printed, not raised; links gathered so far are kept.
    """
    all_links = []

    try:
        # Normalise once, up front, so that the request itself (and not only
        # urljoin) sees a URL with a scheme.
        if not base_url.startswith(('http://', 'https://')):
            base_url = 'https://' + base_url

        for page in range(1, num_pages + 1):
            url = f'{base_url}?page={page}'

            try:
                response = requests.get(url, timeout=timeout)
            except requests.RequestException as e:
                # A network failure on one page should not discard the
                # remaining pages.
                print(f"An error occurred: {e}")
                continue

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                href_lower = href.lower()
                if 'daehan' in href_lower and 'for-sale-in' in href_lower:
                    # urljoin turns relative hrefs into absolute URLs.
                    all_links.append(urljoin(base_url, href))
    except Exception as e:
        print(f"An error occurred: {e}")

    return all_links


# Scrape the first `num_pages` search-result pages of Daehan listings
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_daehan/'
num_pages = 1

# The link collector defined in this cell is get_daehan_links
daehan_car_links = get_daehan_links(base_url, num_pages)

all_car_info = [scrape_car_info(link) for link in daehan_car_links]

# Write the information to a CSV file
csv_file_path = 'AllDaehanCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
# These columns hold lists and are flattened into one comma-separated cell
list_columns = ("Features", "Pictures")

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write header row
    csv_writer.writerow(header)

    # Write data rows, one per scraped listing, in header order
    for car_info in all_car_info:
        row = [
            ', '.join(car_info[column]) if column in list_columns else car_info[column]
            for column in header
        ]
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

CSV File Created: AllDaehanCarInfo.csv


Datsun Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one car listing page and extract its details into a dict.

    Args:
        url: URL of the listing page to download.
        timeout: Seconds to wait for the server before the request is
            abandoned, so a stalled connection cannot block the whole run.

    Returns:
        A dict with the keys "Name", "Price", "Year", "Mileage",
        "Fuel Type", "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity", "Body Type" (strings, or None when not found),
        plus "Features" and "Pictures" (lists of strings, possibly empty).
        Failures are printed rather than raised; the record is then
        returned empty or partially filled.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Report the failure instead of silently returning a blank record.
            print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        # The first four cells of the engine-detail table are read
        # positionally as year, mileage, fuel type and transmission.
        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            if len(td_tags) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[field] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            item_texts = [li.text.strip() for li in featured_list.find_all('li')]
            # Items at these positions are stored under named fields.
            field_positions = {
                "Registered In": 1,
                "Color": 3,
                "Assembly": 5,
                "Engine Capacity": 7,
                "Body Type": 9,
            }
            for field, position in field_positions.items():
                car_info[field] = item_texts[position] if len(item_texts) > position else None
            # Every other item goes to Features, as before.
            # NOTE(review): these are presumably the label cells plus any
            # trailing extras; verify against the page markup.
            named_positions = set(field_positions.values())
            car_info["Features"].extend(
                text for index, text in enumerate(item_texts) if index not in named_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(li.text.strip() for li in car_feature_list.find_all('li'))

        # Image URLs are taken from the data-src attribute of each gallery item
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                li['data-src'] for li in container.find_all('li') if li.has_attr('data-src')
            ]
        else:
            car_info["Pictures"] = []

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_datsun_links(base_url, num_pages, timeout=30):
    """Collect Datsun listing URLs from the paginated search results.

    Pages 1..num_pages of ``base_url`` are requested with a ``?page=N``
    query string. Every anchor whose href contains both ``datsun`` and
    ``for-sale-in`` (case-insensitive) is kept and resolved to an absolute
    URL against ``base_url``.

    Args:
        base_url: Search-results URL. ``https://`` is prepended when the
            scheme is missing.
        num_pages: Number of result pages to walk, starting from page 1.
        timeout: Seconds to wait for each page request.

    Returns:
        List of absolute listing URLs in the order they were found.
        Errors are printed, not raised; links gathered so far are kept.
    """
    all_links = []

    try:
        # Normalise once, up front, so that the request itself (and not only
        # urljoin) sees a URL with a scheme.
        if not base_url.startswith(('http://', 'https://')):
            base_url = 'https://' + base_url

        for page in range(1, num_pages + 1):
            url = f'{base_url}?page={page}'

            try:
                response = requests.get(url, timeout=timeout)
            except requests.RequestException as e:
                # A network failure on one page should not discard the
                # remaining pages.
                print(f"An error occurred: {e}")
                continue

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                href_lower = href.lower()
                if 'datsun' in href_lower and 'for-sale-in' in href_lower:
                    # urljoin turns relative hrefs into absolute URLs.
                    all_links.append(urljoin(base_url, href))
    except Exception as e:
        print(f"An error occurred: {e}")

    return all_links

# Scrape the first `num_pages` search-result pages of Datsun listings
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_datsun/'
num_pages = 1

# The link collector defined in this cell is get_datsun_links
datsun_car_links = get_datsun_links(base_url, num_pages)

all_car_info = [scrape_car_info(link) for link in datsun_car_links]

# Write the information to a CSV file
csv_file_path = 'AllDatsunCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
# These columns hold lists and are flattened into one comma-separated cell
list_columns = ("Features", "Pictures")

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write header row
    csv_writer.writerow(header)

    # Write data rows, one per scraped listing, in header order
    for car_info in all_car_info:
        row = [
            ', '.join(car_info[column]) if column in list_columns else car_info[column]
            for column in header
        ]
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')



CSV File Created: AllDatsunCarInfo.csv


FAW Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one car listing page and extract its details into a dict.

    Args:
        url: URL of the listing page to download.
        timeout: Seconds to wait for the server before the request is
            abandoned, so a stalled connection cannot block the whole run.

    Returns:
        A dict with the keys "Name", "Price", "Year", "Mileage",
        "Fuel Type", "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity", "Body Type" (strings, or None when not found),
        plus "Features" and "Pictures" (lists of strings, possibly empty).
        Failures are printed rather than raised; the record is then
        returned empty or partially filled.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Report the failure instead of silently returning a blank record.
            print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.select_one('.well h1')
        if heading:
            car_info["Name"] = heading.text.strip()

        # The first four cells of the engine-detail table are read
        # positionally as year, mileage, fuel type and transmission.
        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            if len(td_tags) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[field] = cell.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if featured_list:
            item_texts = [li.text.strip() for li in featured_list.find_all('li')]
            # Items at these positions are stored under named fields.
            field_positions = {
                "Registered In": 1,
                "Color": 3,
                "Assembly": 5,
                "Engine Capacity": 7,
                "Body Type": 9,
            }
            for field, position in field_positions.items():
                car_info[field] = item_texts[position] if len(item_texts) > position else None
            # Every other item goes to Features, as before.
            # NOTE(review): these are presumably the label cells plus any
            # trailing extras; verify against the page markup.
            named_positions = set(field_positions.values())
            car_info["Features"].extend(
                text for index, text in enumerate(item_texts) if index not in named_positions
            )

        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if car_feature_list:
            car_info["Features"].extend(li.text.strip() for li in car_feature_list.find_all('li'))

        # Image URLs are taken from the data-src attribute of each gallery item
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            car_info["Pictures"] = [
                li['data-src'] for li in container.find_all('li') if li.has_attr('data-src')
            ]
        else:
            car_info["Pictures"] = []

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_faw_links(base_url, num_pages, timeout=30):
    """Collect FAW listing URLs from the paginated search results.

    Pages 1..num_pages of ``base_url`` are requested with a ``?page=N``
    query string. Every anchor whose href contains both ``faw`` and
    ``for-sale-in`` (case-insensitive) is kept and resolved to an absolute
    URL against ``base_url``.

    Args:
        base_url: Search-results URL. ``https://`` is prepended when the
            scheme is missing.
        num_pages: Number of result pages to walk, starting from page 1.
        timeout: Seconds to wait for each page request.

    Returns:
        List of absolute listing URLs in the order they were found.
        Errors are printed, not raised; links gathered so far are kept.
    """
    all_links = []

    try:
        # Normalise once, up front, so that the request itself (and not only
        # urljoin) sees a URL with a scheme.
        if not base_url.startswith(('http://', 'https://')):
            base_url = 'https://' + base_url

        for page in range(1, num_pages + 1):
            url = f'{base_url}?page={page}'

            try:
                response = requests.get(url, timeout=timeout)
            except requests.RequestException as e:
                # A network failure on one page should not discard the
                # remaining pages.
                print(f"An error occurred: {e}")
                continue

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                href_lower = href.lower()
                if 'faw' in href_lower and 'for-sale-in' in href_lower:
                    # urljoin turns relative hrefs into absolute URLs.
                    all_links.append(urljoin(base_url, href))
    except Exception as e:
        print(f"An error occurred: {e}")

    return all_links

# Scrape the first `num_pages` search-result pages of FAW listings
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_faw/'
num_pages = 1

# The link collector defined in this cell is get_faw_links
faw_car_links = get_faw_links(base_url, num_pages)

all_car_info = [scrape_car_info(link) for link in faw_car_links]

# Write the information to a CSV file
csv_file_path = 'AllFawCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
# These columns hold lists and are flattened into one comma-separated cell
list_columns = ("Features", "Pictures")

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write header row
    csv_writer.writerow(header)

    # Write data rows, one per scraped listing, in header order
    for car_info in all_car_info:
        row = [
            ', '.join(car_info[column]) if column in list_columns else car_info[column]
            for column in header
        ]
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')




CSV File Created: AllFawCarInfo.csv


Ford Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one listing page and extract its details into a flat record.

    Parameters
    ----------
    url : str
        Address of the listing page.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises instead
        of blocking the run indefinitely. Defaults to 30 seconds.

    Returns
    -------
    dict
        Keys "Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission",
        "Registered In", "Color", "Assembly", "Engine Capacity" and
        "Body Type" hold a stripped string or ``None``; "Features" and
        "Pictures" hold lists of strings (possibly empty).

    Notes
    -----
    Nothing is raised: a non-200 response or any exception is printed and the
    record collected so far is returned.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Say why the record is empty instead of returning it silently.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        title = soup.select_one('.well h1')
        if title:
            car_info["Name"] = title.text.strip()

        # The first four cells of the engine-detail table are read positionally.
        detail_table = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if detail_table:
            cells = detail_table.find_all('td')
            if len(cells) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), cells):
                    car_info[field] = cell.text.strip()

        price_box = soup.find(class_='price-box')
        if price_box:
            car_info["Price"] = price_box.text.strip()

        spec_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if spec_list:
            entries = [li.text.strip() for li in spec_list.find_all('li')]
            # Values are taken from fixed odd positions of the list.
            positions = {"Registered In": 1, "Color": 3, "Assembly": 5, "Engine Capacity": 7, "Body Type": 9}
            for field, index in positions.items():
                car_info[field] = entries[index] if len(entries) > index else None
            # NOTE(review): every remaining entry (positions 0, 2, 4, ... and
            # anything past 9) is kept as a feature; presumably some of these
            # are field labels -- confirm that is intended.
            taken = set(positions.values())
            car_info["Features"].extend([text for index, text in enumerate(entries) if index not in taken])

        feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if feature_list:
            car_info["Features"].extend([li.text.strip() for li in feature_list.find_all('li')])

        # Picture URLs come from the `data-src` attribute of each gallery <li>.
        gallery = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if gallery:
            car_info["Pictures"] = [li['data-src'] for li in gallery.find_all('li') if li.has_attr('data-src')]

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_ford_links(base_url, num_pages, timeout=30):
    """Collect Ford listing URLs from paginated search results.

    Parameters
    ----------
    base_url : str
        Search-results URL; ``?page=N`` is appended for each page. A missing
        scheme is treated as ``https://``.
    num_pages : int
        Number of pages to request, starting at page 1.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Defaults to 30 seconds.

    Returns
    -------
    list of str
        Absolute URLs of anchors whose href contains both "ford" and
        "for-sale-in" (case-insensitive), de-duplicated in first-seen order.

    Notes
    -----
    Nothing is raised: a non-200 response or an exception on one page is
    printed and the remaining pages are still requested.
    """
    all_links = []
    seen = set()
    # Loop-invariant: used both for the request and to resolve relative hrefs.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    for page in range(1, num_pages + 1):
        url = f'{base_url_with_scheme}?page={page}'
        # Errors are handled per page so one bad request does not discard the rest.
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'ford' not in lowered or 'for-sale-in' not in lowered:
                    continue
                link = urljoin(base_url_with_scheme, href)
                # The same href can appear in several anchors; keep it once.
                if link not in seen:
                    seen.add(link)
                    all_links.append(link)
        except Exception as e:
            print(f"An error occurred: {e}")

    return all_links

# Crawl `num_pages` pages of Ford search results (a single page here).
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_ford/'
num_pages = 1

# The helper defined in this cell is `get_ford_links`; the name
# `get_ford_car_links` is not defined here and raises NameError on a fresh kernel.
ford_car_links = get_ford_links(base_url, num_pages)

# One record per listing URL, in the order the links were returned.
all_car_info = []
for link in ford_car_links:
    all_car_info.append(scrape_car_info(link))

# Write the information to a CSV file
csv_file_path = 'AllFordCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)

    for car_info in all_car_info:
        # Every column except the last two is copied through unchanged.
        row = [car_info[column] for column in header[:-2]]
        # "Features" and "Pictures" are lists; flatten each into one cell.
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')





CSV File Created: AllFordCarInfo.csv


GUGO Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one listing page and extract its details into a flat record.

    Parameters
    ----------
    url : str
        Address of the listing page.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises instead
        of blocking the run indefinitely. Defaults to 30 seconds.

    Returns
    -------
    dict
        Keys "Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission",
        "Registered In", "Color", "Assembly", "Engine Capacity" and
        "Body Type" hold a stripped string or ``None``; "Features" and
        "Pictures" hold lists of strings (possibly empty).

    Notes
    -----
    Nothing is raised: a non-200 response or any exception is printed and the
    record collected so far is returned.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Say why the record is empty instead of returning it silently.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        title = soup.select_one('.well h1')
        if title:
            car_info["Name"] = title.text.strip()

        # The first four cells of the engine-detail table are read positionally.
        detail_table = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if detail_table:
            cells = detail_table.find_all('td')
            if len(cells) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), cells):
                    car_info[field] = cell.text.strip()

        price_box = soup.find(class_='price-box')
        if price_box:
            car_info["Price"] = price_box.text.strip()

        spec_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if spec_list:
            entries = [li.text.strip() for li in spec_list.find_all('li')]
            # Values are taken from fixed odd positions of the list.
            positions = {"Registered In": 1, "Color": 3, "Assembly": 5, "Engine Capacity": 7, "Body Type": 9}
            for field, index in positions.items():
                car_info[field] = entries[index] if len(entries) > index else None
            # NOTE(review): every remaining entry (positions 0, 2, 4, ... and
            # anything past 9) is kept as a feature; presumably some of these
            # are field labels -- confirm that is intended.
            taken = set(positions.values())
            car_info["Features"].extend([text for index, text in enumerate(entries) if index not in taken])

        feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if feature_list:
            car_info["Features"].extend([li.text.strip() for li in feature_list.find_all('li')])

        # Picture URLs come from the `data-src` attribute of each gallery <li>.
        gallery = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if gallery:
            car_info["Pictures"] = [li['data-src'] for li in gallery.find_all('li') if li.has_attr('data-src')]

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_gugo_links(base_url, num_pages, timeout=30):
    """Collect Gugo listing URLs from paginated search results.

    Parameters
    ----------
    base_url : str
        Search-results URL; ``?page=N`` is appended for each page. A missing
        scheme is treated as ``https://``.
    num_pages : int
        Number of pages to request, starting at page 1.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Defaults to 30 seconds.

    Returns
    -------
    list of str
        Absolute URLs of anchors whose href contains both "gugo" and
        "for-sale-in" (case-insensitive), de-duplicated in first-seen order.

    Notes
    -----
    Nothing is raised: a non-200 response or an exception on one page is
    printed and the remaining pages are still requested.
    """
    all_links = []
    seen = set()
    # Loop-invariant: used both for the request and to resolve relative hrefs.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    for page in range(1, num_pages + 1):
        url = f'{base_url_with_scheme}?page={page}'
        # Errors are handled per page so one bad request does not discard the rest.
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'gugo' not in lowered or 'for-sale-in' not in lowered:
                    continue
                link = urljoin(base_url_with_scheme, href)
                # The same href can appear in several anchors; keep it once.
                if link not in seen:
                    seen.add(link)
                    all_links.append(link)
        except Exception as e:
            print(f"An error occurred: {e}")

    return all_links

# Crawl `num_pages` pages of Gugo search results (a single page here).
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_gugo/'
num_pages = 1

# The helper defined in this cell is `get_gugo_links`; the name
# `get_gugo_car_links` is not defined here and raises NameError on a fresh kernel.
gugo_car_links = get_gugo_links(base_url, num_pages)

# One record per listing URL, in the order the links were returned.
all_car_info = []
for link in gugo_car_links:
    all_car_info.append(scrape_car_info(link))

# Write the information to a CSV file
csv_file_path = 'AllGugoCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)

    for car_info in all_car_info:
        # Every column except the last two is copied through unchanged.
        row = [car_info[column] for column in header[:-2]]
        # "Features" and "Pictures" are lists; flatten each into one cell.
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')






CSV File Created: AllGugoCarInfo.csv


HAVAL Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one listing page and extract its details into a flat record.

    Parameters
    ----------
    url : str
        Address of the listing page.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises instead
        of blocking the run indefinitely. Defaults to 30 seconds.

    Returns
    -------
    dict
        Keys "Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission",
        "Registered In", "Color", "Assembly", "Engine Capacity" and
        "Body Type" hold a stripped string or ``None``; "Features" and
        "Pictures" hold lists of strings (possibly empty).

    Notes
    -----
    Nothing is raised: a non-200 response or any exception is printed and the
    record collected so far is returned.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Say why the record is empty instead of returning it silently.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        title = soup.select_one('.well h1')
        if title:
            car_info["Name"] = title.text.strip()

        # The first four cells of the engine-detail table are read positionally.
        detail_table = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if detail_table:
            cells = detail_table.find_all('td')
            if len(cells) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), cells):
                    car_info[field] = cell.text.strip()

        price_box = soup.find(class_='price-box')
        if price_box:
            car_info["Price"] = price_box.text.strip()

        spec_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if spec_list:
            entries = [li.text.strip() for li in spec_list.find_all('li')]
            # Values are taken from fixed odd positions of the list.
            positions = {"Registered In": 1, "Color": 3, "Assembly": 5, "Engine Capacity": 7, "Body Type": 9}
            for field, index in positions.items():
                car_info[field] = entries[index] if len(entries) > index else None
            # NOTE(review): every remaining entry (positions 0, 2, 4, ... and
            # anything past 9) is kept as a feature; presumably some of these
            # are field labels -- confirm that is intended.
            taken = set(positions.values())
            car_info["Features"].extend([text for index, text in enumerate(entries) if index not in taken])

        feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if feature_list:
            car_info["Features"].extend([li.text.strip() for li in feature_list.find_all('li')])

        # Picture URLs come from the `data-src` attribute of each gallery <li>.
        gallery = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if gallery:
            car_info["Pictures"] = [li['data-src'] for li in gallery.find_all('li') if li.has_attr('data-src')]

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_haval_links(base_url, num_pages, timeout=30):
    """Collect Haval listing URLs from paginated search results.

    Parameters
    ----------
    base_url : str
        Search-results URL; ``?page=N`` is appended for each page. A missing
        scheme is treated as ``https://``.
    num_pages : int
        Number of pages to request, starting at page 1.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Defaults to 30 seconds.

    Returns
    -------
    list of str
        Absolute URLs of anchors whose href contains both "haval" and
        "for-sale-in" (case-insensitive), de-duplicated in first-seen order.

    Notes
    -----
    Nothing is raised: a non-200 response or an exception on one page is
    printed and the remaining pages are still requested.
    """
    all_links = []
    seen = set()
    # Loop-invariant: used both for the request and to resolve relative hrefs.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    for page in range(1, num_pages + 1):
        url = f'{base_url_with_scheme}?page={page}'
        # Errors are handled per page so one bad request does not discard the rest.
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'haval' not in lowered or 'for-sale-in' not in lowered:
                    continue
                link = urljoin(base_url_with_scheme, href)
                # The same href can appear in several anchors; keep it once.
                if link not in seen:
                    seen.add(link)
                    all_links.append(link)
        except Exception as e:
            print(f"An error occurred: {e}")

    return all_links

# Crawl `num_pages` pages of Haval search results (five pages here).
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_haval/'
num_pages = 5

# The helper defined in this cell is `get_haval_links`; the name
# `get_haval_car_links` is not defined here and raises NameError on a fresh kernel.
haval_car_links = get_haval_links(base_url, num_pages)

# One record per listing URL, in the order the links were returned.
all_car_info = []
for link in haval_car_links:
    all_car_info.append(scrape_car_info(link))

# Write the information to a CSV file
csv_file_path = 'AllHavalCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)

    for car_info in all_car_info:
        # Every column except the last two is copied through unchanged.
        row = [car_info[column] for column in header[:-2]]
        # "Features" and "Pictures" are lists; flatten each into one cell.
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')







CSV File Created: AllHavalCarInfo.csv


Hummer Car CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one listing page and extract its details into a flat record.

    Parameters
    ----------
    url : str
        Address of the listing page.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises instead
        of blocking the run indefinitely. Defaults to 30 seconds.

    Returns
    -------
    dict
        Keys "Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission",
        "Registered In", "Color", "Assembly", "Engine Capacity" and
        "Body Type" hold a stripped string or ``None``; "Features" and
        "Pictures" hold lists of strings (possibly empty).

    Notes
    -----
    Nothing is raised: a non-200 response or any exception is printed and the
    record collected so far is returned.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Say why the record is empty instead of returning it silently.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        title = soup.select_one('.well h1')
        if title:
            car_info["Name"] = title.text.strip()

        # The first four cells of the engine-detail table are read positionally.
        detail_table = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if detail_table:
            cells = detail_table.find_all('td')
            if len(cells) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), cells):
                    car_info[field] = cell.text.strip()

        price_box = soup.find(class_='price-box')
        if price_box:
            car_info["Price"] = price_box.text.strip()

        spec_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if spec_list:
            entries = [li.text.strip() for li in spec_list.find_all('li')]
            # Values are taken from fixed odd positions of the list.
            positions = {"Registered In": 1, "Color": 3, "Assembly": 5, "Engine Capacity": 7, "Body Type": 9}
            for field, index in positions.items():
                car_info[field] = entries[index] if len(entries) > index else None
            # NOTE(review): every remaining entry (positions 0, 2, 4, ... and
            # anything past 9) is kept as a feature; presumably some of these
            # are field labels -- confirm that is intended.
            taken = set(positions.values())
            car_info["Features"].extend([text for index, text in enumerate(entries) if index not in taken])

        feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if feature_list:
            car_info["Features"].extend([li.text.strip() for li in feature_list.find_all('li')])

        # Picture URLs come from the `data-src` attribute of each gallery <li>.
        gallery = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if gallery:
            car_info["Pictures"] = [li['data-src'] for li in gallery.find_all('li') if li.has_attr('data-src')]

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_hummer_links(base_url, num_pages, timeout=30):
    """Collect Hummer listing URLs from paginated search results.

    Parameters
    ----------
    base_url : str
        Search-results URL; ``?page=N`` is appended for each page. A missing
        scheme is treated as ``https://``.
    num_pages : int
        Number of pages to request, starting at page 1.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Defaults to 30 seconds.

    Returns
    -------
    list of str
        Absolute URLs of anchors whose href contains both "hummer" and
        "for-sale-in" (case-insensitive), de-duplicated in first-seen order.

    Notes
    -----
    Nothing is raised: a non-200 response or an exception on one page is
    printed and the remaining pages are still requested.
    """
    all_links = []
    seen = set()
    # Loop-invariant: used both for the request and to resolve relative hrefs.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    for page in range(1, num_pages + 1):
        url = f'{base_url_with_scheme}?page={page}'
        # Errors are handled per page so one bad request does not discard the rest.
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'hummer' not in lowered or 'for-sale-in' not in lowered:
                    continue
                link = urljoin(base_url_with_scheme, href)
                # The same href can appear in several anchors; keep it once.
                if link not in seen:
                    seen.add(link)
                    all_links.append(link)
        except Exception as e:
            print(f"An error occurred: {e}")

    return all_links

# Crawl `num_pages` pages of Hummer search results (a single page here).
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_hummer/'
num_pages = 1

# The helper defined in this cell is `get_hummer_links`; the name
# `get_hummer_car_links` is not defined here and raises NameError on a fresh kernel.
hummer_car_links = get_hummer_links(base_url, num_pages)

# One record per listing URL, in the order the links were returned.
all_car_info = []
for link in hummer_car_links:
    all_car_info.append(scrape_car_info(link))

# Write the information to a CSV file
csv_file_path = 'AllHummerCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)

    for car_info in all_car_info:
        # Every column except the last two is copied through unchanged.
        row = [car_info[column] for column in header[:-2]]
        # "Features" and "Pictures" are lists; flatten each into one cell.
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')




CSV File Created: AllHummerCarInfo.csv


TOYOTA CAR CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one listing page and extract its details into a flat record.

    Parameters
    ----------
    url : str
        Address of the listing page.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises instead
        of blocking the run indefinitely. Defaults to 30 seconds.

    Returns
    -------
    dict
        Keys "Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission",
        "Registered In", "Color", "Assembly", "Engine Capacity" and
        "Body Type" hold a stripped string or ``None``; "Features" and
        "Pictures" hold lists of strings (possibly empty).

    Notes
    -----
    Nothing is raised: a non-200 response or any exception is printed and the
    record collected so far is returned.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Say why the record is empty instead of returning it silently.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        title = soup.select_one('.well h1')
        if title:
            car_info["Name"] = title.text.strip()

        # The first four cells of the engine-detail table are read positionally.
        detail_table = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if detail_table:
            cells = detail_table.find_all('td')
            if len(cells) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), cells):
                    car_info[field] = cell.text.strip()

        price_box = soup.find(class_='price-box')
        if price_box:
            car_info["Price"] = price_box.text.strip()

        spec_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if spec_list:
            entries = [li.text.strip() for li in spec_list.find_all('li')]
            # Values are taken from fixed odd positions of the list.
            positions = {"Registered In": 1, "Color": 3, "Assembly": 5, "Engine Capacity": 7, "Body Type": 9}
            for field, index in positions.items():
                car_info[field] = entries[index] if len(entries) > index else None
            # NOTE(review): every remaining entry (positions 0, 2, 4, ... and
            # anything past 9) is kept as a feature; presumably some of these
            # are field labels -- confirm that is intended.
            taken = set(positions.values())
            car_info["Features"].extend([text for index, text in enumerate(entries) if index not in taken])

        feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if feature_list:
            car_info["Features"].extend([li.text.strip() for li in feature_list.find_all('li')])

        # Picture URLs come from the `data-src` attribute of each gallery <li>.
        gallery = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if gallery:
            car_info["Pictures"] = [li['data-src'] for li in gallery.find_all('li') if li.has_attr('data-src')]

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_toyota_car_links(base_url, num_pages, timeout=30):
    """Collect Toyota listing URLs from paginated search results.

    Parameters
    ----------
    base_url : str
        Search-results URL; ``?page=N`` is appended for each page. A missing
        scheme is treated as ``https://``.
    num_pages : int
        Number of pages to request, starting at page 1.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Defaults to 30 seconds.

    Returns
    -------
    list of str
        Absolute URLs of anchors whose href contains both "toyota" and
        "for-sale-in" (case-insensitive), de-duplicated in first-seen order.

    Notes
    -----
    Nothing is raised: a non-200 response or an exception on one page is
    printed and the remaining pages are still requested.
    """
    all_links = []
    seen = set()
    # Loop-invariant: used both for the request and to resolve relative hrefs.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    for page in range(1, num_pages + 1):
        url = f'{base_url_with_scheme}?page={page}'
        # Errors are handled per page so one bad request does not discard the rest.
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                lowered = href.lower()
                if 'toyota' not in lowered or 'for-sale-in' not in lowered:
                    continue
                link = urljoin(base_url_with_scheme, href)
                # The same href can appear in several anchors; keep it once.
                if link not in seen:
                    seen.add(link)
                    all_links.append(link)
        except Exception as e:
            print(f"An error occurred: {e}")

    return all_links


# Crawl `num_pages` pages of Toyota results (760 here, so this is a long run).
base_url = 'https://www.pakwheels.com/used-cars/toyota/33'
num_pages = 760

toyota_car_links = get_toyota_car_links(base_url, num_pages)

# One record per listing URL, in the order the links were returned.
all_car_info = []
for link in toyota_car_links:
    all_car_info.append(scrape_car_info(link))

# Write the information to a CSV file
csv_file_path = 'AllToyotaCarInfo.csv'
header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)

    for car_info in all_car_info:
        # Every column except the last two is copied through unchanged.
        row = [car_info[column] for column in header[:-2]]
        # "Features" and "Pictures" are lists; flatten each into one cell.
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

SUZUKI CARS CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Fetch one listing page and extract its details into a flat record.

    Parameters
    ----------
    url : str
        Address of the listing page.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises instead
        of blocking the run indefinitely. Defaults to 30 seconds.

    Returns
    -------
    dict
        Keys "Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission",
        "Registered In", "Color", "Assembly", "Engine Capacity" and
        "Body Type" hold a stripped string or ``None``; "Features" and
        "Pictures" hold lists of strings (possibly empty).

    Notes
    -----
    Nothing is raised: a non-200 response or any exception is printed and the
    record collected so far is returned.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Say why the record is empty instead of returning it silently.
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        title = soup.select_one('.well h1')
        if title:
            car_info["Name"] = title.text.strip()

        # The first four cells of the engine-detail table are read positionally.
        detail_table = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if detail_table:
            cells = detail_table.find_all('td')
            if len(cells) >= 4:
                for field, cell in zip(("Year", "Mileage", "Fuel Type", "Transmission"), cells):
                    car_info[field] = cell.text.strip()

        price_box = soup.find(class_='price-box')
        if price_box:
            car_info["Price"] = price_box.text.strip()

        spec_list = soup.find(class_='list-unstyled ul-featured clearfix')
        if spec_list:
            entries = [li.text.strip() for li in spec_list.find_all('li')]
            # Values are taken from fixed odd positions of the list.
            positions = {"Registered In": 1, "Color": 3, "Assembly": 5, "Engine Capacity": 7, "Body Type": 9}
            for field, index in positions.items():
                car_info[field] = entries[index] if len(entries) > index else None
            # NOTE(review): every remaining entry (positions 0, 2, 4, ... and
            # anything past 9) is kept as a feature; presumably some of these
            # are field labels -- confirm that is intended.
            taken = set(positions.values())
            car_info["Features"].extend([text for index, text in enumerate(entries) if index not in taken])

        feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')
        if feature_list:
            car_info["Features"].extend([li.text.strip() for li in feature_list.find_all('li')])

        # Picture URLs come from the `data-src` attribute of each gallery <li>.
        gallery = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if gallery:
            car_info["Pictures"] = [li['data-src'] for li in gallery.find_all('li') if li.has_attr('data-src')]

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_suzuki_car_links(base_url, num_pages, timeout=30):
    """Collect Suzuki advert URLs from paginated listing pages.

    Requests ``<base_url>?page=N`` for N in 1..num_pages and keeps every
    anchor whose href contains both ``suzuki`` and ``for-sale-in``
    (case-insensitive substring match). Relative hrefs are resolved against
    ``base_url`` with ``urljoin``.

    Args:
        base_url: Listing URL without the ``page`` query parameter. If it does
            not start with ``http://`` or ``https://``, ``https://`` is
            prepended for both the request and the link resolution.
        num_pages: Number of listing pages to request, starting at page 1.
        timeout: Seconds to wait for each HTTP response before giving up on
            that page.

    Returns:
        A list of absolute URLs, each appearing once, in first-seen order.

    Errors are reported with ``print`` and never raised: a page that fails or
    answers with a non-200 status is skipped and the crawl continues.
    """
    # Qualify the base once so the page request and urljoin both see a scheme.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    all_links = []
    seen_links = set()

    for page in range(1, num_pages + 1):
        url = f'{base_url_with_scheme}?page={page}'

        # Errors are handled per page so one bad response does not throw away
        # the remaining pages of a long crawl.
        try:
            response = requests.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                href_lower = href.lower()
                if 'suzuki' not in href_lower or 'for-sale-in' not in href_lower:
                    continue

                link = urljoin(base_url_with_scheme, href)
                # Skip URLs already collected so no advert is scraped twice.
                if link not in seen_links:
                    seen_links.add(link)
                    all_links.append(link)
        except Exception as e:
            print(f"An error occurred: {e}")

    return all_links


# Crawl settings: the Suzuki listing root and how many result pages to walk.
base_url = 'https://www.pakwheels.com/used-cars/suzuki/32'
num_pages = 572

# Gather the advert URLs first, then scrape each advert page in turn.
suzuki_car_links = get_suzuki_car_links(base_url, num_pages)

all_car_info = []
for link in suzuki_car_links:
    all_car_info.append(scrape_car_info(link))

# Persist the scraped records, one advert per CSV row.
csv_file_path = 'AllSuzukiCarInfo.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column order of the output file; the last two columns hold lists.
    header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
    csv_writer.writerow(header)

    scalar_columns = header[:-2]
    for car_info in all_car_info:
        # Scalar fields are copied as-is; list fields are flattened into one cell.
        row = [car_info[column] for column in scalar_columns]
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

ALL OTHER CARS CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Scrape a single advert page into a flat record.

    Args:
        url: Absolute URL of the advert page.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys "Name", "Price", "Year", "Mileage", "Fuel Type",
        "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity", "Body Type" (each a stripped string, or None when
        the element was not found) plus "Features" and "Pictures" (lists of
        strings, possibly empty).

    Errors are reported with ``print`` and never raised. On a non-200 status
    or an exception, the record is returned with whatever fields had been
    filled in up to that point.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        well_parent_h1 = soup.select_one('.well h1')
        if well_parent_h1:
            car_info["Name"] = well_parent_h1.text.strip()

        # The first four cells of the engine-detail table are read positionally.
        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            if len(td_tags) >= 4:
                for field, td_tag in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[field] = td_tag.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')

        if featured_list:
            featured_texts = [li_element.text.strip() for li_element in featured_list.find_all('li')]

            # Values are taken from fixed odd positions of the list.
            field_positions = {
                "Registered In": 1,
                "Color": 3,
                "Assembly": 5,
                "Engine Capacity": 7,
                "Body Type": 9,
            }
            for field, position in field_positions.items():
                car_info[field] = featured_texts[position] if len(featured_texts) > position else None

            # NOTE(review): every other item of this list is appended to
            # "Features"; presumably the items at even positions are field
            # labels rather than features - confirm against a live page.
            value_positions = set(field_positions.values())
            car_info["Features"].extend(
                text for position, text in enumerate(featured_texts) if position not in value_positions
            )

        if car_feature_list:
            car_info["Features"].extend(li_element.text.strip() for li_element in car_feature_list.find_all('li'))

        # Picture URLs come from the data-src attribute of each gallery <li>.
        image_links = []
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            image_links = [image['data-src'] for image in container.find_all('li') if image.has_attr('data-src')]

        car_info["Pictures"] = image_links

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_other_car_links(base_url, num_pages, timeout=30):
    """Collect advert URLs for the less common makes from paginated listings.

    Requests ``<base_url>?page=N`` for N in 1..num_pages and keeps every
    anchor whose href contains ``for-sale-in`` together with one of the make
    names listed below (case-insensitive substring match). Relative hrefs are
    resolved against ``base_url`` with ``urljoin``.

    Args:
        base_url: Listing URL without the ``page`` query parameter. If it does
            not start with ``http://`` or ``https://``, ``https://`` is
            prepended for both the request and the link resolution.
        num_pages: Number of listing pages to request, starting at page 1.
        timeout: Seconds to wait for each HTTP response before giving up on
            that page.

    Returns:
        A list of absolute URLs, each appearing once, in first-seen order.

    Errors are reported with ``print`` and never raised: a page that fails or
    answers with a non-200 status is skipped and the crawl continues.
    """
    # 'Dodge' is included because the search URL used in this notebook filters
    # on mk_dodge as well.
    brands = (
        'Isuzu', 'JMC', 'Jaguar', 'KIA', 'Lexus', 'MINI', 'Mazda', 'Mitsubishi',
        'Porsche', 'Prince', 'Proton', 'Range Rover', 'Rinco', 'Seres', 'Sogo',
        'SsangYong', 'Subaru', 'Tesla', 'United', 'Volkswagen', 'Volvo',
        'Willys', 'ZOTYE', 'Alfa Romeo', 'BAIC', 'Bentley', 'Cadillac', 'Chery',
        'Chrysler', 'DFSK', 'Daewoo', 'Dodge', 'Fiat', 'GMC', 'Genesis', 'Hino',
        'Hyundai', 'JAC', 'JW Forland', 'Jeep', 'Land Rover', 'MG', 'Master',
        'Mercedes Benz', 'Mushtaq', 'ORA', 'Peugeot', 'Power',
    )

    # Multi-word makes are hyphenated in this site's URLs (e.g.
    # ``mk_mercedes-benz`` in the notebook's search URL), so match the
    # hyphenated spelling in addition to the spaced one.
    # NOTE(review): this is a plain substring test, so short names such as
    # 'mg' or 'ora' also match hrefs that merely contain those letters.
    brand_tokens = set()
    for brand in brands:
        lowered = brand.lower()
        brand_tokens.add(lowered)
        brand_tokens.add(lowered.replace(' ', '-'))

    # Qualify the base once so the page request and urljoin both see a scheme.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    all_links = []
    seen_links = set()

    for page in range(1, num_pages + 1):
        url = f'{base_url_with_scheme}?page={page}'

        # Errors are handled per page so one bad response does not throw away
        # the remaining pages of a long crawl.
        try:
            response = requests.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                href_lower = href.lower()
                if 'for-sale-in' not in href_lower:
                    continue
                if not any(token in href_lower for token in brand_tokens):
                    continue

                link = urljoin(base_url_with_scheme, href)
                # Skip URLs already collected so no advert is scraped twice.
                if link not in seen_links:
                    seen_links.add(link)
                    all_links.append(link)
        except Exception as e:
            print(f"An error occurred: {e}")

    return all_links


# Crawl settings: a search URL covering the remaining makes and how many
# result pages to walk.
base_url = 'https://www.pakwheels.com/used-cars/search/-/mk_alfa-romeo/mk_baic/mk_bentley/mk_cadillac/mk_chery/mk_chrysler/mk_dfsk/mk_daewoo/mk_dodge/mk_fiat/mk_gmc/mk_genesis/mk_hino/mk_hyundai/mk_isuzu/mk_jac/mk_jmc/mk_jw-forland/mk_jaguar/mk_jeep/mk_kia/mk_land-rover/mk_lexus/mk_mg/mk_mini/mk_master/mk_mazda/mk_mercedes-benz/mk_mitsubishi/mk_mushtaq/mk_ora/mk_others/mk_peugeot/mk_porsche/mk_power/mk_prince/mk_proton/mk_range-rover/mk_rinco/mk_seres/mk_sogo/mk_ssangyong/mk_subaru/mk_tesla/mk_united/mk_volkswagen/mk_volvo/mk_willys/mk_zotye/'
num_pages = 279

# Gather the advert URLs first, then scrape each advert page in turn.
other_car_links = get_other_car_links(base_url, num_pages)

all_car_info = []
for link in other_car_links:
    all_car_info.append(scrape_car_info(link))

# Persist the scraped records, one advert per CSV row.
csv_file_path = 'AllOtherCarInfo.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column order of the output file; the last two columns hold lists.
    header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
    csv_writer.writerow(header)

    scalar_columns = header[:-2]
    for car_info in all_car_info:
        # Scalar fields are copied as-is; list fields are flattened into one cell.
        row = [car_info[column] for column in scalar_columns]
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')

HONDA CARS CSV

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin

def scrape_car_info(url, timeout=30):
    """Scrape a single advert page into a flat record.

    Args:
        url: Absolute URL of the advert page.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys "Name", "Price", "Year", "Mileage", "Fuel Type",
        "Transmission", "Registered In", "Color", "Assembly",
        "Engine Capacity", "Body Type" (each a stripped string, or None when
        the element was not found) plus "Features" and "Pictures" (lists of
        strings, possibly empty).

    Errors are reported with ``print`` and never raised. On a non-200 status
    or an exception, the record is returned with whatever fields had been
    filled in up to that point.
    """
    car_info = {
        "Name": None,
        "Price": None,
        "Year": None,
        "Mileage": None,
        "Fuel Type": None,
        "Transmission": None,
        "Registered In": None,
        "Color": None,
        "Assembly": None,
        "Engine Capacity": None,
        "Body Type": None,
        "Features": [],
        "Pictures": []
    }

    try:
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Error: Unable to fetch {url}. Status code: {response.status_code}")
            return car_info

        soup = BeautifulSoup(response.content, 'html.parser')

        well_parent_h1 = soup.select_one('.well h1')
        if well_parent_h1:
            car_info["Name"] = well_parent_h1.text.strip()

        # The first four cells of the engine-detail table are read positionally.
        table_element = soup.find(class_='table table-bordered text-center table-engine-detail fs16')
        if table_element:
            td_tags = table_element.find_all('td')
            if len(td_tags) >= 4:
                for field, td_tag in zip(("Year", "Mileage", "Fuel Type", "Transmission"), td_tags):
                    car_info[field] = td_tag.text.strip()

        price_box_element = soup.find(class_='price-box')
        if price_box_element:
            car_info["Price"] = price_box_element.text.strip()

        featured_list = soup.find(class_='list-unstyled ul-featured clearfix')
        car_feature_list = soup.find(class_='list-unstyled car-feature-list nomargin')

        if featured_list:
            featured_texts = [li_element.text.strip() for li_element in featured_list.find_all('li')]

            # Values are taken from fixed odd positions of the list.
            field_positions = {
                "Registered In": 1,
                "Color": 3,
                "Assembly": 5,
                "Engine Capacity": 7,
                "Body Type": 9,
            }
            for field, position in field_positions.items():
                car_info[field] = featured_texts[position] if len(featured_texts) > position else None

            # NOTE(review): every other item of this list is appended to
            # "Features"; presumably the items at even positions are field
            # labels rather than features - confirm against a live page.
            value_positions = set(field_positions.values())
            car_info["Features"].extend(
                text for position, text in enumerate(featured_texts) if position not in value_positions
            )

        if car_feature_list:
            car_info["Features"].extend(li_element.text.strip() for li_element in car_feature_list.find_all('li'))

        # Picture URLs come from the data-src attribute of each gallery <li>.
        image_links = []
        container = soup.find('ul', class_='gallery light-gallery list-unstyled cS-hidden')
        if container:
            image_links = [image['data-src'] for image in container.find_all('li') if image.has_attr('data-src')]

        car_info["Pictures"] = image_links

    except Exception as e:
        print(f"An error occurred while scraping car info: {e}")

    return car_info

def get_honda_car_links(base_url, num_pages, timeout=30):
    """Collect Honda advert URLs from paginated listing pages.

    Requests ``<base_url>?page=N`` for N in 1..num_pages and keeps every
    anchor whose href contains both ``honda`` and ``for-sale-in``
    (case-insensitive substring match). Relative hrefs are resolved against
    ``base_url`` with ``urljoin``.

    Args:
        base_url: Listing URL without the ``page`` query parameter. If it does
            not start with ``http://`` or ``https://``, ``https://`` is
            prepended for both the request and the link resolution.
        num_pages: Number of listing pages to request, starting at page 1.
        timeout: Seconds to wait for each HTTP response before giving up on
            that page.

    Returns:
        A list of absolute URLs, each appearing once, in first-seen order.

    Errors are reported with ``print`` and never raised: a page that fails or
    answers with a non-200 status is skipped and the crawl continues.
    """
    # Qualify the base once so the page request and urljoin both see a scheme.
    base_url_with_scheme = base_url if base_url.startswith(('http://', 'https://')) else 'https://' + base_url

    all_links = []
    seen_links = set()

    for page in range(1, num_pages + 1):
        url = f'{base_url_with_scheme}?page={page}'

        # Errors are handled per page so one bad response does not throw away
        # the remaining pages of a long crawl.
        try:
            response = requests.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                href_lower = href.lower()
                if 'honda' not in href_lower or 'for-sale-in' not in href_lower:
                    continue

                link = urljoin(base_url_with_scheme, href)
                # Skip URLs already collected so no advert is scraped twice.
                if link not in seen_links:
                    seen_links.add(link)
                    all_links.append(link)
        except Exception as e:
            print(f"An error occurred: {e}")

    return all_links


# Crawl settings: the Honda listing root and how many result pages to walk.
base_url = 'https://www.pakwheels.com/used-cars/honda/14'
num_pages = 454

# Gather the advert URLs first, then scrape each advert page in turn.
honda_car_links = get_honda_car_links(base_url, num_pages)

all_car_info = []
for link in honda_car_links:
    all_car_info.append(scrape_car_info(link))

# Persist the scraped records, one advert per CSV row.
csv_file_path = 'AllHondaCarInfo.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column order of the output file; the last two columns hold lists.
    header = ["Name", "Price", "Year", "Mileage", "Fuel Type", "Transmission", "Registered In", "Color", "Assembly", "Engine Capacity", "Body Type", "Features", "Pictures"]
    csv_writer.writerow(header)

    scalar_columns = header[:-2]
    for car_info in all_car_info:
        # Scalar fields are copied as-is; list fields are flattened into one cell.
        row = [car_info[column] for column in scalar_columns]
        row.append(', '.join(car_info["Features"]))
        row.append(', '.join(car_info["Pictures"]))
        csv_writer.writerow(row)

print(f'CSV File Created: {csv_file_path}')