In [21]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
import pandas as pd
import time

# Path to msedgedriver.exe
driver_path = "C:/windrivers/msedgedriver.exe"  # Replace with the correct path

# Configure Edge WebDriver
options = Options()
options.add_argument("--headless")  # Optional: Run without showing the browser
options.add_argument("--disable-gpu")
service = Service(driver_path)
driver = webdriver.Edge(service=service, options=options)

# Function to scrape a single page
def scrape_page(url):
    driver.get(url)
    time.sleep(3)  # Wait for the page to load fully

    # Locate all article elements for listings
    listings = driver.find_elements(By.TAG_NAME, "article")

    data = []
    for listing in listings:
        try:
            # Extract title
            title = listing.find_element(By.TAG_NAME, "a").get_attribute("title")
        except:
            title = "N/A"
        
        try:
            # Extract price
            price = listing.find_element(By.CLASS_NAME, "absolute").text
        except:
            price = "N/A"
        
        try:
            # Extract link
            link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
        except:
            link = "N/A"
        
        try:
            # Extract image URL
            image_url = listing.find_element(By.TAG_NAME, "img").get_attribute("src")
        except:
            image_url = "N/A"
        
        # Append extracted data
        data.append({
            "Title": title,
            "Price": price,
            "Link": link,
            "Image URL": image_url
        })
    
    return data

# Scrape multiple pages
def scrape_multiple_pages(base_url, start_page=1, end_page=5):
    all_data = []
    for page in range(start_page, end_page + 1):
        print(f"Scraping page {page}...")
        url = f"{base_url}?page={page}"
        page_data = scrape_page(url)
        all_data.extend(page_data)
    return all_data

# Scraping the data
base_url = "https://www.tayara.tn/ads/c/Véhicules/"
data = scrape_multiple_pages(base_url, start_page=1, end_page=5)

# Save to a CSV file
df = pd.DataFrame(data)
df.to_csv("tayara_vehicles.csv", index=False, encoding="utf-8-sig")

# Close the WebDriver
driver.quit()

# Preview the data
print(df.head())


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
  Title Price                                               Link  \
0              https://www.tayara.tn/item/669697bfb2871d2d8db...   
1              https://www.tayara.tn/item/671c7da1fadab5dc3dd...   
2              https://www.tayara.tn/item/66e5394cb3c14db342c...   
3              https://www.tayara.tn/item/671c842799465ac42a6...   
4              https://www.tayara.tn/item/671c7b3499465ac42a6...   

                                           Image URL  
0  https://www.tayara.tn/mediaGateway/resize-imag...  
1  https://www.tayara.tn/mediaGateway/resize-imag...  
2  https://www.tayara.tn/mediaGateway/resize-imag...  
3  https://www.tayara.tn/mediaGateway/resize-imag...  
4  https://www.tayara.tn/mediaGateway/resize-imag...  


In [23]:
df


Unnamed: 0,Title,Price,Link,Image URL
0,,,https://www.tayara.tn/item/669697bfb2871d2d8db...,https://www.tayara.tn/mediaGateway/resize-imag...
1,,,https://www.tayara.tn/item/671c7da1fadab5dc3dd...,https://www.tayara.tn/mediaGateway/resize-imag...
2,,,https://www.tayara.tn/item/66e5394cb3c14db342c...,https://www.tayara.tn/mediaGateway/resize-imag...
3,,,https://www.tayara.tn/item/671c842799465ac42a6...,https://www.tayara.tn/mediaGateway/resize-imag...
4,,,https://www.tayara.tn/item/671c7b3499465ac42a6...,https://www.tayara.tn/mediaGateway/resize-imag...
...,...,...,...,...
187,,,https://www.tayara.tn/item/67472e490d8480a1e01...,https://www.tayara.tn/mediaGateway/resize-imag...
188,,,https://www.tayara.tn/item/67472e1f3c20a99fb55...,https://www.tayara.tn/mediaGateway/resize-imag...
189,,,https://www.tayara.tn/item/67472de33c20a99fb55...,https://www.tayara.tn/mediaGateway/resize-imag...
190,,,https://www.tayara.tn/item/67472cee0d8480a1e01...,https://www.tayara.tn/mediaGateway/resize-imag...


In [25]:
import os
import requests

def download_images_from_csv(df, image_column, output_folder):
    """
    Downloads images from URLs stored in a specified column of a DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame containing the image URLs.
        image_column (str): The column name in the DataFrame that contains image URLs.
        output_folder (str): The folder where the images will be saved.

    Returns:
        None
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Loop through the DataFrame and download images
    for index, row in df.iterrows():
        image_url = row[image_column]
        if image_url != "N/A" and isinstance(image_url, str):  # Skip invalid or missing URLs
            try:
                # Get the image data
                response = requests.get(image_url, stream=True)
                response.raise_for_status()  # Check for errors

                # Save the image
                image_name = f"{index + 1}.jpg"  # Name the images by row index
                image_path = os.path.join(output_folder, image_name)
                with open(image_path, "wb") as file:
                    for chunk in response.iter_content(1024):
                        file.write(chunk)

                print(f"Downloaded: {image_name} from {image_url}")
            except Exception as e:
                print(f"Failed to download {image_url}: {e}")

# Example usage:
# Assuming `df` is your DataFrame containing a column "Image URL"
# download_images_from_csv(df, image_column="Image URL", output_folder="downloaded_images")


In [27]:
download_images_from_csv(df, image_column="Image URL", output_folder="vehicules")


Downloaded: 1.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=b4/b4931c58-7f4a-478c-8b95-01afca1de5f7&w=300
Downloaded: 2.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=9b/9ba09ac3-7659-4714-9deb-49b49a5933ae&w=300
Downloaded: 3.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=49/4987e4d9-61fa-4b62-b1ce-7075bb11ea26&w=300
Downloaded: 4.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=ae/aee8f1da-6293-4fcf-af5b-e282c616a7e3&w=300
Downloaded: 5.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=8b/8b0447e3-2b31-4787-8013-52c0d123c75b&w=300
Downloaded: 6.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=63/63335b9b-0295-4f4a-a1ea-aa405c1f390b&w=300
Downloaded: 7.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=fc/fccb0e34-693f-4fd4-bb98-77d31fbca914&w=100&w=100
Downloaded: 8.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=62/62c2b9d6-371c-4f03-a162-1559be370c04&w=300
Downloaded: 9.jpg from htt