In [14]:
# Import required libraries
import os
import requests
import pandas as pd
import re

# Define the function to download images
# Matches the `w=<digits>` query parameter only when it is a whole parameter
# (preceded by `?` or `&`), so look-alikes such as `maxw=300` are left alone.
_WIDTH_PARAM = re.compile(r"(?<=[?&])w=\d+")


def _save_image(url, image_path, timeout, chunk_size=64 * 1024):
    """Stream `url` to `image_path`, leaving no partial file behind on failure."""
    # Download into a sibling ".part" file and rename only once the transfer
    # has finished, so an interrupted download never looks like a real image.
    partial_path = f"{image_path}.part"
    try:
        # The context manager releases the connection even if an error occurs;
        # the timeout keeps one stalled server from blocking the whole batch.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size):
                    file.write(chunk)
        os.replace(partial_path, image_path)
    finally:
        # After a successful rename the ".part" file no longer exists; it is
        # only still around when something above failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def download_images_from_csv_file(
    csv_file: str,
    image_column: str,
    output_folder: str,
    fixed_width: int = 1920,
    timeout: float = 30,
) -> None:
    """
    Download every image listed in a CSV column, requesting a fixed width.

    Each URL's `w=<number>` query parameter is rewritten to `w=<fixed_width>`
    before the request is made; URLs without such a parameter are requested
    unchanged. Images are saved as `<row number>.jpg` (1-based, following the
    DataFrame index) regardless of the format the server actually returns.

    Rows whose value is not a string (e.g. empty cells, which pandas reads as
    NaN) or is the literal "N/A" are skipped. A failure on one row is printed
    and does not stop the remaining rows.

    Args:
        csv_file (str): Path to the CSV file containing the image URLs.
        image_column (str): The column name in the CSV that contains the image URLs.
        output_folder (str): The folder where the images will be saved
            (created if it does not exist).
        fixed_width (int): The desired width for images (e.g., 1920).
        timeout (float): Seconds to wait for the server before giving up on
            a single request.

    Returns:
        None
    """
    # Read the CSV into a DataFrame
    df = pd.read_csv(csv_file)
    print(f"Loaded {len(df)} rows from {csv_file}")

    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    width_param = f"w={fixed_width}"

    # Loop through the DataFrame and download images
    for index, row in df.iterrows():
        image_url = row[image_column]
        # Skip invalid or missing URLs
        if not isinstance(image_url, str) or image_url == "N/A":
            continue

        try:
            # Modify the `w=` parameter in the URL to the desired width
            modified_url = _WIDTH_PARAM.sub(width_param, image_url)
            print(f"Original URL: {image_url}", flush=True)  # Debugging
            print(f"Modified URL: {modified_url}", flush=True)  # Debugging

            # Name the images by row index
            image_name = f"{index + 1}.jpg"
            _save_image(modified_url, os.path.join(output_folder, image_name), timeout)

            print(f"Downloaded: {image_name} from {modified_url}")
        except Exception as e:
            # Deliberately broad: this is a best-effort batch job, so any
            # per-row failure is reported and the loop moves on.
            print(f"Failed to download {image_url}: {e}")

# --- Run configuration: edit these three values to match your own data ---
csv_file = "tayara_vehicles.csv"     # CSV file that lists the image URLs
image_column = "Image URL"           # header of the column holding the URLs
output_folder = "downloaded_images"  # destination folder for the saved images

# Kick off the download, asking the image service for 1920-pixel-wide images
download_images_from_csv_file(
    csv_file=csv_file,
    image_column=image_column,
    output_folder=output_folder,
    fixed_width=1920,
)


Loaded 1542 rows from tayara_vehicles.csv
Original URL: https://www.tayara.tn/mediaGateway/resize-image?img=ee/ee45cdd9-012a-4655-b9e6-c4adbbaeb1f8&w=300
Modified URL: https://www.tayara.tn/mediaGateway/resize-image?img=ee/ee45cdd9-012a-4655-b9e6-c4adbbaeb1f8&w=1920
Downloaded: 1.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=ee/ee45cdd9-012a-4655-b9e6-c4adbbaeb1f8&w=1920
Original URL: https://www.tayara.tn/mediaGateway/resize-image?img=b4/b4931c58-7f4a-478c-8b95-01afca1de5f7&w=300
Modified URL: https://www.tayara.tn/mediaGateway/resize-image?img=b4/b4931c58-7f4a-478c-8b95-01afca1de5f7&w=1920
Downloaded: 2.jpg from https://www.tayara.tn/mediaGateway/resize-image?img=b4/b4931c58-7f4a-478c-8b95-01afca1de5f7&w=1920
Original URL: https://www.tayara.tn/mediaGateway/resize-image?img=9b/9ba09ac3-7659-4714-9deb-49b49a5933ae&w=300
Modified URL: https://www.tayara.tn/mediaGateway/resize-image?img=9b/9ba09ac3-7659-4714-9deb-49b49a5933ae&w=1920
Downloaded: 3.jpg from https://www.taya