In [6]:
import os
import sys
import requests
from PIL import Image
from io import BytesIO
import csv
from tqdm import tqdm

In [7]:
# Path to the CSV file that lists the image URLs (first column of each row)
csv_file_path = 'dataset_images_diffusion.csv'

# Folder where the processed (cropped) images are written
output_folder_path = 'dataset/fake'

# Create the output folder, including missing parents. exist_ok=True makes the
# cell safe to re-run and avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(output_folder_path, exist_ok=True)

In [8]:
# Download, crop and store a single image
def process_image(image_url, output_folder, crop_bottom=50, timeout=30):
    """Download one image, crop a strip off its bottom edge and save it.

    The image is fetched with ``requests.get``, opened with Pillow, cropped to
    the box ``(0, 0, width, height - crop_bottom)`` and saved into
    ``output_folder`` under the last path component of ``image_url``.

    Failures are reported on ``sys.stderr`` and the function returns normally,
    so a single bad URL or unreadable image does not abort a batch run.

    Args:
        image_url: URL of the image to download.
        output_folder: Existing directory the cropped image is written to.
        crop_bottom: Number of pixel rows removed from the bottom of the
            image. Defaults to 50, the value previously hard-coded.
        timeout: Timeout in seconds passed to ``requests.get``; without it a
            stalled connection would block forever.

    Returns:
        None.
    """
    try:
        # No stream=True: the whole body is read via .content anyway, and a
        # non-streamed response releases its connection even when
        # raise_for_status() raises.
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()  # raises HTTPError on 4xx/5xx responses

        # Crop and save the image
        with Image.open(BytesIO(response.content)) as image:
            cropped_image = image.crop((0, 0, image.width, image.height - crop_bottom))
            file_name = os.path.basename(image_url)
            save_path = os.path.join(output_folder, file_name)
            cropped_image.save(save_path)

    except requests.exceptions.HTTPError as e:
        print(f"Error HTTP al descargar {image_url}: {e}", file=sys.stderr)
    except requests.exceptions.RequestException as e:
        # Connection errors, timeouts, invalid URLs, etc. This clause must come
        # before the OSError clause because RequestException derives from it.
        print(f"Error de red al descargar {image_url}: {e}", file=sys.stderr)
    except (OSError, ValueError) as e:
        # Pillow could not decode, crop or write the image (for example the
        # payload is not an image, or the crop box is invalid for its size).
        print(f"Error al procesar la imagen {image_url}: {e}", file=sys.stderr)

In [9]:
# Download and process the image referenced by the first column of every CSV row
with open(csv_file_path, newline='') as csv_file:
    # First pass: count the lines so the progress bar knows its total
    total_lines = len(csv_file.readlines())
    csv_file.seek(0)  # rewind so the CSV reader starts from the first line

    image_urls = csv.reader(csv_file)
    progress_bar = tqdm(
        image_urls,
        total=total_lines,
        desc='Descargando imágenes',
        unit='imagen',
        file=sys.stdout,
    )
    for row in progress_bar:
        if not row:  # blank line in the CSV: nothing to download
            continue
        process_image(row[0], output_folder_path)

Descargando imágenes:   2%|▏         | 16/1000 [00:04<04:23,  3.74imagen/s]

Error HTTP al descargar https://imgcdn.stablediffusionweb.com/2024/4/2/d142fb4a-9024-4e88-8160-98a017067917.jpg: 404 Client Error: Not Found for url: https://imgcdn.stablediffusionweb.com/2024/4/2/d142fb4a-9024-4e88-8160-98a017067917.jpg


Descargando imágenes:  12%|█▏        | 122/1000 [01:27<13:08,  1.11imagen/s]

Error HTTP al descargar https://imgcdn.stablediffusionweb.com/2024/4/2/dcddffbf-b42e-4cf4-866d-82071c5c182c.jpg: 404 Client Error: Not Found for url: https://imgcdn.stablediffusionweb.com/2024/4/2/dcddffbf-b42e-4cf4-866d-82071c5c182c.jpg


Descargando imágenes:  31%|███       | 311/1000 [04:49<10:21,  1.11imagen/s]

Error HTTP al descargar https://imgcdn.stablediffusionweb.com/2024/4/2/6d8e6005-097c-4ab9-83fa-1cb0fe3da56b.jpg: 404 Client Error: Not Found for url: https://imgcdn.stablediffusionweb.com/2024/4/2/6d8e6005-097c-4ab9-83fa-1cb0fe3da56b.jpg


Descargando imágenes:  36%|███▌      | 356/1000 [05:38<10:36,  1.01imagen/s]

Error HTTP al descargar https://imgcdn.stablediffusionweb.com/2024/4/2/15043ff9-939b-4459-aa28-27fe311ea8ec.jpg: 404 Client Error: Not Found for url: https://imgcdn.stablediffusionweb.com/2024/4/2/15043ff9-939b-4459-aa28-27fe311ea8ec.jpg


Descargando imágenes:  38%|███▊      | 380/1000 [06:02<08:36,  1.20imagen/s]

Error HTTP al descargar https://imgcdn.stablediffusionweb.com/2024/4/2/f413d318-f94f-4fa2-b4c1-2fc0055461be.jpg: 404 Client Error: Not Found for url: https://imgcdn.stablediffusionweb.com/2024/4/2/f413d318-f94f-4fa2-b4c1-2fc0055461be.jpg


Descargando imágenes:  61%|██████▏   | 613/1000 [09:57<04:33,  1.42imagen/s]

Error HTTP al descargar https://imgcdn.stablediffusionweb.com/2024/4/2/6a9ce216-fb87-4a97-acac-e56adc23046d.jpg: 404 Client Error: Not Found for url: https://imgcdn.stablediffusionweb.com/2024/4/2/6a9ce216-fb87-4a97-acac-e56adc23046d.jpg


Descargando imágenes:  62%|██████▏   | 623/1000 [10:06<05:25,  1.16imagen/s]

Error HTTP al descargar https://imgcdn.stablediffusionweb.com/2024/4/2/0f52f517-90f0-4ce3-9c01-0bcdf65377c7.jpg: 404 Client Error: Not Found for url: https://imgcdn.stablediffusionweb.com/2024/4/2/0f52f517-90f0-4ce3-9c01-0bcdf65377c7.jpg


Descargando imágenes:  62%|██████▎   | 625/1000 [10:07<04:21,  1.44imagen/s]

Error HTTP al descargar https://imgcdn.stablediffusionweb.com/2024/4/2/5995afbb-d06c-4815-8ac7-f52f9b05316f.jpg: 404 Client Error: Not Found for url: https://imgcdn.stablediffusionweb.com/2024/4/2/5995afbb-d06c-4815-8ac7-f52f9b05316f.jpg


Descargando imágenes: 100%|██████████| 1000/1000 [16:04<00:00,  1.04imagen/s]
