**************************************************
Recorre una estructura de carpetas con subcarpetas, calcula estadísticas básicas y las escribe en un archivo de texto

Ref: chatGPT 4
**************************************************

Carga los paquetes de apoyo

In [34]:
import os
from datetime import datetime
from PIL import Image, UnidentifiedImageError
import warnings
from PIL.Image import DecompressionBombWarning

Función para obtener estadísticas de las imágenes (cantidad de imágenes, tamaño en MB, fechas representadas) en directorios por año

In [35]:
def get_jpeg_stats(directory):
    """Collect basic statistics for the JPEG files under *directory*.

    The tree is walked recursively. Every file whose name ends in ``.jpg``
    or ``.jpeg`` (case-insensitive) is counted and its size on disk is added
    to the total, even when the image itself cannot be opened or has no
    usable date.

    Args:
        directory: Root folder to scan. A missing folder yields empty
            statistics because ``os.walk`` produces nothing for it.

    Returns:
        A tuple ``(total_images, total_size_mb, unique_image_dates)`` where
        ``total_size_mb`` is the summed file size in MiB (float) and
        ``unique_image_dates`` is a sorted list of distinct
        ``datetime.date`` values read from EXIF tag 36867
        (DateTimeOriginal).
    """
    exif_date_tag = 36867  # EXIF DateTimeOriginal
    total_images = 0
    total_size = 0
    image_dates = set()

    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(('.jpg', '.jpeg')):
                continue

            total_images += 1
            file_path = os.path.join(root, file)
            total_size += os.path.getsize(file_path)

            # Open the image and read its EXIF block while warnings are being
            # recorded, so warnings raised by either step can be inspected.
            exif_data = None
            try:
                with warnings.catch_warnings(record=True) as caught_warnings:
                    warnings.simplefilter("always", UserWarning)
                    # Promote the decompression-bomb warning to an exception
                    # so oversized images are skipped instead of processed.
                    warnings.simplefilter("error", DecompressionBombWarning)
                    # The context manager closes the file handle; without it
                    # a large tree leaves one open descriptor per image.
                    with Image.open(file_path) as image:
                        # A ".jpg" file may really be another format whose
                        # plugin has no _getexif(); treat that as "no EXIF".
                        read_exif = getattr(image, "_getexif", None)
                        if read_exif is not None:
                            exif_data = read_exif()
            except (DecompressionBombWarning, Image.DecompressionBombError) as e:
                print(f"Skipping image due to warning: {file_path} - {e}")
                continue
            except UnidentifiedImageError:
                print(f"Skipping unidentified image file: {file_path}")
                continue

            # Skip the date of any file reported as truncated. The check must
            # live at file-loop level: a `continue` inside a loop over the
            # warnings would only advance that inner loop.
            truncated = [
                w for w in caught_warnings
                if "Truncated File Read" in str(w.message)
            ]
            if truncated:
                print(f"Skipping image due to warning: {file_path} - {truncated[0].message}")
                continue

            if not exif_data or exif_date_tag not in exif_data:
                continue

            date_string = exif_data[exif_date_tag]
            try:
                date_object = datetime.strptime(date_string, '%Y:%m:%d %H:%M:%S')
            except (ValueError, TypeError):
                # ValueError: malformed text; TypeError: tag value is not a str.
                print(f"Skipping image with invalid date: {file_path}")
            else:
                image_dates.add(date_object.date())

    total_size_mb = total_size / (1024 * 1024)
    unique_image_dates = sorted(image_dates)
    return total_images, total_size_mb, unique_image_dates

Función para escribir los resultados en un archivo de texto

In [36]:
def write_results_to_file(output_file, directory, total_images, total_size_mb, image_dates):
    """Write the statistics of one directory to a plain-text report.

    The file is created or overwritten. It is written as UTF-8 explicitly
    because the report labels contain non-ASCII characters and the platform
    default encoding is not guaranteed to represent them.

    Args:
        output_file: Path of the report file to write.
        directory: Directory the statistics refer to (written verbatim).
        total_images: Number of JPEG files found.
        total_size_mb: Total size in MB; written with two decimals.
        image_dates: Iterable of dates, written one per line in the order given.
    """
    lines = [
        f"Directorio: {directory}\n",
        f"Total imágenes JPEG: {total_images}\n",
        f"Total tamaño en MB: {total_size_mb:.2f}\n",
        "Fechas únicas de imagen:\n",
    ]
    lines.extend(f"{date}\n" for date in image_dates)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("".join(lines))

Procesar un solo año

In [32]:
# Single-year run: scan the 2006 archive folder and write its report.
directory = '/media/dd_fotos/multimedios/f/zArch/_2006/'
output_file = 'salida_2006.txt'
(total_images, total_size_mb, unique_image_dates) = get_jpeg_stats(directory)
write_results_to_file(
    output_file=output_file,
    directory=directory,
    total_images=total_images,
    total_size_mb=total_size_mb,
    image_dates=unique_image_dates,
)

Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/2006-01-21 a/altaRes/IMG_1229.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/2006-02-11 a/IMG_1838.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/2006-02-11 a/IMG_1838p.jpg
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/2006-02-11 a/IMG_1839.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/2006-02-11 a/IMG_1839p.jpg
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/2006-02-11 a/IMG_1840.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/2006-02-11 a/IMG_1840p.jpg
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/2006-02-11 a/IMG_1841.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/2006-02-11 a/IMG_1841p.jpg
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/



Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtros/dulia/DSCN0063.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtros/dulia/DSCN0064.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtros/dulia/DSCN0065.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtros/dulia/DSCN0066.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtros/dulia/DSCN0067.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtros/dulia/DSCN0072.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtros/dulia/DSCN0072m.jpg
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtros/dulia/DSCN0073.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtros/dulia/DSCN0073m.jpg
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2006/zOtr

Procesar un rango de años (entre 2001 y 2022, ambos incluidos)

In [37]:
# Common prefix of every per-year folder; the year number is appended to it.
base_path = "/media/dd_fotos/multimedios/f/zArch/_"
years = range(2001, 2023)  # the stop value is exclusive, so the last year is 2022

# Report file names are built as <nombre_base><year><ext_base>.
nombre_base = "salida_"
ext_base = ".txt"

for year in years:
    directory_path = base_path + str(year)
    print(directory_path)  # progress marker: which folder is being scanned
    output_file = nombre_base + str(year) + ext_base
    (total_images, total_size_mb, unique_image_dates) = get_jpeg_stats(directory_path)
    write_results_to_file(
        output_file=output_file,
        directory=directory_path,
        total_images=total_images,
        total_size_mb=total_size_mb,
        image_dates=unique_image_dates,
    )


/media/dd_fotos/multimedios/f/zArch/_2001
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC200004.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC180002.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC180003.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC180006.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC180008.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC200001.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC200002.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC200003.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC200005.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2001/PC200007.JPG
Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArc



Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2007/2007-12-11 b/altaRes/IMG_3464.JPG




Skipping image with invalid date: /media/dd_fotos/multimedios/f/zArch/_2007/2007-12-11 b/altaRes/IMG_3517.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2007/2007-12-11 b/altaRes/IMG_3520.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2007/2007-12-11 b/altaRes/IMG_3521.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2007/2007-12-11 b/altaRes/IMG_3522.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2007/2007-12-11 b/altaRes/IMG_3549.JPG
/media/dd_fotos/multimedios/f/zArch/_2008
/media/dd_fotos/multimedios/f/zArch/_2009
/media/dd_fotos/multimedios/f/zArch/_2010
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2010/2010-10-23 b/IMG_9138.JPG
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2010/2010-10-23 b/IMG_9167.JPG
/media/dd_fotos/multimedios/f/zArch/_2011
Skipping unidentified image file: /media/dd_fotos/multimedios/f/zArch/_2011/20