In [2]:
import pandas as pd
import requests
from PIL import Image
import os
from io import BytesIO
import time

def mm_to_pixels(mm, dpi=300):
    """Convert millimeters to pixels at specified DPI"""
    inches = mm / 25.4  # Convert mm to inches
    pixels = int(inches * dpi)
    return pixels

def download_and_resize_images(csv_file_path, output_directory='resized_images', 
                             width_mm=32, height_mm=45, dpi=300, quality=95):
    """
    Download images from URLs in CSV and resize them to specified dimensions in mm.
    
    Parameters:
    csv_file_path (str): Path to the CSV file
    output_directory (str): Directory to save resized images
    width_mm (float): Target width in millimeters
    height_mm (float): Target height in millimeters  
    dpi (int): DPI for conversion (default: 300 for print quality)
    quality (int): JPEG quality (1-100, default: 95)
    """
    
    try:
        # Read the CSV file
        df = pd.read_csv(csv_file_path)
        
        # Find URL column
        url_col = None
        for col in df.columns:
            if 'url' in col.lower():
                url_col = col
                break
        
        # Find sequence number column
        seq_col = None
        for col in df.columns:
            if any(keyword in col.lower() for keyword in ['seq', 'sequence', 'number', 'num', 'id']):
                seq_col = col
                break
        
        if url_col is None:
            print("Error: Could not find URL column. Please ensure your CSV has a column containing 'url' in its name.")
            return
            
        if seq_col is None:
            print("Error: Could not find sequence number column. Please ensure your CSV has a column containing 'seq', 'sequence', 'number', 'num', or 'id' in its name.")
            return
        
        print(f"Using URL column: '{url_col}'")
        print(f"Using sequence column: '{seq_col}'")
        
        # Convert mm to pixels
        target_width = mm_to_pixels(width_mm, dpi)
        target_height = mm_to_pixels(height_mm, dpi)
        
        print(f"Target size: {width_mm}mm x {height_mm}mm ({target_width}px x {target_height}px at {dpi} DPI)")
        
        # Create output directory if it doesn't exist
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)
            print(f"Created directory: {output_directory}")
        
        # Process images
        successful = 0
        failed = 0
        
        # Set up request session for better performance
        session = requests.Session()
        session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })
        
        for index, row in df.iterrows():
            try:
                url = row[url_col]
                sequence = row[seq_col]
                
                # Skip if URL or sequence is empty/NaN
                if pd.isna(url) or pd.isna(sequence) or str(url).strip() == '' or str(sequence).strip() == '':
                    print(f"Skipping row {index + 1}: Missing URL or sequence number")
                    failed += 1
                    continue
                
                print(f"Processing sequence {sequence}: {url}")
                
                # Download image
                response = session.get(str(url), timeout=30)
                response.raise_for_status()
                
                # Open image from downloaded data
                img = Image.open(BytesIO(response.content))
                
                # Convert to RGB if necessary (handles RGBA, grayscale, etc.)
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                
                # Resize image to target dimensions
                # Using LANCZOS for high-quality resizing
                resized_img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
                
                # Set DPI information
                resized_img.info['dpi'] = (dpi, dpi)
                
                # Save the resized image
                filename = f"Image-{sequence}.jpg"
                filepath = os.path.join(output_directory, filename)
                resized_img.save(filepath, 'JPEG', quality=quality, dpi=(dpi, dpi))
                
                print(f"✓ Saved: {filename}")
                successful += 1
                
                # Small delay to be respectful to servers
                time.sleep(0.1)
                
            except requests.exceptions.RequestException as e:
                print(f"✗ Download error for sequence {sequence}: {str(e)}")
                failed += 1
                continue
            except Exception as e:
                print(f"✗ Processing error for sequence {sequence}: {str(e)}")
                failed += 1
                continue
        
        print(f"\n" + "="*50)
        print(f"COMPLETED!")
        print(f"Successfully processed: {successful} images")
        print(f"Failed: {failed} images")
        print(f"Images saved to: {output_directory}")
        print(f"Image specifications: {width_mm}mm x {height_mm}mm at {dpi} DPI")
        
    except FileNotFoundError:
        print(f"Error: Could not find the CSV file '{csv_file_path}'. Please check the file path.")
    except Exception as e:
        print(f"Error reading CSV file: {str(e)}")

def preview_first_image(output_directory='resized_images'):
    """Preview the first processed image in the notebook"""
    try:
        from IPython.display import Image as IPImage, display
        
        # Find first image file
        files = [f for f in os.listdir(output_directory) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
        if files:
            first_file = sorted(files)[0]
            filepath = os.path.join(output_directory, first_file)
            print(f"Preview of: {first_file}")
            display(IPImage(filepath))
        else:
            print("No images found to preview.")
    except ImportError:
        print("IPython display not available. Run this in a Jupyter notebook to see preview.")
    except Exception as e:
        print(f"Preview error: {str(e)}")

# Example usage - modify these parameters as needed
def process_images():
    """
    Main function to process images. Modify parameters as needed.
    """
    
    # MODIFY THESE PARAMETERS
    csv_file_path = "WhatWeWonder_Images.csv"           # Your CSV file with URLs and sequence numbers
    output_directory = "resized_images"         # Output folder
    width_mm = 32                              # Target width in mm
    height_mm = 45                             # Target height in mm
    dpi = 300                                  # DPI (300 for print quality, 150 for web)
    quality = 95                               # JPEG quality (1-100)
    
    print("Image Downloader and Resizer")
    print("=" * 50)
    
    download_and_resize_images(
        csv_file_path=csv_file_path,
        output_directory=output_directory,
        width_mm=width_mm,
        height_mm=height_mm,
        dpi=dpi,
        quality=quality
    )

In [3]:
# Uncomment and run to process images
process_images()

Image Downloader and Resizer
Using URL column: 'imageURL'
Using sequence column: 'id'
Target size: 32mm x 45mm (377px x 531px at 300 DPI)
Created directory: resized_images
Processing sequence 1: https://upload.wikimedia.org/wikipedia/commons/5/54/Confucius_Tang_Dynasty.jpg
✓ Saved: Image-1.jpg
Processing sequence 2: https://upload.wikimedia.org/wikipedia/commons/e/e8/Zhang_Lu-Laozi_Riding_an_Ox.jpg
✓ Saved: Image-2.jpg
Processing sequence 3: https://upload.wikimedia.org/wikipedia/commons/b/b7/Jean-Jacques_Rousseau_%28painted_portrait%29.jpg
✓ Saved: Image-3.jpg
Processing sequence 4: https://upload.wikimedia.org/wikipedia/commons/e/ea/Saint_Augustine_by_Philippe_de_Champaigne.jpg
✓ Saved: Image-4.jpg
Processing sequence 5: https://upload.wikimedia.org/wikipedia/commons/f/ff/Buddha_in_Sarnath_Museum_%28Dhammajak_Mutra%29.jpg
✓ Saved: Image-5.jpg
Processing sequence 6: https://upload.wikimedia.org/wikipedia/commons/2/24/Heraclitus_b_4_compressed.jpg
✓ Saved: Image-6.jpg
Processing sequen