In [5]:
import os

# Image files whose on-disk size we want to inspect
target_files = [
    "06_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png",
    "07_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png",
    "11_2019_plot_3 copy_cropped.png",
]

# Folder that holds the cropped images
directory = r"C:\Users\clint\Desktop\Lifecycle_RA\Data\cropped_sorted"

# Report every target's size in bytes and in MB; missing files are flagged
# and skipped instead of raising.
for file_name in target_files:
    file_path = os.path.join(directory, file_name)

    if not os.path.exists(file_path):
        print(f"File: {file_name} not found in the directory.")
        continue

    file_size_bytes = os.path.getsize(file_path)
    file_size_mb = file_size_bytes / (1024 * 1024)  # bytes -> MB (binary megabytes)
    print(f"File: {file_name}, Size: {file_size_bytes} bytes ({file_size_mb:.2f} MB)")

File: 06_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png, Size: 1232218 bytes (1.18 MB)
File: 07_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png, Size: 1313398 bytes (1.25 MB)
File: 11_2019_plot_3 copy_cropped.png, Size: 1195508 bytes (1.14 MB)


In [6]:
import os



# Keep only the directory entries whose (case-insensitive) extension marks
# them as an image.
image_extensions = [".png", ".jpg", ".jpeg", ".gif", ".bmp"]
all_files = os.listdir(directory)
image_files = list(
    filter(
        lambda name: os.path.splitext(name)[1].lower() in image_extensions,
        all_files,
    )
)

# Build (file name, size in MB) pairs for every image
file_sizes_mb = []

for file_name in image_files:
    file_path = os.path.join(directory, file_name)
    file_size_bytes = os.path.getsize(file_path)
    file_size_mb = file_size_bytes / (1024 * 1024)  # bytes -> MB (binary megabytes)
    file_sizes_mb.append((file_name, file_size_mb))

# Order the pairs from smallest to largest file
file_sizes_mb = sorted(file_sizes_mb, key=lambda entry: entry[1])

# Summarise the size distribution, or say so when there is nothing to summarise
if not file_sizes_mb:
    print("No image files found in the directory.")
else:
    # After sorting, the extremes sit at the two ends of the list
    smallest_file, min_size = file_sizes_mb[0]
    largest_file, max_size = file_sizes_mb[-1]
    avg_size = sum(entry[1] for entry in file_sizes_mb) / len(file_sizes_mb)

    print(f"Total number of image files: {len(file_sizes_mb)}")
    print(f"Size range: {min_size:.2f} MB to {max_size:.2f} MB")
    print(f"Average size: {avg_size:.2f} MB")
    print(f"Smallest file: {smallest_file} ({min_size:.2f} MB)")
    print(f"Largest file: {largest_file} ({max_size:.2f} MB)")

Total number of image files: 77
Size range: 0.06 MB to 1.25 MB
Average size: 0.40 MB
Smallest file: 08_2022_1_cropped.png (0.06 MB)
Largest file: 07_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png (1.25 MB)


In [7]:
from PIL import Image
import os


# Destination folder for the compressed images.
# NOTE: this is the same path as `directory` (the folder the images are read
# from), so each compressed file is written over its original.
output_directory = r"C:\Users\clint\Desktop\Lifecycle_RA\Data\cropped_sorted"

# Make sure the destination exists; a no-op when it is already there
os.makedirs(name=output_directory, exist_ok=True)

def compress_image(input_path, output_path, quality=85, max_size_mb=1.0):
    """
    Compress an image until it's below the specified max size.

    Strategy:
      1. Re-save the image at progressively lower ``quality`` values, in
         steps of 5, down to and including 10.
      2. If that is not enough, downscale the original pixels in cumulative
         10% steps (saved at the lowest quality) for as long as both
         dimensions are still above 300 px.

    Args:
        input_path: Path to the input image
        output_path: Path to save the compressed image. May be the same file
            as ``input_path``; the source is fully read and closed before
            anything is written.
        quality: Initial quality setting (0-100, lower means more compression)
        max_size_mb: Maximum file size in MB

    Returns:
        A ``(size_mb, info)`` tuple. ``size_mb`` is the size in MB of the file
        last written to ``output_path``. ``info`` is:
          * the integer quality that was used, when lowering quality alone
            was enough;
          * a ``"Quality: Q, Size: WxH"`` string, when resizing was needed;
          * ``"Minimum quality and size"``, when the target could not be
            reached (the last attempt is left on disk).

    NOTE(review): ``quality`` is forwarded to ``Image.save`` unchanged. Pillow's
    PNG writer does not list a ``quality`` option, so for ``.png`` outputs the
    reduction presumably comes from ``optimize=True``, dropping the alpha
    channel and resizing rather than from the quality loop -- confirm against
    the Pillow file-format docs.
    """
    min_quality = 10          # Don't go below quality 10
    quality_step = 5
    bytes_per_mb = 1024 * 1024

    # Read the pixels into memory and release the source file handle before
    # writing: the previous version never closed it, and output_path may
    # point at the very same file.
    with Image.open(input_path) as source:
        # Convert RGBA to RGB if needed (removing transparency can reduce size)
        img = source.convert('RGB') if source.mode == 'RGBA' else source.copy()

    def save_and_measure(image, save_quality):
        """Write ``image`` to output_path and return the resulting size in MB."""
        image.save(output_path, optimize=True, quality=save_quality)
        return os.path.getsize(output_path) / bytes_per_mb

    size_mb = None  # stays None until something has actually been written

    # Try different quality settings until the image is small enough.
    # `>=` so that the floor itself (quality 10) is really attempted.
    current_quality = quality
    while current_quality >= min_quality:
        size_mb = save_and_measure(img, current_quality)
        if size_mb < max_size_mb:
            return size_mb, current_quality
        current_quality -= quality_step

    # Even the lowest quality was not enough: shrink the image instead.
    # Use the floor, or the caller's own value if it was already below it.
    resize_quality = min(quality, min_quality)
    width, height = img.size
    while width > 300 and height > 300:  # Don't make the image too small
        # Reduce dimensions by 10%, always resampling from the original pixels
        width = int(width * 0.9)
        height = int(height * 0.9)
        resized_img = img.resize((width, height), Image.LANCZOS)
        size_mb = save_and_measure(resized_img, resize_quality)
        if size_mb < max_size_mb:
            return size_mb, f"Quality: {resize_quality}, Size: {width}x{height}"

    if size_mb is None:
        # Neither loop ran (quality below the floor and image too small to
        # resize). Write once so the reported size describes a file this call
        # produced rather than a missing or stale one.
        size_mb = save_and_measure(img, resize_quality)
        if size_mb < max_size_mb:
            return size_mb, resize_quality

    return size_mb, "Minimum quality and size"

# Compress every target image and report the before/after sizes
for file_name in target_files:
    input_path = os.path.join(directory, file_name)
    output_path = os.path.join(output_directory, file_name)

    # Skip (but report) targets that are not on disk
    if not os.path.exists(input_path):
        print(f"File: {file_name} not found in the directory.")
        continue

    # Measure the source before compressing, since output_path may be the
    # same file and would then be overwritten.
    original_size_mb = os.path.getsize(input_path) / (1024 * 1024)

    # compress_image returns the new size plus a description of what it took
    new_size_mb, quality_info = compress_image(input_path, output_path)

    # Per-file summary
    print(f"File: {file_name}")
    print(f"  Original size: {original_size_mb:.2f} MB")
    print(f"  New size: {new_size_mb:.2f} MB")
    print(f"  Compression info: {quality_info}")
    print(f"  Saved to: {output_path}")
    print("-" * 50)

File: 06_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png
  Original size: 1.18 MB
  New size: 0.79 MB
  Compression info: 85
  Saved to: C:\Users\clint\Desktop\Lifecycle_RA\Data\cropped_sorted\06_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png
--------------------------------------------------
File: 07_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png
  Original size: 1.25 MB
  New size: 0.83 MB
  Compression info: 85
  Saved to: C:\Users\clint\Desktop\Lifecycle_RA\Data\cropped_sorted\07_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png
--------------------------------------------------
File: 07_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Year-Old_ copy_cropped.png
  Original size: 1.25 MB
  New size: 0.83 MB
  Compression info: 85
  Saved to: C:\Users\clint\Desktop\Lifecycle_RA\Data\cropped_sorted\07_2019_plot_3_See_the_Average_Retail_Selling_Price_3-5_Y