# Convert images into a PDF file

---

## Import modules

`%pip freeze > requirements.txt`

In [54]:
# Image manipulation
from PIL import Image

# Directory management
import os

# Regular expression
import re

# Ceiling
import math

## Parameter(s)

In [55]:
# Root directory that holds the images (and any sub-directories)
main_dir = '../data/101 Ways to End the Clone Saga/101 Ways to End the Clone Saga Full'
# Path of every directory found under main_dir; os.walk yields main_dir itself first
all_directory = [dirpath for dirpath, _dirnames, _filenames in os.walk(main_dir)]
# Directory whose images are converted (index 0 is main_dir itself)
main_subdir = all_directory[0]

## Custom function(s)

In [56]:
# Order string based on number (https://stackoverflow.com/questions/5967500/how-to-correctly-sort-a-string-with-a-number-inside)

def atof(text):
    '''
    Return text converted to a float, or text unchanged when float() rejects it.
    '''
    try:
        return float(text)
    except ValueError:
        # Not a number: hand the input back as-is
        return text

def natural_keys(text):
    '''
    Build a sort key that orders strings by the numbers embedded in them.

    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    float regex comes from https://stackoverflow.com/a/12643073/190597

    text: string to split into text and number fragments.

    Returns a list that alternates text fragments (str, even indexes) and
    numbers (float, odd indexes). A '+' or '-' directly in front of a number
    is consumed by the pattern and dropped.
    '''
    # With one capturing group, re.split returns text, number, text, number, ...
    fragments = re.split(r'[+-]?([0-9]+(?:[.][0-9]*)?|[.][0-9]+)', text)
    # Convert only the captured numbers. Passing the text fragments through
    # float() as well would turn words such as 'nan' or 'inf' into floats and
    # make keys of different strings impossible to compare (str vs float).
    return [
        float(fragment) if index % 2 else fragment
        for index, fragment in enumerate(fragments)
    ]

# Quick check of the ordering on names that mix text and decimal numbers
alist = sorted(
    [
        'something1',
        'something2',
        'something1.0',
        'something1.25',
        'something1.105',
    ],
    key = natural_keys
)
print(alist)

['something1', 'something1.0', 'something1.105', 'something1.25', 'something2']


In [57]:
# Function to compress a PDF file
def compress_pdf(input_pdf, output_pdf):
    '''
    Copy input_pdf to output_pdf with the content streams of each page compressed.

    input_pdf: path of the PDF file to read.
    output_pdf: path of the PDF file to write.

    Returns None. Any failure (missing file, unreadable PDF, PyPDF2 not
    installed) is reported with print() instead of being raised.
    '''
    try:
        # Imported here because the file's import section does not load PyPDF2
        import PyPDF2

        # The reader pulls objects from the source lazily, so the input must
        # stay open until the writer has finished; `with` then closes it
        with open(input_pdf, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            pdf_writer = PyPDF2.PdfWriter()

            for page in pdf_reader.pages:
                page.compress_content_streams()
                pdf_writer.add_page(page)

            with open(output_pdf, 'wb') as output_file:
                pdf_writer.write(output_file)

        print(f'PDF compression complete. Saved as "{output_pdf}"')

    except Exception as e:
        print(f'An error occurred: {str(e)}')

In [58]:
# Output PDF path: <main_dir>/<name of the selected directory>.pdf
pdf_file = f'{main_dir}/{os.path.basename(main_subdir)}.pdf'

## List of images

In [59]:
# List of images: keep regular files only, because os.listdir also returns
# sub-directories and those cannot be opened as images
list_images = [
    entry
    for entry in os.listdir(path = main_subdir)
    if os.path.isfile(os.path.join(main_subdir, entry))
]

In [60]:
# Put the file names in human (numeric) order so page 10 follows page 9
list_images = sorted(list_images, key = natural_keys)

In [61]:
# Display the sorted file names; this is the order in which the images are processed
list_images

['1WtEtCS-0-0.jpg',
 '1WtEtCS-0-1.jpg',
 '1WtEtCS-0-2.jpg',
 '1WtEtCS-0-3.jpg',
 '1WtEtCS-0-4.jpg',
 '1WtEtCS-0-5.jpg',
 '1WtEtCS-0-6.jpg',
 '1WtEtCS-0-7.jpg',
 '1WtEtCS-0-8.jpg',
 '1WtEtCS-0-9.jpg',
 '1WtEtCS-0-10.jpg',
 '1WtEtCS-0-11.jpg',
 '1WtEtCS-0-12.jpg',
 '1WtEtCS-0-13.jpg',
 '1WtEtCS-0-14.jpg',
 '1WtEtCS-0-15.jpg',
 '1WtEtCS-0-16.jpg',
 '1WtEtCS-0-17.jpg',
 '1WtEtCS-0-18.jpg',
 '1WtEtCS-0-19.jpg',
 '1WtEtCS-0-20.jpg',
 '1WtEtCS-0-21.jpg',
 '1WtEtCS-0-22.jpg',
 '1WtEtCS-0-23.jpg',
 '1WtEtCS-0-24.jpg',
 '1WtEtCS-0-25.jpg',
 '1WtEtCS-0-26.jpg',
 '1WtEtCS-0-27.jpg',
 '1WtEtCS-0-28.jpg',
 '1WtEtCS-0-29.jpg',
 '1WtEtCS-0-30.jpg',
 '1WtEtCS-0-31.jpg',
 '1WtEtCS-0-32.jpg',
 '1WtEtCS-0-33.jpg']

## Open and resize images

In [62]:
# Height and width of every image, in the same order as list_images
l_height = []
l_width = []

# Loop
for image_path in list_images:
    # Only the dimensions are read here; the context manager closes the file
    # right away instead of leaving one open handle per image
    with Image.open(os.path.join(main_subdir, image_path)) as img:
        l_height.append(img.height)
        l_width.append(img.width)

In [63]:
# Minimum width and height over all images
print('Minimum width:', min(l_width))
print('Minimum height:', min(l_height))

Minimum width: 962
Minimum weight: 1464


In [64]:
# Optimal size, stored as (height, width) and taken from the first image in the list.
# NOTE: this is the reverse of PIL's (width, height) size order, so index 0 is
# the height and index 1 is the width wherever opt_size is used.
opt_size = (l_height[0], l_width[0])
# Fixed-size alternative, kept for manual switching:
# opt_size = (1600, 1250)

In [65]:
# Images, resized and cropped to opt_size, in list_images order
images = []


def _cover_size(w, h, target_w, target_h):
    '''
    Return the (width, height) at which a w x h image covers target_w x target_h.

    The image is first scaled to the target height. If that leaves it narrower
    than the target width, it is scaled to the target width instead, so the
    result is never smaller than the target in either direction.
    '''
    fit_w = math.ceil(w * (target_h / h))
    if fit_w >= target_w:
        return (fit_w, target_h)
    return (target_w, math.ceil(h * (target_w / w)))


# Loop
for image_path in list_images:
    # Open image
    img = Image.open(os.path.join(main_subdir, image_path))#.convert('L')
    # Resize image: opt_size is (height, width) while PIL sizes are (width, height).
    # The same rule is applied to every image, so an image taller than the
    # target also gets widened when needed instead of being padded by the crop.
    new_size = _cover_size(img.width, img.height, opt_size[1], opt_size[0])
    if new_size == img.size:
        # Already the right size: keep the original pixels untouched
        resize_img = img
    else:
        # Single resize from the original image to avoid resampling twice
        resize_img = img.resize(new_size)
    # Crop image from the top-left corner down to exactly opt_size
    crop_img = resize_img.crop((0, 0, opt_size[1], opt_size[0]))
    # Add image into list
    images.append(crop_img)

## Convert JPG to PDF

In [66]:
# Save the images as a PDF: the first image starts the document and the
# remaining ones are appended as further pages
# NOTE(review): the `pdf_file` path computed earlier in this file is not used
# here; the PDF goes to a fixed name in the working directory. Confirm intended.
first_page = images[0]
other_pages = images[1:]
first_page.save(
    'cimildudu.pdf',
    save_all = True,
    append_images = other_pages,
    resolution = 100,
    quality = 95
)