In this notebook I present code to convert multiple PDF files to images. The code also handles PDF
files that ask for a password: all other PDF files are converted, and the ones that require a password are listed in a .txt file.

# Load packages

In [1]:
import glob
import os
import time

from pdf2image import convert_from_path
from pdf2image.exceptions import PDFInfoNotInstalledError, PDFPageCountError, PDFSyntaxError

# Current local date (YYYY-MM-DD), computed once when this cell runs
TodaysDate = time.strftime("%Y-%m-%d", time.localtime())

# Function

In [2]:
def convert_pdf2image(pdf_path, image_path, dpi):
    """ 
    Convert every page of a PDF file to a JPEG image saved in image_path.

    Each page is written as "<pdf name>_image_<page index>.jpeg", where the
    page index starts at 0 and <pdf name> is the PDF file name without its
    directory and without its final extension.

    Input:
        pdf_path: path to the PDF file to be converted to images
        image_path: path to the folder where the resulting images are saved
            (created if it does not exist yet)
        dpi: chosen resolution

    Exceptions raised by convert_from_path are not caught here and propagate
    to the caller.
    """
    # fmt must be spelled 'jpeg': pdf2image falls back to PPM for a format it
    # does not recognise. jpegopt must be a dict of JPEG options, not a string.
    images = convert_from_path(pdf_path, dpi=dpi, fmt='jpeg', jpegopt={'optimize': True})

    # Name the images after the PDF file. os.path copes with the platform's
    # separators and with file names that contain extra dots.
    image_name = os.path.splitext(os.path.basename(pdf_path))[0]

    # An empty image_path means "current directory"; nothing to create then.
    if image_path:
        os.makedirs(image_path, exist_ok=True)

    # save all images generated from the pdf file
    for page_index, image in enumerate(images):
        fname = os.path.join(image_path, image_name + "_image_" + str(page_index) + ".jpeg")
        image.save(fname, "jpeg")

# Applying to one PDF file

In [3]:
# Build the path with os.path.join instead of a hand-written backslash:
# '\E' is an invalid escape sequence in a normal string literal, and the
# joined path uses the separator of the platform the notebook runs on.
pdf_path = os.path.join('./data/pdf', 'Easy_recipes.pdf')
image_path = './data/img/'
dpi = 300
convert_pdf2image(pdf_path, image_path, dpi)

# Converting multiple PDF files in a folder

In [4]:
def convert_multiple_pdf(pdfs_folder, image_path, dpi, filename):
    """ 
    Convert every PDF file in a folder to images and report the ones that failed.

    Every "*.pdf" file directly inside pdfs_folder is passed to
    convert_pdf2image. Files for which the conversion raises
    PDFPageCountError are treated as password-protected: they are skipped and
    their names are written, one per line, to a report file
    "<filename>_<TodaysDate>.txt" inside pdfs_folder. The report is written
    once, after all files were processed (it is empty when nothing failed).
    Finally the number of skipped and converted files is printed.

    Input:
        pdfs_folder: folder containing PDF files
        image_path: path to folder where resulting images will be saved
        dpi: chosen resolution
        filename: base name of the .txt file that keeps the names of the files
            that required passwords and could not be converted

    Any exception other than PDFPageCountError propagates to the caller.
    """
    # number of files converted successfully
    converted = 0
    # names of the pdf files that require a password
    password_pdf = []

    # os.path.join works whether or not pdfs_folder ends with a separator;
    # sorting makes the processing order (and the report) deterministic.
    pdf_paths = sorted(glob.glob(os.path.join(pdfs_folder, '*.pdf')))

    for pdf_path in pdf_paths:
        try:
            convert_pdf2image(pdf_path, image_path, dpi)
        except PDFPageCountError:
            # NOTE(review): this notebook interprets PDFPageCountError as
            # "password required"; other unreadable PDFs presumably raise it
            # too - confirm against the pdf2image documentation.
            password_pdf.append(os.path.basename(pdf_path))
        else:
            converted += 1

    # Write the report once, after the loop, instead of on every iteration.
    report_path = os.path.join(pdfs_folder, filename + "_" + TodaysDate + ".txt")
    with open(report_path, "w") as output:
        output.write('\n'.join(password_pdf))

    print(str(len(password_pdf)) + " PDF files with password.")
    print(str(converted) + " files converted.")


In [5]:
# Resolution and base name of the report listing password-protected PDFs
dpi = 300
filename = "pdfs_require_password"

# Input folder (PDF files) and output folder (resulting images)
pdfs_folder = './data/pdf/'
image_path = './data/img/'

convert_multiple_pdf(
    pdfs_folder=pdfs_folder,
    image_path=image_path,
    dpi=dpi,
    filename=filename,
)


1 PDF files with password.
3 files converted.
