# In [None]:
import os
import PyPDF2


def read_pdfs_in_folder(folder_path: str) -> None:
    """Extract the text of every PDF in a folder into ``<folder_path>/txt``.

    Each file directly inside ``folder_path`` whose name ends in ``.pdf``
    (compared case-insensitively) is opened with PyPDF2, the text of all of
    its pages is concatenated, and the result is written as UTF-8 to
    ``<folder_path>/txt/<name>.txt``. Sub-folders are not searched.

    Processing is best-effort per file: a PDF that cannot be decrypted with
    an empty password, or that fails to parse or write, is reported with
    ``print`` and skipped so the remaining files are still handled.

    Args:
        folder_path: Path of an existing directory containing the PDFs.

    Raises:
        ValueError: If ``folder_path`` is not an existing directory.
        OSError: If the ``txt`` output directory cannot be created.
    """
    if not os.path.isdir(folder_path):
        raise ValueError(f"The folder path {folder_path} is not valid.")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    txt_folder_path = os.path.join(folder_path, "txt")
    os.makedirs(txt_folder_path, exist_ok=True)

    for filename in os.listdir(folder_path):
        # Compare case-insensitively so "REPORT.PDF" is not silently skipped.
        if not filename.lower().endswith(".pdf"):
            continue

        file_path = os.path.join(folder_path, filename)
        try:
            with open(file_path, "rb") as file:
                # PdfReader / is_encrypted / pages replace the legacy
                # PdfFileReader / isEncrypted / numPages / getPage names,
                # which raise DeprecationError from PyPDF2 3.0 onwards.
                reader = PyPDF2.PdfReader(file)

                if reader.is_encrypted:
                    # decrypt() signals a wrong password with a falsy return
                    # value, and may raise for unsupported encryption
                    # schemes; treat both as "could not decrypt".
                    try:
                        decrypted = reader.decrypt("")
                    except Exception:
                        decrypted = False
                    if not decrypted:
                        print(f"Could not decrypt {filename}")
                        continue

                # Text must be extracted while the PDF file is still open.
                # "or ''" guards against a page yielding no text object.
                pdf_text = "".join(
                    page.extract_text() or "" for page in reader.pages
                )

            txt_filename = os.path.splitext(filename)[0] + ".txt"
            txt_file_path = os.path.join(txt_folder_path, txt_filename)
            with open(txt_file_path, "w", encoding="utf-8") as txt_file:
                txt_file.write(pdf_text)

            print(f"Extracted text from {filename} and wrote to {txt_file_path}")

        except Exception as e:
            # Per-file boundary: report and move on so one bad PDF does not
            # abort the whole batch.
            print(f"Failed to read {filename} due to {e}")


# Example usage: convert every PDF in the "files" folder (a relative path, so
# it is resolved against the current working directory). This runs at module
# top level, and read_pdfs_in_folder raises ValueError if "files" is not an
# existing directory.
folder_path = "files"
read_pdfs_in_folder(folder_path)