In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import re
from PyQt5.QtWidgets import *
from PyQt5.QtGui import QFont

In [None]:
import PyPDF2
from pdf2image import convert_from_path
from PIL import Image

In [None]:
class IDGenerator():
    """
    Hands out string identifiers that are unique among those issued by this object.

    Attributes
    ---
    nextID : int
        Integer that the next call to generateID will convert and return
    """
    def __init__(self):
        self.nextID = 0

    def generateID(self):
        """
        Returns the current nextID as a str and advances nextID by one.

        """
        issued = self.nextID
        self.nextID = issued + 1
        return str(issued)

In [None]:
class file2PDFConverter():
    """
    Loads a PDF file both as a PyPDF2 reader and as converted page images.

    Attributes
    ---
    reader : PyPDF2 reader object
        Reader opened on filePath
    images : object returned by pdf2image.convert_from_path
        Converted pages of filePath (indexed like a sequence elsewhere in this file)
    idGenerator : object
        Generator passed to the constructor, presumably an IDGenerator used to
        label extracted pages - TODO confirm once extractPDF is implemented
    """

    def __init__(self, filePath, idGenerator):
        # PyPDF2 renamed PdfFileReader to PdfReader and the old name raises in
        # PyPDF2 3.x, so prefer the new class whenever the installed version has it.
        readerClass = getattr(PyPDF2, 'PdfReader', None) or PyPDF2.PdfFileReader
        self.reader = readerClass(filePath)
        self.images = convert_from_path(filePath)
        # Previously this argument was dropped; keep it so pages can be given IDs.
        self.idGenerator = idGenerator

    def extractPDF(self):
        """
        Extracts and returns PDF object of given filePath

        NOTE(review): not implemented yet - the returned PDF is empty; neither
        reader nor images are copied into it.

        Return
        ---
        PDF
             PDF intended to contain ordered pages in same form as PDF from filePath
        """
        return PDF()

In [None]:
# Convert a sample invoice PDF with pdf2image so the result can be inspected below.
# NOTE(review): machine-specific absolute path; this cell fails wherever the file is absent.
images = convert_from_path('C:/Users/dosre/Downloads/invoice_200400091915.pdf')

In [None]:
# Show the class of the first converted page.
type(images[0])

In [None]:
# Draw the converted page at index 1 on an 8 x 11 figure.
# NOTE(review): assumes the converted PDF has at least two pages - confirm.
fig, ax = plt.subplots(figsize=(8, 11))
ax.imshow(images[1])

In [None]:
# Errors

class NotUniqueError(Exception):
    """
    Raised when an entry that has to be unique is already present.

    """

class NoMatchError(Exception):
    """
    Raised when a lookup finds no matching object.

    """

In [None]:
# Model

class PDFPage():
    """
    PDF Page containing both image and PyPDF2 page representation of PDF

    Two pages compare equal when their ID, image and page object are all equal.
    The hash is derived from the ID only, so the ID should not be changed while
    a page is stored in a set or used as a dict key.

    Attributes
    ---
    ID : str
        ID identifying object
    image : PIL.PpmImagePlugin.PpmImageFile
        PpmImageFile of page for display
    pageObject : PyPDF2.page.PageObject
        PyPDF2 representation of the page

    """
    def __init__(self, ID, image, pageObject):
        self.ID = ID
        self.image = image
        self.pageObject = pageObject

    def __eq__(self, obj):
        """
        Returns True when obj is a PDFPage with equal ID, image and page object.

        For objects that are not PDFPages, NotImplemented is returned so Python
        can try the other operand's comparison before settling on False.

        """
        if obj is self:
            return True
        if not isinstance(obj, PDFPage):
            return NotImplemented
        return ((obj.getID() == self.getID()) and
                (obj.getImage() == self.getImage()) and
                (obj.getPageObject() == self.getPageObject()))

    def __hash__(self):
        """
        Returns a hash based on the page ID.

        Defining __eq__ alone disables hashing; equal pages always share an ID,
        so hashing the ID keeps the hash consistent with __eq__.

        """
        return hash(self.ID)

    def getID(self):
        """
        Returns the ID of this page.

        """
        return self.ID

    def getImage(self):
        """
        Returns the image of this page.

        """
        return self.image

    def getPageObject(self):
        """
        Returns the page object of this page.

        """
        return self.pageObject

In [None]:
class PDFPageBank():
    """
    Registry holding every PDFPage added so far, keyed by page ID.

    Attributes
    ---
    map : dict
        Maps each stored PDFPage's ID to the PDFPage itself.

    """

    def __init__(self):
        self.map = {}

    def addPage(self, pdfPage):
        """
        Stores pdfPage in the bank under its own ID.

        Parameters
        ---
        pdfPage : PDFPage
            Page to register.

        Raises
        ---
        NotUniqueError
            Raised if the bank already holds a page under the same ID.

        """
        pageID = pdfPage.getID()
        if pageID in self.map:
            raise NotUniqueError('PDFPage exists in PDFPageBank already.')
        self.map[pageID] = pdfPage

    def getPage(self, ID):
        """
        Looks up the page stored under ID.

        Parameters
        ---
        ID : str
            ID of the wanted page.

        Returns
        ---
        PDFPage
            Page stored under ID.

        Raises
        ---
        NoMatchError
            Raised if the bank holds no page under ID.

        """
        if ID not in self.map:
            raise NoMatchError('No PDFPage with given ID in Bank.')
        return self.map[ID]

    def countPages(self):
        """
        Returns the number of pages currently stored, as an int.

        """
        return len(self.map)

In [None]:
# Collection of References to PDFPage Objects
class PDF():
    """
    Ordered collection of references to PDFPage objects.

    Attributes
    ---
    orderedPages : list
        Page references in document order.

    """
    def __init__(self):
        self.orderedPages = []

    def addPage(self, pdfPage):
        """
        Adds PDFPage Object to end of PDF.

        Parameters
        ---
        pdfPage : PDFPage
            Page whose reference is appended; the object itself is not copied.

        """
        self.orderedPages.append(pdfPage)

    def getPage(self, i):
        """
        Returns reference to pdfPage of index i from PDF.

        NOTE(review): the earlier stub mentioned grabbing the page from a page
        bank, but this class holds no bank; the stored reference is returned
        directly - confirm this matches the intended design.

        Parameters
        ---
        i : int
            Position of the page; list indexing rules apply, so negative
            values count from the end.

        Raises
        ---
        IndexError
            Raised if i is outside the range of stored pages.

        """
        return self.orderedPages[i]
        

In [None]:
import unittest

class testIDGenerator(unittest.TestCase):
    """
    Checks that IDGenerator never repeats an ID.

    """

    def testGenerateID(self):
        source = IDGenerator()
        first, second, third = [source.generateID() for _ in range(3)]
        # Every pair of the three codes has to differ.
        self.assertNotEqual(first, second)
        self.assertNotEqual(third, first)
        self.assertNotEqual(third, second)

        
class testPDFPage(unittest.TestCase):
    """
    Checks PDFPage construction, getters and equality.

    """

    def setUp(self):
        ids = IDGenerator()
        self.page1 = PDFPage(ids.generateID(), None, None)
        self.page2 = PDFPage(ids.generateID(), None, None)

    def testConstructor(self):
        self.assertNotEqual(self.page1.getID(), self.page2.getID())
        self.assertIsNone(self.page1.getImage())
        self.assertIsNone(self.page1.getPageObject())

    def testEq(self):
        # The == operator is spelled out so PDFPage.__eq__ itself is exercised.
        twin = PDFPage(self.page1.getID(), None, None)
        self.assertTrue(self.page1 == self.page1)
        self.assertFalse(self.page1 == None)
        self.assertTrue(self.page1 == twin)

        
class testPDFPageBank(unittest.TestCase):
    """
    Checks adding, retrieving and counting pages in a PDFPageBank.

    """

    def setUp(self):
        self.bank = PDFPageBank()
        self.page1 = PDFPage('key1', None, None)
        self.page2 = PDFPage('key2', None, None)

    def testConstructor(self):
        self.assertIsNotNone(self.bank)

    def testAddGetPage(self):
        self.bank.addPage(self.page1)
        self.bank.addPage(self.page2)
        self.assertEqual(self.bank.getPage(self.page1.getID()), self.page1)
        self.assertEqual(self.bank.getPage(self.page2.getID()), self.page2)

    def testAddSamePage(self):
        # The first add stays outside assertRaises: if it raised NotUniqueError
        # by mistake the test must fail, not pass.
        self.bank.addPage(self.page1)
        with self.assertRaises(NotUniqueError):
            self.bank.addPage(self.page1)

    def testGetPageNotInBank(self):
        with self.assertRaises(NoMatchError):
            self.bank.getPage('notakey')

    def testCountPages(self):
        self.assertEqual(self.bank.countPages(), 0)
        self.bank.addPage(self.page1)
        self.assertEqual(self.bank.countPages(), 1)
        self.bank.addPage(self.page2)
        self.assertEqual(self.bank.countPages(), 2)


# Run the unittest cases when this code executes as the main module.
# A placeholder argv is supplied so unittest does not parse the real sys.argv,
# and exit=False stops unittest.main from calling sys.exit() when it finishes.
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

In [None]:
# GUI Application

class selectFileGUI(QDialog):
    """
    Modal dialog asking the user which PDF file to edit.

    Constructing the dialog builds its widgets, centers it on the available
    desktop area and runs it modally, so the constructor blocks until the
    dialog is closed.

    """

    def __init__(self):
        # Local import: only QtWidgets and QtGui are imported at file level.
        from PyQt5.QtCore import QTimer

        super().__init__()
        self._setUI()
        # Positioning must happen before exec_(): exec_() blocks until the dialog
        # is closed, so anything placed after it acts on an already hidden dialog.
        self.adjustSize()  # settle the layout so width()/height() are meaningful
        center = QDesktopWidget().availableGeometry().center()
        self.move(int(center.x() - self.width() / 2.),
                  int(center.y() - self.height() / 2.))
        # Queue raise/activate so they run once the modal event loop has started
        # and the window is actually on screen.
        QTimer.singleShot(0, self._bringToFront)
        self.exec_()

    def _bringToFront(self):
        """
        Raises the dialog above other windows and requests keyboard focus for it.

        """
        self.raise_()
        self.activateWindow()

    def _setUI(self):
        """
        Sets buttons and line edit widgets in main QDialog.

        """
        self.setWindowTitle('Select PDF to Edit')
        layout = QGridLayout()

        # Row 0: path entry spanning two columns, with a browse button beside it.
        fileName = QLineEdit()
        layout.addWidget(fileName, 0, 0, 1, 2)

        browse = QPushButton('Browse')
        browse.clicked.connect(lambda: self._handleFileDialog(fileName))
        layout.addWidget(browse, 0, 2)

        # Row 1: action buttons.
        editPDF = QPushButton('Edit PDF')
        editPDF.clicked.connect(lambda: self._handleEditPDFButton(fileName.text()))
        layout.addWidget(editPDF, 1, 0)

        newPDF = QPushButton('New PDF')
        newPDF.clicked.connect(lambda: print('New'))  # placeholder action
        layout.addWidget(newPDF, 1, 1)

        self.setLayout(layout)

    def _handleFileDialog(self, qLineEdit):
        """
        Opens a file chooser and changes qLineEdit text to the file selected.

        The line edit is left untouched when the chooser is cancelled.

        Parameters
        ---
        qLineEdit : QLineEdit
            Widget that receives the chosen path.

        """
        # A single modal call replaces the earlier open() + exec_() combination,
        # which showed the same dialog through two different mechanisms.
        selected, _ = QFileDialog.getOpenFileName(
            self, 'Select PDF', '', 'PDF files (*.pdf);;All files (*)')
        if selected:
            qLineEdit.setText(selected)

    def _handleEditPDFButton(self, fileName):
        """
        Checks fileName to see if is pdf file and that it exists, opening application
        if is, and opening error dialog if not.

        Parameters
        ---
        fileName : str
            fileName to check for validity

        """
        problems = []
        # Raw string avoids the invalid '\.' escape; IGNORECASE also accepts '.PDF'.
        if re.fullmatch(r'.*\.pdf/?', fileName, flags=re.IGNORECASE) is None:
            problems.append('File must be pdf.\n')
        if not os.path.isfile(fileName):
            problems.append('File does not exist.\n')

        if problems:
            errorBox = QMessageBox(self)  # parented so it stays with this dialog
            errorBox.setWindowTitle('File Error')
            errorBox.setText(''.join(problems))
            errorBox.setIcon(QMessageBox.Warning)
            errorBox.exec_()
            return

        # TODO: Application activation goes here


# Launch the file-selection dialog when this code executes as the main module.
if __name__ == '__main__':
    # Reuse an existing QApplication when the cell is run again in the same
    # process; only one instance may exist at a time.
    app = QApplication.instance() or QApplication([])
    gui = selectFileGUI()  # keep a reference so the dialog is not garbage collected
    # selectFileGUI runs itself modally inside its constructor, so it has usually
    # been closed by the time the constructor returns. Only enter the application
    # event loop while a window is still showing; otherwise nothing would end it.
    if gui.isVisible():
        app.exec_()

In [None]:
# Open a local PNG file with PIL.
# NOTE(review): machine-specific absolute path; this cell fails wherever the file is absent.
Image.open('C:/Users/dosre/Downloads/UBC Course Schedule CPSC 310.PNG')

In [None]:
# Check whether a local path refers to an existing regular file.
# NOTE(review): machine-specific absolute path; the result depends on the machine running the cell.
os.path.isfile('C:/Users/dosre/Downloads/Reddit Prof Pic.pdf')