In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import PyPDF2
from PyQt5.QtWidgets import *
from PyQt5.QtGui import QFont
from PyQt5.QtCore import Qt
from SPDFErrors import *
from pdf2image import convert_from_path
from PIL import Image, ImageOps
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg

In [2]:
class IDGenerator():
    """
    Hands out string identifiers that are unique among those issued by this object.

    Attributes
    ---
    nextID : int
        Counter value that the next call to generateID will return (as a str).
    """
    def __init__(self):
        # Counting starts at zero, so the first ID issued is '0'.
        self.nextID = 0

    def generateID(self):
        """
        Issues the current counter value as a string and advances the counter by one.

        Returns
        ---
        str
            String form of the counter value before it was advanced.

        """
        issued = str(self.nextID)
        self.nextID = self.nextID + 1
        return issued

In [3]:
class PDFPage():
    """
    One page of a PDF, held both as a displayable image and as a PyPDF2 page.

    Attributes
    ---
    ID : str
        Identifier of this page.
    image : PIL.PpmImagePlugin.PpmImageFile
        Image of the page, used for graphical display.
    pageObject : PyPDF2.page.PageObject
        PyPDF2 page used when an actual pdf file is written.

    """
    def __init__(self, ID, image, pageObject):
        self.ID, self.image, self.pageObject = ID, image, pageObject

    def __eq__(self, obj):
        # Identity short-circuits; anything that is not a PDFPage is never equal.
        if obj is self:
            return True
        if not isinstance(obj, PDFPage):
            return False
        # Otherwise compare ID, image and page object, in that order.
        return (obj.getID() == self.getID()
                and obj.getImage() == self.getImage()
                and obj.getPageObject() == self.getPageObject())

    def getID(self):
        """Returns the page's ID."""
        return self.ID

    def getImage(self):
        """Returns the image stored for this page."""
        return self.image

    def getPageObject(self):
        """Returns the PyPDF2 page object stored for this page."""
        return self.pageObject

In [4]:
class PDFPageBank():
    """
    Store of every PDFPage added so far, looked up by page ID.

    Attributes
    ---
    map : dict
        Maps each PDFPage's ID to the PDFPage itself.

    """
    def __init__(self):
        self.map = {}

    def contains(self, ID):
        """
        Tells whether a page with the given ID has been added.

        Parameters
        ---
        ID : str
            ID to look for.

        Returns
        ---
        bool
            True if a page is stored under ID, False otherwise.

        """
        return ID in self.map

    def addPage(self, pdfPage):
        """
        Stores pdfPage under its own ID.

        Parameters
        ---
        pdfPage : PDFPage
            Page to store.

        Raises
        ---
        NotUniqueError
            Raised if a page is already stored under the same ID.

        """
        # Reject duplicates first so an existing entry is never overwritten.
        if self.contains(pdfPage.getID()):
            raise NotUniqueError('PDFPage exists in PDFPageBank already.')
        self.map[pdfPage.getID()] = pdfPage

    def getPage(self, ID):
        """
        Looks up the page stored under ID.

        Parameters
        ---
        ID : str
            ID of the wanted page.

        Returns
        ---
        PDFPage
            Page stored under ID.

        Raises
        ---
        NotInBankError
            Raised if nothing is stored under ID.

        """
        if not self.contains(ID):
            raise NotInBankError('No PDFPage with given ID in Bank.')
        return self.map[ID]

    def countPages(self):
        """
        Returns int of number of pages stored in the bank.

        """
        return len(self.map)



In [5]:
class PDF():
    """
    Ordered collection of References to PDFPage IDs in PDFPageBank.
    
    Attributes
    ---
    orderedPages : list
        List of ordered PDFPage IDs representing PDF.
    pageBank : PDFPageBank
        PDFPageBank where PDFPages in PDF are located.
    currentIndex : int
        Cursor used only when next() is called on the PDF object itself;
        every call to iter() resets it to 0.

    Notes
    ---
    Iterable of PDFPage. Each call to iter() returns an independent iterator
    over a snapshot of the page order taken at that moment, so loops over the
    same PDF can be nested and the PDF can be modified (or appended to itself)
    while it is being looped over.
    
    """
    def __init__(self, pageBank):
        self.orderedPages = []
        self.pageBank = pageBank
        self.currentIndex = 0
    
    def __eq__(self, obj):
        # PDFs are equal when they list the same page IDs in the same order;
        # the page bank itself is not compared.
        if obj is self:
            return(True)
        elif not isinstance(obj, PDF):
            return(False)
        else:
            return((obj._getOrderedPages() == self.orderedPages))
    
    def __iter__(self):
        # Reset the cursor used by __next__ so code that calls iter(pdf) and
        # then next(pdf) on the PDF itself keeps working.
        self.currentIndex = 0
        # Iterate over a copy of the ID list: the iterator then has its own
        # position and is unaffected by pages added or removed mid-loop.
        pageIDs = list(self.orderedPages)
        return (self.pageBank.getPage(pageID) for pageID in pageIDs)
    
    def __next__(self):
        # Kept for callers that advance the PDF object directly.
        if self.currentIndex  < self.countPages():
            page = self.getPage(self.currentIndex)
            self.currentIndex += 1
            return(page)
        else:
            raise(StopIteration)
    
    def _setOrderedPages(self, orderedPages):
        # Stores the given list object itself (no copy is made here).
        self.orderedPages = orderedPages
    
    def _getOrderedPages(self):
        # Returns the internal list object itself (no copy is made here).
        return(self.orderedPages)
    
    def addPage(self, pdfPage):
        """
        Adds PDFPage Object to end of PDF if is in pageBank.
        
        Parameters
        ---
        pdfPage : PDFPage
            PDFPage to add to the end of this PDF.
            
        Raises
        ---
        NotInBankError
            Raised if pdfPage not in bank yet.

        """
        if self.pageBank.contains(pdfPage.getID()):
            self.orderedPages.append(pdfPage.getID())
        else:
            raise(NotInBankError("PDFPage not in pagebank."))
    
    def getPage(self, i):
        """
        Returns reference to pdfPage of index i from PDF, grabbed from page bank. 

        """
        return(self.pageBank.getPage(self.orderedPages[i]))
    
    def removePage(self, i):
        """
        Removes page at index i from PDF. 

        Raises
        ---
        IndexError
            Raised by list.pop if i is out of range.
        
        """
        self.orderedPages.pop(i)
    
    def moveBeforePage(self, i, beforeThisIndex):
        """
        Moves page at index i in front of page at beforeThisIndex.

        Both indices refer to positions before the move. The page is first
        re-inserted at its destination and the original entry is removed
        afterwards.
        
        """
        self.orderedPages = (self.orderedPages[:beforeThisIndex] + 
                             [self.orderedPages[i]] +  # Page to reinsert
                             self.orderedPages[beforeThisIndex:])  # Slice including beforeThisIndex
        if i < beforeThisIndex:  # Original entry sits before the insertion point, so its index is unchanged
            self.removePage(i)
        else:  # Original entry was pushed one place to the right by the insertion
            self.removePage(i+1)

    def moveAfterPage(self, i, afterThisIndex):
        """
        Moves page at index i after page at afterThisIndex.

        Both indices refer to positions before the move. The page is first
        re-inserted at its destination and the original entry is removed
        afterwards.
        
        """
        self.orderedPages = (self.orderedPages[:afterThisIndex+1] +  # Slice including afterThisIndex
                             [self.orderedPages[i]] +  # Page to reinsert
                             self.orderedPages[afterThisIndex+1:])
        if i >= afterThisIndex:  # Entry at i+1 is removed: the shifted original, or the new copy when i == afterThisIndex
            self.removePage(i+1)
        else:  # Original entry sits before the insertion point, so its index is unchanged
            self.removePage(i)

    def countPages(self):
        """
        Returns int of number of pages in PDF.
        
        """
        return(len(self.orderedPages))

    def appendEntirePDF(self, pdf):
        """
        Appends all pages in pdf to the end of self.

        Passing the PDF itself is allowed and doubles its pages, because the
        loop runs over a snapshot of pdf's page order.
            
        Raises
        ---
        NotInBankError
            Raised if any pages in pdf are not in the same bank as this pdf.
            Pages that precede the offending page have already been appended
            when the error is raised.

        """
        for page in pdf:
            self.addPage(page)
    
    def copyPDF(self):
        """
        Returns different object but copy of PDF.

        The copy has its own list of page IDs but shares the page bank.
        
        Returns
        ---
        PDF
            Copy of self.
        
        """
        pdf = PDF(self.pageBank)
        pdf._setOrderedPages(self._getOrderedPages().copy())
        return(pdf)



In [6]:
class PDFHistoryRecorder():
    """
    Linear undo/redo history of PDF snapshots.

    Attributes
    ---
    currentVersion : int
        Index into versions of the active snapshot; -1 until the first
        newVersion call.
    versions : list
        Snapshots in the order they were recorded. Each entry is a copy made
        with copyPDF, and only copies of the entries are handed back out, so
        callers can never modify recorded history.

    """
    
    def __init__(self):
        self.currentVersion = -1   # First pdf starts at 0 index
        self.versions = []
        
    def _getVersions(self):
        return(self.versions)
    
    def _getCurrentVersion(self):
        return(self.currentVersion)
    
    def _setCurrentVersion(self, i):
        self.currentVersion = i
    
    def newVersion(self, pdf):
        """
        Erase all later versions and add a copy of pdf to versions.

        Parameters
        ---
        pdf : PDF
            PDF whose current state is recorded; it must provide copyPDF().

        """
        # Drop everything after the active snapshot (the "redo" branch).
        self.versions = self.versions[:self.currentVersion+1]
        self.currentVersion += 1
        self.versions.append(pdf.copyPDF())
    
    def previousVersion(self):
        """
        Changes back to previous version and returns a copy of that pdf, if no previous versions raises exception.

        Returns
        ---
        PDF
            Copy of the snapshot that is now active.
        
        Raises
        ---
        NoPrevVersions
            Thrown if there are no previous versions (i.e. current index less than or equal to 0).
        
        """
        if self.currentVersion > 0:
            self.currentVersion -= 1
            # Return a copy: editing the restored PDF must not alter history.
            return(self.versions[self.currentVersion].copyPDF())
        else:
            raise(NoPrevVersions('No previous versions to rollback to.'))
    
    def laterVersion(self):
        """
        Changes forward to later version and returns a copy of that pdf, if no later version raises exception.

        Returns
        ---
        PDF
            Copy of the snapshot that is now active.
        
        Raises
        ---
        NoLaterVersions
            Thrown if there are no future versions (i.e. current index is already the last index of versions).
        
        """
        if self.currentVersion < len(self.versions)-1:
            self.currentVersion += 1
            # Return a copy: editing the restored PDF must not alter history.
            return(self.versions[self.currentVersion].copyPDF())
        else:
            raise(NoLaterVersions('No later versions to rollforward to.'))



In [7]:
# i = 3
# beforeIndex = 4
# a = ['1', '2', '3', '4', '5']
# a = a[:beforeIndex] + [a[i]] + a[beforeIndex:]
# if i < beforeIndex:
#     a.pop(i)
# else:
#     a.pop(i+1)
# a

# i = 4
# afterIndex = 2
# a = ['1', '2', '3', '4', '5']
# a = a[:afterIndex+1] + [a[i]] + a[afterIndex+1:]
# print(a)
# if i >= afterIndex:
#     a.pop(i+1)
# else:
#     a.pop(i)
# print(a)

In [8]:
class File2PDFConverter():
    """
    Reads a pdf file from disk and turns it into a PDF object.

    Attributes
    ---
    reader : PyPDF2.PdfFileReader
        Reader opened on the file; supplies the PyPDF2 page objects that are
        later written back out to an actual pdf file.
    images : list <PIL.PpmImagePlugin.PpmImageFile>
        Images of the pages for graphical display, indexed in the same order
        as the pages in reader.
    generator : IDGenerator
        IDGenerator of the application, used to give each created PDFPage a
        unique ID.
    bank : PDFPageBank
        PDFPageBank in which the created PDFPages are stored.

    """
    def __init__(self, filePath, idGenerator, pdfBank):
        # The same file is opened twice: once for page objects, once for images.
        self.reader = PyPDF2.PdfFileReader(filePath)
        self.images = convert_from_path(filePath)
        self.generator = idGenerator
        self.bank = pdfBank

    def extractPDF(self):
        """
        Builds a PDF object holding every page of the file, in file order.

        Each page gets a fresh ID, is stored in the bank and is then appended
        to the returned PDF.

        Returns
        ---
        PDF
             PDF containing ordered pages in same form as PDF from filePath
        """
        extracted = PDF(self.bank)
        pageTotal = self.reader.getNumPages()
        for index in range(pageTotal):
            newID = self.generator.generateID()
            rendered = self.images[index]
            pageObject = self.reader.getPage(index)
            newPage = PDFPage(newID, rendered, pageObject)
            # The page must be in the bank before the PDF will accept it.
            self.bank.addPage(newPage)
            extracted.addPage(newPage)
        return extracted



In [9]:
class PDF2FileConverter():
    """
    Object that converts a PDF object into an actual pdf file.
    
    Attributes
    ---
    writer : PyPDF2.PdfFileWriter
        Writer loaded with the page objects of the PDF, in PDF order; it
        produces the actual file.
    
    """
    def __init__(self, pdf):
        self.writer = PyPDF2.PdfFileWriter()
        for page in pdf:
            self.writer.addPage(page.getPageObject())

    def extractToFilePath(self, filePath):
        """
        Extracts PDF that is loaded in writer, into a pdf file at filePath.

        Parameters
        ---
        filePath : str
            String of file path to create pdf file with. Must end in '.pdf'.

        Raises
        ---
        FilePathNotPDF
            Raised, before any file is created, if filePath does not end in '.pdf'.

        """
        # Raw string keeps '\.' a literal dot; no '?' after 'pdf', so a path
        # ending in '.pd' is rejected.
        if re.fullmatch(r'.*\.pdf', filePath) is None:
            raise(FilePathNotPDF('Given file path must be valid pdf file (No end slash also).'))
        # The with-block closes the file even if the writer raises.
        with open(filePath, 'wb') as fileStream:
            self.writer.write(fileStream)



In [10]:
# TowardsPDFConverter = File2PDFConverter('C:/Users/dosre/Downloads/invoice_200400091915.pdf', IDGenerator(), PDFPageBank())
# pdf = TowardsPDFConverter.extractPDF()
# for page in pdf:
#     fig, ax = plt.subplots(figsize=(8, 11))
#     ax.imshow(page.getImage())
#     ax.tick_params(axis='both', left=False, bottom=False, labelleft=False, labelbottom=False)
#     plt.show()
# TowardsFileConverter = PDF2FileConverter(pdf)
# TowardsFileConverter.extractToFilePath('C:/Users/dosre/Desktop/trumpus.pdf')

In [11]:
import unittest

class testIDGenerator(unittest.TestCase):

    def testGenerateID(self):
        generator = IDGenerator()
        firstCode = generator.generateID()
        secondCode = generator.generateID()
        self.assertTrue(firstCode != secondCode)
        thirdCode = generator.generateID()
        self.assertTrue(thirdCode != firstCode)
        self.assertTrue(thirdCode != secondCode)

        
class testPDFPage(unittest.TestCase):
    
    def setUp(self):
        self.page1 = PDFPage('key1', None, None)
        self.page2 = PDFPage('key2', None, None)
    
    def testConstructor(self):
        self.assertTrue(self.page1.getID() != self.page2.getID())
        self.assertTrue(self.page1.getImage() is None)
        self.assertTrue(self.page1.getPageObject() is None)
    
    def testEq(self):
        self.assertTrue(self.page1 == self.page1)
        self.assertFalse(self.page1 == None)
        self.assertTrue(self.page1 == PDFPage(self.page1.getID(), None, None))

        
class testPDFPageBank(unittest.TestCase):

    def setUp(self):
        self.bank = PDFPageBank()
        self.page1 = PDFPage('key1', None, None)
        self.page2 = PDFPage('key2', None, None)
    
    def testConstructor(self):
        self.assertTrue(self.bank is not None)
    
    def testAddGetPage(self):
        self.bank.addPage(self.page1)
        self.bank.addPage(self.page2)
        self.assertEqual(self.bank.getPage(self.page1.getID()), self.page1)
        self.assertEqual(self.bank.getPage(self.page2.getID()), self.page2)
    
    def testAddSamePage(self):
        try:
            self.bank.addPage(self.page1)
            self.bank.addPage(self.page1)
            self.fail('Should have raised exception.')
        except(NotUniqueError):
            pass
    
    def testGetPageNotInBank(self):
        try:
            self.bank.getPage('notakey')
            self.fail('Should have raised exception.')
        except(NotInBankError):
            pass

    def testCountPages(self):
        self.assertEqual(self.bank.countPages(), 0)
        self.bank.addPage(self.page1)
        self.assertEqual(self.bank.countPages(), 1)
        self.bank.addPage(self.page2)
        self.assertEqual(self.bank.countPages(), 2)


class testPDF(unittest.TestCase):
    
    def setUp(self):
        self.bank = PDFPageBank()
        self.page1 = PDFPage('key1', None, None)
        self.page2 = PDFPage('key2', None, None)
        self.bank.addPage(self.page1)
        self.bank.addPage(self.page2)
        self.pdf = PDF(self.bank)
    
    def testConstructor(self):
        self.assertTrue(self.pdf is not None)
        self.pdf.addPage(self.page1)
        self.pdf.addPage(self.page2)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key2'])
    
    def testAddGetPage(self):
        self.pdf.addPage(self.page1)
        self.pdf.addPage(self.page2)
        self.assertEqual(self.pdf.getPage(0), self.page1)
        self.assertEqual(self.pdf.getPage(1), self.page2)
    
    def testAddPageNotInBank(self):
        try:
            self.pdf.addPage(PDFPage('key3', None, None))
            self.fail('Should not have added PDFPage w key3 as is not in bank.')
        except(NotInBankError):
            pass

    def testCountPages(self):
        self.assertEqual(self.pdf.countPages(), 0)
        self.pdf.addPage(self.page1)
        self.assertEqual(self.pdf.countPages(), 1)
        self.pdf.addPage(self.page2)
        self.assertEqual(self.pdf.countPages(), 2)
    
    def test0LengthIterator(self):
        for page in self.pdf:
            self.fail('Nothing should have run in for loop since no elements.')
    
    def test2LengthIterator(self):
        self.pdf.addPage(self.page1)
        self.pdf.addPage(self.page2)
        for page in self.pdf:
            pass
        iterable = iter(self.pdf)
        self.assertEqual(next(iterable), self.page1)
        self.assertEqual(next(iterable), self.page2)
    
    def testAppendEntirePDF(self):
        self.pdf.addPage(self.page1)
        self.pdf.addPage(self.page2)
        pdf2 = PDF(self.bank)
        page3 = PDFPage('key3', None, None)
        page4 = PDFPage('key4', None, None)
        self.bank.addPage(page3)
        self.bank.addPage(page4)
        pdf2.addPage(page3)
        pdf2.addPage(page4)
        self.pdf.appendEntirePDF(pdf2)
        iterable = iter(self.pdf)
        self.assertEqual(next(iterable), self.page1)
        self.assertEqual(next(iterable), self.page2)
        self.assertEqual(next(iterable), page3)
        self.assertEqual(next(iterable), page4)
    
    def testAppendEntirePDFNotInBank(self):
        self.pdf.addPage(self.page1)
        self.pdf.addPage(self.page2)
        otherBank = PDFPageBank()
        pdf2 = PDF(otherBank)
        page3 = PDFPage('key3', None, None)
        otherBank.addPage(page3)
        pdf2.addPage(page3)
        try:
            self.pdf.appendEntirePDF(pdf2)
            self.fail('Page3 not in bank so should have failed.')
        except(NotInBankError):
            pass
    
    def testRemovePage(self):
        self.pdf.addPage(self.page1)
        self.pdf.addPage(self.page2)
        self.pdf.removePage(1)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1'])
        self.pdf.addPage(self.page2)
        self.pdf.removePage(0)
        self.assertEqual(self.pdf._getOrderedPages(), ['key2'])
    
    def testRemovePageBadIndex(self):
        try:
            self.pdf.removePage(0)
            self.fail('No items yet so should not be able to remove anything.')
        except(IndexError):
            pass
    
    def MoveTestSetUp(self):
        self.page3 = PDFPage('key3', None, None)
        self.page4 = PDFPage('key4', None, None)
        self.bank.addPage(self.page3)
        self.bank.addPage(self.page4)
        self.pdf.addPage(self.page1)
        self.pdf.addPage(self.page2)
        self.pdf.addPage(self.page3)
        self.pdf.addPage(self.page4)
    
    def testMoveBeforePage(self):
        # Test move middle item to same place both ways
        self.MoveTestSetUp()
        self.pdf.moveBeforePage(1, 2)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key2', 'key3', 'key4'])
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveBeforePage(1, 1)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key2', 'key3', 'key4'])
        
        # Test move middle item before end element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveBeforePage(1, 3)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key3', 'key2', 'key4'])
        
        # Test move middle item before first element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveBeforePage(1, 0)
        self.assertEqual(self.pdf._getOrderedPages(), ['key2', 'key1', 'key3', 'key4'])
        
        # Test move first item before end element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveBeforePage(0, 3)
        self.assertEqual(self.pdf._getOrderedPages(), ['key2', 'key3', 'key1', 'key4'])
        
        # Test move first item before first element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveBeforePage(0, 0)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key2', 'key3', 'key4'])
        
        # Test move last item before end element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveBeforePage(3, 3)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key2', 'key3', 'key4'])
        
        # Test move last item before first element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveBeforePage(3, 0)
        self.assertEqual(self.pdf._getOrderedPages(), ['key4', 'key1', 'key2', 'key3'])
    
    def testMoveAfterPage(self):
        # Test move middle item to same place both ways
        self.MoveTestSetUp()
        self.pdf.moveAfterPage(1, 0)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key2', 'key3', 'key4'])
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveAfterPage(1, 1)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key2', 'key3', 'key4'])
        
        # Test move middle item after end element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveAfterPage(1, 3)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key3', 'key4', 'key2'])
        
        # Test move middle item after third element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveAfterPage(1, 2)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key3', 'key2', 'key4'])
        
        # Test move first item after end element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveAfterPage(0, 3)
        self.assertEqual(self.pdf._getOrderedPages(), ['key2', 'key3', 'key4', 'key1'])
        
        # Test move first item after first element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveAfterPage(0, 0)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key2', 'key3', 'key4'])
        
        # Test move last item after end element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveAfterPage(3, 3)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key2', 'key3', 'key4'])
        
        # Test move last item after first element
        self.setUp()
        self.MoveTestSetUp()
        self.pdf.moveAfterPage(3, 0)
        self.assertEqual(self.pdf._getOrderedPages(), ['key1', 'key4', 'key2', 'key3'])
    
    def testEq(self):
        self.pdf.addPage(self.page1)
        self.pdf.addPage(self.page2)
        self.assertTrue(self.pdf == self.pdf)
        self.assertFalse(self.pdf == None)
        pdf2 = PDF(self.bank)
        pdf2.addPage(self.page1)
        pdf2.addPage(self.page2)
        self.assertTrue(self.pdf == pdf2)
        pdf2.removePage(1)
        self.assertFalse(self.pdf == pdf2)
    
    def testCopyPDF(self):
        self.pdf.addPage(self.page1)
        self.pdf.addPage(self.page2)
        pdf2 = self.pdf.copyPDF()
        self.assertEqual(pdf2, self.pdf)
        self.assertFalse(pdf2 is self.pdf)

        
class testPDFHistoryRecorder(unittest.TestCase):
    
    def setUp(self):
        self.bank = PDFPageBank()
        self.page1 = PDFPage('key1', None, None)
        self.page2 = PDFPage('key2', None, None)
        self.page3 = PDFPage('key3', None, None)
        self.page4 = PDFPage('key4', None, None)
        self.bank.addPage(self.page1)
        self.bank.addPage(self.page2)
        self.bank.addPage(self.page3)
        self.bank.addPage(self.page4)
        self.pdf = PDF(self.bank)
        self.pdf2 = PDF(self.bank)
        self.pdf3 = PDF(self.bank)
        self.historyRecorder = PDFHistoryRecorder()
    
    def testConstructor(self):
        self.assertTrue(self.historyRecorder is not None)
    
    def testNewVersion(self):
        self.historyRecorder.newVersion(self.pdf)
        self.assertEqual(self.historyRecorder._getCurrentVersion(), 0)
        self.pdf.addPage(self.page1)
        self.historyRecorder.newVersion(self.pdf)
        self.assertEqual(self.historyRecorder._getCurrentVersion(), 1)
        self.pdf.addPage(self.page2)
        self.historyRecorder.newVersion(self.pdf)
        self.assertEqual(self.historyRecorder._getCurrentVersion(), 2)
        self.pdf.removePage(1)
        self.historyRecorder.newVersion(self.pdf)
        self.assertEqual(self.historyRecorder._getCurrentVersion(), 3)
        versions = self.historyRecorder._getVersions()
        self.assertEqual(len(versions), 4)
        self.assertEqual(versions[3], self.pdf)
        self.pdf.addPage(self.page2)
        self.assertEqual(versions[2], self.pdf)
        self.pdf.removePage(1)
        self.assertEqual(versions[1], self.pdf)
        self.pdf.removePage(0)
        self.assertEqual(versions[0], self.pdf)
    
    def testNewVersionRemoveLaterVersions(self):
        self.historyRecorder.newVersion(self.pdf)
        self.pdf.addPage(self.page1)
        self.historyRecorder.newVersion(self.pdf)
        self.pdf.addPage(self.page2)
        self.historyRecorder.newVersion(self.pdf)
        self.pdf.removePage(1)
        self.historyRecorder.newVersion(self.pdf)
        self.historyRecorder._setCurrentVersion(1)
        self.historyRecorder.newVersion(self.pdf)
        versions = self.historyRecorder._getVersions()
        self.assertEqual(versions[-1], self.pdf)
        self.assertEqual(versions[-2], self.pdf)
        self.pdf.removePage(0)
        self.assertEqual(versions[-3], self.pdf)
    
    def addVersionsToRecorder(self):
        self.pdf.addPage(self.page1)
        self.pdf2.addPage(self.page1)
        self.pdf2.addPage(self.page2)
        self.pdf3.addPage(self.page2)
        self.historyRecorder.newVersion(self.pdf)
        self.historyRecorder.newVersion(self.pdf2)
        self.historyRecorder.newVersion(self.pdf3)
    
    def testPreviousVersion(self):
        self.addVersionsToRecorder()
        self.assertEqual(self.historyRecorder.previousVersion(), self.pdf2)
        self.assertEqual(self.historyRecorder.previousVersion(), self.pdf)
        try:
            self.historyRecorder.previousVersion()
            self.fail('Should throw exception as no more previous versions.')
        except(NoPrevVersions):
            pass
        
    def testPreviousVersion(self):
        self.addVersionsToRecorder()
        self.historyRecorder._setCurrentVersion(0)
        self.assertEqual(self.historyRecorder.laterVersion(), self.pdf2)
        self.assertEqual(self.historyRecorder.laterVersion(), self.pdf3)
        try:
            self.historyRecorder.laterVersion()
            self.fail('Should throw exception as no more future versions.')
        except(NoLaterVersions):
            pass


if __name__ == '__main__':
    # Run every TestCase defined above. argv is given explicitly so unittest
    # does not parse the process's own command line, and exit=False stops
    # unittest from calling sys.exit() once the run is finished.
    unittest.main(exit=False, argv=['first-arg-is-ignored'])

..........................
----------------------------------------------------------------------
Ran 26 tests in 0.042s

OK


In [12]:
class selectFileGUI(QDialog):
    """
    Modal dialog that asks the user for the path of an existing pdf file.

    Constructing the object shows the dialog and blocks until it is closed;
    the chosen path is then read with getSelectedPDFPath().

    Attributes
    ---
    selectedPDFPath : str or None
        Path accepted through the 'Select PDF' button, None if none was accepted.

    """

    def __init__(self, title='Select PDF to Edit'):
        super().__init__()
        self._setUI(title)
        self.selectedPDFPath = None  # sets selectedPDFPath as none initially
        # Size and centre the dialog BEFORE exec_(): exec_() only returns once
        # the dialog has been closed, so anything after it cannot affect what
        # the user sees.
        self.adjustSize()  # make width()/height() reflect the laid-out content
        screenCenter = QDesktopWidget().availableGeometry().center()
        self.move(int(screenCenter.x()-self.width()/2.),
                  int(screenCenter.y()-self.height()/2.))
        self.exec_()

    def showEvent(self, event):
        """
        Brings the dialog to the front and gives it focus whenever it is shown.

        """
        super().showEvent(event)
        self.raise_()
        self.activateWindow()
    
    def _setUI(self, title):
        """
        Sets buttons and line edit widgets in main QDialog.

        Parameters
        ---
        title : str
            Window title of the dialog.
        
        """
        self.setWindowTitle(title)
        layout = QGridLayout()
        
        # Row 0: path text field (two columns wide) with a Browse button.
        fileName = QLineEdit()
        layout.addWidget(fileName, 0, 0, 1, 2)
        
        browse = QPushButton('Browse')
        browse.clicked.connect(lambda: self._handleFileDialog(fileName))
        layout.addWidget(browse, 0, 2)
        
        # Row 1: button that validates whatever text is in the field.
        editPDF = QPushButton('Select PDF')
        editPDF.clicked.connect(lambda: self._handleEditPDFButton(fileName.text()))
        layout.addWidget(editPDF, 1, 0)

        self.setLayout(layout)

    def _handleFileDialog(self, qLineEdit):
        """
        Opens QFileDialog and changes qLineEdit text to file selected.

        Parameters
        ---
        qLineEdit : QLineEdit
            Line edit that receives the path of the selected file.
        
        """
        dialog = QFileDialog()
        # NOTE(review): open() registers the callback and exec_() then runs the
        # dialog modally; presumably the callback fires only when a file was
        # actually chosen - confirm against the PyQt5 QFileDialog.open docs.
        dialog.open(lambda: qLineEdit.setText(dialog.selectedFiles()[0]))
        dialog.exec_()
    
    def _handleEditPDFButton(self, fileName):
        """
        Checks fileName to see if is pdf file and that it exists, storing the path and
        closing the dialog if is, and opening error dialog if not.
        
        Parameters
        ---
        fileName : str
            fileName to check for validity
        
        """
        errorText = ''
        # Raw string keeps '\.' a literal dot; one trailing slash is tolerated.
        if (re.fullmatch(r'.*\.pdf/?', fileName) is None):
            errorText += 'File must be pdf.\n'
        if (not os.path.isfile(fileName)):
            errorText += 'File does not exist.\n'
        if errorText != '':
            errorBox = QMessageBox()
            errorBox.setWindowTitle('File Error')
            errorBox.setText(errorText)
            errorBox.setIcon(QMessageBox.Warning)
            errorBox.exec_()
        else:
            self.selectedPDFPath = fileName
            self.close()
        
    def getSelectedPDFPath(self):
        """
        Gets the finalized selected PDF path, after the dialog is closed, if none is set raises an exception.

        Returns
        ---
        str
            Path that was accepted through the 'Select PDF' button.

        Raises
        ---
        NoValidFilePathGiven
            Thrown when during application's lifetime no valid filepath was selected.

        """
        if self.selectedPDFPath is not None:
            return(self.selectedPDFPath)
        else:
            raise(NoValidFilePathGiven('No valid path was given during application lifetime.'))



In [None]:
# GUI Application

class editFileGUI(QDialog):
    """
    Dialog for paging through a PDF and appending, removing and moving its pages.

    Constructing an instance loads the PDF, builds the UI and then runs the
    dialog's own modal loop (exec_()), so the constructor only returns once the
    dialog has been closed.

    Attributes
    ---
    bank : PDFPageBank
        Page bank handed to every File2PDFConverter created by this dialog.
    generator : IDGenerator
        ID generator handed to every File2PDFConverter created by this dialog.
    recorder : PDFHistoryRecorder
        Receives a new version of pdf on every _update().
    pdf
        Object returned by File2PDFConverter.extractPDF() for the opened file.
    moveMode : bool
        True while a page has been picked with 'Move Page' and its target is
        still being chosen.
    indexToMove : int or None
        Index of the page picked for moving; None until 'Move Page' is first used.
    currentIndex : int
        Zero-based index of the page currently displayed.
    pageCount : int
        Page count of pdf as of the last UI refresh.

    """

    def __init__(self, pdfFilePath):
        """
        Loads the PDF at pdfFilePath, builds the UI, records the initial version
        and runs the dialog modally.

        Parameters
        ---
        pdfFilePath : str
            Path of the pdf to open.

        """
        super().__init__()

        # Create single bank and IDgenerator for instance of GUI
        self.bank = PDFPageBank()
        self.generator = IDGenerator()
        self.recorder = PDFHistoryRecorder()
        self.pdf = self.loadPDF(pdfFilePath)
        self.moveMode = False  # Not initially in moveMode
        self.indexToMove = None  # Set by _handleMovePage
        self.currentIndex = 0
        self.pageCount = self.pdf.countPages()

        # Load UI
        self._setUI()
        self._update()
        self.exec_()

    def _addButton(self, layout, label, onClick, *gridPosition, enabled=True):
        """
        Creates a QPushButton, wires its clicked signal to onClick, places it in
        layout at gridPosition and returns it.

        """
        button = QPushButton(label)
        # Zero-argument lambda so the signal's 'checked' value is not forwarded.
        button.clicked.connect(lambda: onClick())
        button.setEnabled(enabled)
        layout.addWidget(button, *gridPosition)
        return button

    def _hideAxisTicks(self):
        """
        Hides tick marks and tick labels on the page display axes.

        """
        self.mainAx.tick_params(axis='both', left=False, bottom=False, labelleft=False, labelbottom=False)

    def _setUI(self):
        """
        Builds every widget of the dialog and lays them out in a grid.

        """
        # Local import: the figure is embedded in Qt, so it is created directly
        # rather than through pyplot, which would also register it with
        # pyplot's global figure manager.
        from matplotlib.figure import Figure

        self.setWindowTitle('SimplePDF')
        layout = QGridLayout()

        # Add main figure
        self.mainFigure = Figure(figsize=(6, 7.75))
        self.canvas = FigureCanvasQTAgg(self.mainFigure)
        self.mainAx = self.mainFigure.add_subplot(111)
        self._hideAxisTicks()
        self.mainFigure.tight_layout(pad=0)
        layout.addWidget(self.canvas, 1, 1, 7, 3)

        # Main figure arrows, current index, and select page button
        self.prevPageButton = self._addButton(layout, '<', lambda: self._incrementPageIndex(-1), 4, 0)
        self.nextPageButton = self._addButton(layout, '>', lambda: self._incrementPageIndex(1), 4, 4)

        self.indexDisplay = QLabel('')
        self.indexDisplay.setAlignment(Qt.AlignCenter)
        self.indexDisplay.setFont(QFont('default', 11))
        layout.addWidget(self.indexDisplay, 0, 2)

        # Move-mode buttons start disabled; _activateMoveFunction toggles them.
        self.placeBeforeButton = self._addButton(layout, 'Place Before', lambda: self._movePage('Before'),
                                                 8, 1, enabled=False)
        self.placeAfterButton = self._addButton(layout, 'Place After', lambda: self._movePage('After'),
                                                8, 2, enabled=False)
        self.cancelButton = self._addButton(layout, 'Cancel', self._cancelMovePage, 8, 3, enabled=False)

        # Function buttons append, remove page, move page, undo, redo, export
        self.appendPDFButton = self._addButton(layout, 'Append PDF', self._handleAppendPDF, 2, 5, 1, 2)
        self.removePageButton = self._addButton(layout, 'Remove Page', self._removeCurrentPage, 3, 5, 1, 2)
        self.movePageButton = self._addButton(layout, 'Move Page', self._handleMovePage, 4, 5, 1, 2)
        # TODO: undo, redo and export are placeholders that only print for now.
        self.undoButton = self._addButton(layout, 'Undo', lambda: print('undo'), 5, 5)
        self.redoButton = self._addButton(layout, 'Redo', lambda: print('redo'), 5, 6)
        self.exportPDFButton = self._addButton(layout, 'Export PDF', lambda: print('export pdf'), 6, 5, 1, 2)

        self.setLayout(layout)

    def loadPDF(self, pdfFilePath):
        """
        Returns PDF of the given pdf at pdfFilePath, extracted with this
        dialog's IDGenerator and PDFPageBank.

        """
        converter = File2PDFConverter(pdfFilePath, self.generator, self.bank)
        return converter.extractPDF()

    def _update(self):
        """
        Saves current version of pdf and updates all UI. Use whenever there is a change to pdf.

        """
        self._saveVersion()
        self._updateUI()

    def _updateUI(self):
        """
        Refreshes the page counter label and redraws the current page. While in
        move mode, the page picked for moving is drawn tinted.

        """
        self.pageCount = self.pdf.countPages()
        self.indexDisplay.setText('{}/{}'.format(self.currentIndex+1, self.pageCount))

        self.mainAx.clear()
        # clear() may restore default tick settings depending on the Matplotlib
        # version, so the tick hiding is re-applied after every clear.
        self._hideAxisTicks()

        pageImage = self.pdf.getPage(self.currentIndex).getImage()
        if self.moveMode and (self.currentIndex == self.indexToMove):
            pageImage = ImageOps.colorize(ImageOps.grayscale(pageImage),
                                          black='#000000', white='#add8e6')
        self.mainAx.imshow(pageImage)
        self.canvas.draw()

    def _saveVersion(self):
        """
        Saves current pdf as new version in recorder.

        """
        self.recorder.newVersion(self.pdf)

    def _incrementPageIndex(self, i):
        """
        If the increment keeps the index within 0..pageCount-1, moves i pages and
        updates the UI; otherwise does nothing.

        """
        if 0 <= self.currentIndex+i <= self.pageCount-1:
            self.currentIndex += i
            self._updateUI()

    def _handleAppendPDF(self):
        """
        Asks for a PDF through selectFileGUI and appends all of it to the
        current pdf. Does nothing when no valid path was selected.

        """
        gui = selectFileGUI('Select PDF to Append')
        try:
            self.pdf.appendEntirePDF(self.loadPDF(gui.getSelectedPDFPath()))
            self._update()
        except NoValidFilePathGiven:
            # Deliberate: the user closed the selector without a valid file.
            pass

    def _removeCurrentPage(self):
        """
        If there is still more than one page, deletes current page and moves to previous. If already at first page
        does not move.

        """
        if self.pageCount > 1:
            self.pdf.removePage(self.currentIndex)
            if self.currentIndex > 0:
                self.currentIndex -= 1
            self._update()

    def _handleMovePage(self):
        """
        Marks the current page as the page to move and enters move mode.

        """
        self.indexToMove = self.currentIndex
        self._activateMoveFunction(True)
        self._updateUI()

    def _cancelMovePage(self):
        """
        Leaves move mode without changing the pdf.

        """
        self._activateMoveFunction(False)
        self._updateUI()

    def _movePage(self, movement):
        """
        Moves the page at indexToMove relative to the currently displayed page,
        leaves move mode and records the change.

        Parameters
        ---
        movement : str
            'Before' calls pdf.moveBeforePage, 'After' calls pdf.moveAfterPage;
            any other value moves nothing.

        """
        # NOTE(review): the +/-1 corrections below presumably keep the view on
        # the same reference page after the reorder; confirm against
        # moveBeforePage/moveAfterPage.
        if movement == 'Before':
            self.pdf.moveBeforePage(self.indexToMove, self.currentIndex)
            if self.indexToMove > self.currentIndex:
                self.currentIndex += 1
        elif movement == 'After':
            self.pdf.moveAfterPage(self.indexToMove, self.currentIndex)
            if self.indexToMove < self.currentIndex:
                self.currentIndex -= 1
        self._activateMoveFunction(False)
        self._update()

    # TODO: _handleVersionChange() is not implemented yet.

    def _activateMoveFunction(self, boolean):
        """
        Switches move mode on or off: the editing buttons are enabled only
        outside move mode, the placement and cancel buttons only inside it.

        """
        self.moveMode = boolean
        editingButtons = (self.appendPDFButton, self.removePageButton, self.movePageButton,
                          self.undoButton, self.redoButton, self.exportPDFButton)
        placementButtons = (self.placeBeforeButton, self.placeAfterButton, self.cancelButton)
        for button in editingButtons:
            button.setEnabled(not boolean)
        for button in placementButtons:
            button.setEnabled(boolean)


if __name__ == '__main__':
    # Reuse an existing QApplication (e.g. when this cell is re-run in the same
    # kernel) instead of constructing a second one.
    tempapp = QApplication.instance() or QApplication([])
    # NOTE(review): hard-coded, machine-specific path used for manual testing.
    # editFileGUI.__init__ runs the dialog's own modal loop (self.exec_()) and
    # only returns once the dialog is closed, so no further application event
    # loop is started afterwards; with no window left open it would not return.
    tempgui = editFileGUI('C:/Users/dosre/Downloads/invoice_200400091915.pdf')  # keep a reference so the dialog is not garbage collected

In [None]:
# if __name__ == '__main__':
#     app = QApplication([])
#     gui = selectFileGUI()
#     try:
#         main = editFileGUI(gui.getSelectedPDFPath())
#     except(NoValidFilePathGiven):
#         pass
#     app.exec_()