# Web Application ✱ Utility for PDF Files/Pages
## Main Objective: Create a web application which enables users to:
* Merge 
* Splice 
* Rotate Pages 
PDF documents.

<b>Note:</b> The utility functions `rangify`, `add` and `merge` in this notebook are taken from the original author's project at https://github.com/metaist/pdfmerge, where they are used as a command-line utility. This notebook reuses them in a more user-friendly setting: a web app interface, launched from this Jupyter notebook, that performs the same tasks.

<b>Running:</b> Python 3.7.9<br>
<b>Using:</b> pip 20.2.4<br>
<b>OS:</b> Windows 10

<img src="preview.png" height="150px" />

### 1. Importing/Installing required module(s)
1. Import the required packages for PDF read/write: `PyPDF2`,`PdfFileWriter`,`PdfFileReader`
2. Initialise the utility functions here to carry out the merge, splice and page rotation operations on PDF documents.

In [1]:
from glob import glob
import os
import re

from PyPDF2 import PdfFileWriter, PdfFileReader

#### 1.1 Specify Error Handling Rules

In [2]:
# Message templates for the PDF utility functions; the {n} placeholders are
# filled in with str.format() where the messages are used.
# {0}: the path that does not point to an existing file
ERROR_PATH = "ERROR: path not found: {0}"
# {0}: the rule text that failed to parse
ERROR_RULE = "ERROR: invalid rule: {0}"
# {0}: offending page, {1}: last valid page
# NOTE(review): not referenced by any code visible in this notebook.
ERROR_RANGE = "ERROR: page {0} out of range [1-{1}]"
# {0}: start of a range whose end cannot be determined
ERROR_BOUNDS = "ERROR: missing upper bound on range [{0}..]"

#### 1.2 Specify PDF Document Handling Rules - Range and Page Rotation

In [3]:
# Separator between the first and last page of a range rule, e.g. "2..5".
RULE_RANGE = ".."

# Rotation suffix of a rule -> clockwise rotation in degrees.
# None is the key looked up when a rule carries no rotation suffix.
RULE_ROTATE = {None: 0, ">": 90, "V": 180, "<": 270}

# Rule used when a path does not carry a "[...]" rule of its own.
RULE_DEFAULT = RULE_RANGE

In [4]:
# Type of a regex match object, obtained from a throwaway match so that
# isinstance() checks can tell a parsed rule from a rule string.
RE_MATCH_TYPE = type(re.match("", ""))
# "<path>[<rules>]": group 1 is the path, group 2 the text between the brackets.
RE_HAS_RULE = re.compile(r"^(.*)\[(.*)\]$")
# One rule: optional start page, optional "..", optional end page, optional
# rotation suffix (one of ">", "V", "<"). Page numbers may be negative.
RE_RULE = re.compile(r"^(-?\d+)?(\.\.)?(-?\d+)?([>V<])?$")

In [5]:
def rangify(rule, range_max=None):
    """Expand a page rule into the list of page numbers it selects.

    rule (str, obj): rule text such as "3", "2..5", "..4" or "-1", or a match
        object obtained from RE_RULE for such a text
    range_max (int): maximum number of page; required for open-ended ranges
        and used to resolve negative and oversized page numbers
    Returns (list): page numbers in extraction order (always a real list)
    Raises (AssertionError): the rule is invalid, or a range has no upper
        bound and range_max was not given
    """
    # pylint: disable=R0912
    if isinstance(rule, str):
        match = RE_RULE.search(rule)
    elif isinstance(rule, RE_MATCH_TYPE):
        match = rule
    else:
        # Unsupported input type: report it as an invalid rule instead of
        # failing later with an AttributeError on None.
        match = None
    assert match, ERROR_RULE.format(rule)

    def _resolve(page):
        """Map a raw page number onto the document when its size is known."""
        if range_max and page < 1:
            # Values below 1 count back from the end (-1 -> last page).
            return page + range_max + 1
        if range_max and page > range_max:
            return range_max
        return page

    beg, isrange, end, _ = match.groups()
    isrange = isrange == RULE_RANGE

    if not beg and not end:
        # Neither bound given: the rule selects every page.
        assert range_max is not None, ERROR_BOUNDS.format(beg)
        beg, isrange, end = 1, True, range_max

    # A missing (or zero) start means page 1.
    beg = _resolve((int(beg) if beg else 0) or 1)
    end = int(end) if end else end

    if end:
        end = _resolve(end)
    elif isrange:
        # Open-ended range: run up to the last page.
        assert range_max is not None, ERROR_BOUNDS.format(beg)
        end = range_max

    if not isrange:
        return [beg]
    if end < beg:
        # Reversed range: pages come out in descending order.
        return sorted(range(end, beg + 1), reverse=True)
    # Materialise the range so callers always receive a list.
    return list(range(beg, end + 1))

In [6]:
def add(path, password="", writer=None, rules=RULE_DEFAULT):
    """Append the selected pages of one or more PDF files to a writer.

    path (str, list): file, directory, wildcard pattern or list of these; a
        string may end in "[rules]" to override the rules argument
    password (str): password for encrypted files
    writer (PdfFileWriter): output writer to add pdf files (created if None)
    rules (str): comma-separated pages and rotation rules
    Returns (PdfFileWriter): the writer holding the merged pages, ready for
        output
    Raises (AssertionError): a path is not an existing file or a rule is
        invalid
    """
    if writer is None:
        writer = PdfFileWriter()

    if isinstance(path, list):  # merge all the paths
        for subpath in path:
            writer = add(subpath, password, writer, rules)
        return writer

    match = RE_HAS_RULE.search(path)
    if match:
        path, rules = match.groups()
    rules = re.sub(r"\s", "", rules)

    if os.path.isdir(path):
        path = os.path.join(path, "*.pdf")

    if "*" in path:
        # glob() order depends on the file system; sort so the merge order
        # is the same on every run and platform.
        return add(sorted(glob(path)), password, writer, rules)

    assert os.path.isfile(path), ERROR_PATH.format(path)
    # NOTE(review): the handle is left open on purpose here; presumably
    # PyPDF2 still reads page data from it when the writer is written out.
    # Confirm before closing it inside this function.
    reader = PdfFileReader(open(path, "rb"))
    if reader.isEncrypted:
        reader.decrypt(password)

    for rule in rules.split(","):
        match = RE_RULE.search(rule)
        assert match, ERROR_RULE.format(rule)
        rotate = match.group(4)  # rotation suffix, None when absent
        for page in rangify(match, reader.getNumPages()):
            # Rules are 1-based, PyPDF2 page indices are 0-based.
            rotated = reader.getPage(page - 1).rotateClockwise(
                RULE_ROTATE[rotate]
            )
            writer.addPage(rotated)
    return writer

In [7]:
def merge(paths, output, password=""):
    """Merge the given PDF paths and write the result to a file.

    paths (list): path or list of paths to merge
    output (str): output file name
    password (str): password for encrypted files (default: "")
    """
    merged_writer = add(paths, password)
    with open(output, "wb") as out_file:
        merged_writer.write(out_file)
### 2. Initialise Web Application Here

In [8]:
import requests 

from flask import Flask
from flask import render_template
from flask import request
from flask import redirect

from werkzeug.wrappers import Request, Response

In [9]:
# Working directory of the notebook process; the upload and output paths
# used by the web app are built from it.
cwd=os.getcwd()
# Bare expression so the notebook cell displays the value.
cwd

'C:\\Users\\xuema\\Desktop\\GitHub_Repos\\pdf-3-in-1'

In [10]:
# Address the development server listens on.
hostname = "localhost"
port = 9000
base_url = "http://" + hostname + ":" + str(port)

# Entries describing the PDFs uploaded so far (filled by the upload view).
pdf_files = []
pdf_files_json = ""

# Name of the merged document, the URL it is downloaded from and the
# file-system location it is written to.
output = "output.pdf"
download_filename = "/".join([base_url, "static", "uploads", output])
# os.path.join picks the right separator for the host OS instead of
# hard-coding the Windows backslash.
output_filename = os.path.join(cwd, "static", "uploads", output)
        
def shutdown_server():
    """Stop the server that is handling the current request.

    Looks up the "werkzeug.server.shutdown" hook in the request's WSGI
    environ and calls it.

    Raises (RuntimeError): the hook is missing from the environ
    """
    # NOTE(review): newer Werkzeug releases are believed to have removed this
    # environ hook - confirm against the installed version.
    stop_hook = request.environ.get("werkzeug.server.shutdown")
    if stop_hook is not None:
        stop_hook()
        return
    raise RuntimeError("Not running with the Werkzeug Server")

app = Flask(__name__)
# Directory that receives uploaded PDFs and the merged output. Built with
# os.path.join so the separator matches the host OS rather than being a
# hard-coded Windows backslash.
app.config["PDF_UPLOADS"] = os.path.join(cwd, "static", "uploads")

# Endpoint that shuts the server down
@app.route("/shutdown", methods=["GET", "POST"])
def shutdown():
    """Ask the server to stop and return a short confirmation text."""
    shutdown_server()
    confirmation = "Server is shutting down..."
    return confirmation

@app.route("/")
def index():
    """Render the landing page template with the application title."""
    # All three entries are forwarded to render_template as keyword
    # arguments, exactly like the title passed as "message".
    context = {
        "message": "PDF 3-in-1",
        "template_folder": "templates",
        "static_folder": "static",
    }
    return render_template("public/index.html", **context)

@app.route("/upload-pdf", methods=["GET", "POST"])
def upload_pdf():
    """Store an uploaded PDF and redirect with the list of uploads.

    On a POST carrying a file in the "pdf" field, the file is saved in the
    PDF_UPLOADS directory, its page count is read, an entry describing it is
    appended to the module-level pdf_files list, and the client is redirected
    to this URL with the whole list as JSON in the URL fragment. Any other
    request renders the upload form.
    """
    import json

    from werkzeug.utils import secure_filename

    if request.method == "POST" and request.files:
        pdf = request.files["pdf"]
        # The client controls the filename: strip directory parts and unsafe
        # characters so the upload cannot be written outside PDF_UPLOADS.
        safe_name = secure_filename(pdf.filename or "")
        if safe_name:  # nothing usable was submitted -> show the form again
            pdf_filepath = os.path.join(app.config["PDF_UPLOADS"], safe_name)
            pdf.save(pdf_filepath)

            # Close the handle right after counting pages so the file can be
            # deleted later (an open handle blocks removal on Windows).
            with open(pdf_filepath, "rb") as stream:
                no_of_pages = PdfFileReader(stream).getNumPages()

            pdf_files.append({
                "filename": pdf.filename,
                "filepath": pdf_filepath,
                "totalNoOfPages": no_of_pages,
                "startPage": 1,
                "endPage": no_of_pages,
                "rotation": "",
            })
            # json.dumps escapes quotes and backslashes correctly; swapping
            # quote characters in str(list) breaks on names containing "'".
            pdf_files_json = json.dumps(pdf_files)

            return redirect(request.base_url + "#" + pdf_files_json)
    return render_template("public/upload_pdf.html")

def _build_merge_rule(file_obj):
    """Return the "path[start..end<rotation>]" string for one upload entry."""
    spec = str(file_obj["filepath"])

    # Single-page documents get no page range; without this default the names
    # would be unbound (or carry over from the previous file).
    start = end = ""
    if int(file_obj["totalNoOfPages"]) > 1:
        try:
            start = str(file_obj["startPage"])
            end = str(file_obj["endPage"])
        except KeyError:
            start = end = ""

    rotate = str(file_obj["rotation"]).strip().upper()

    if start == "" and end == "":
        rule = rotate
    else:
        rule = start + ".." + end + rotate
    if rule:
        spec += "[" + rule + "]"
    return spec


@app.route("/process-pdf", methods=["GET", "POST"])
def process_pdf():
    """Merge the submitted PDFs, or show the download link.

    POST: reads the JSON list in the "pdf_files" form field, merges the
    described files into output_filename, removes every other file from the
    PDF_UPLOADS directory, forgets the recorded uploads and redirects to this
    URL. GET: returns an HTML snippet linking to the merged document.
    """
    if request.method == "POST":
        import json

        # NOTE(review): "filepath" comes from the client and is passed to
        # merge() as is - consider restricting it to the uploads directory.
        entries = json.loads(request.form["pdf_files"])

        # [START][..][END][ROTATE]
        to_merge = [_build_merge_rule(entry) for entry in entries]
        merge(to_merge, output_filename)

        upload_dir = app.config["PDF_UPLOADS"]
        for old_pdf_file in os.listdir(upload_dir):
            if old_pdf_file == output:
                continue
            old_pdf_filepath = os.path.join(upload_dir, old_pdf_file)
            try:
                os.remove(old_pdf_filepath)
            except OSError as err:
                # Best effort: a file that is still open cannot be removed on
                # Windows; report it and carry on.
                app.logger.warning(
                    "Could not remove %s: %s", old_pdf_filepath, err
                )

        # The uploads were just deleted, so drop the stale entries that the
        # upload view would otherwise send back to the client.
        pdf_files.clear()

        return redirect(request.base_url)

    return "Download merged pdf at: <a href='"+download_filename+"' download='"+ output+"' target='blank'>output.pdf</a>"
    
# NOTE(review): this second definition repeats the routed upload_pdf view
# defined earlier in this file but has no @app.route decorator, so it only
# rebinds the module-level name. It looks like a leftover and is a candidate
# for removal.
def upload_pdf():
    """Store an uploaded PDF and redirect with the list of uploads.

    On a POST carrying a file in the "pdf" field, the file is saved in the
    PDF_UPLOADS directory, its page count is read, an entry describing it is
    appended to the module-level pdf_files list, and the client is redirected
    to this URL with the whole list as JSON in the URL fragment. Any other
    request renders the upload form.
    """
    import json

    from werkzeug.utils import secure_filename

    if request.method == "POST" and request.files:
        pdf = request.files["pdf"]
        # The client controls the filename: strip directory parts and unsafe
        # characters so the upload cannot be written outside PDF_UPLOADS.
        safe_name = secure_filename(pdf.filename or "")
        if safe_name:  # nothing usable was submitted -> show the form again
            pdf_filepath = os.path.join(app.config["PDF_UPLOADS"], safe_name)
            pdf.save(pdf_filepath)

            # Close the handle right after counting pages so the file can be
            # deleted later (an open handle blocks removal on Windows).
            with open(pdf_filepath, "rb") as stream:
                no_of_pages = PdfFileReader(stream).getNumPages()

            pdf_files.append({
                "filename": pdf.filename,
                "filepath": pdf_filepath,
                "totalNoOfPages": no_of_pages,
                "startPage": 1,
                "endPage": no_of_pages,
                "rotation": "",
            })
            # json.dumps escapes quotes and backslashes correctly; swapping
            # quote characters in str(list) breaks on names containing "'".
            pdf_files_json = json.dumps(pdf_files)

            return redirect(request.base_url + "#" + pdf_files_json)
    return render_template("public/upload_pdf.html")

if __name__ == "__main__":
    # Serve the Flask app with Werkzeug's run_simple on the configured
    # host and port.
    from werkzeug import serving

    serving.run_simple(hostname, port, app)

 * Running on http://localhost:9000/ (Press CTRL+C to quit)
127.0.0.1 - - [01/Nov/2020 09:19:05] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [01/Nov/2020 09:19:18] "GET /shutdown HTTP/1.1" 200 -


<a href="http://localhost:9000/shutdown" target="blank">Quit Application</a>