# Instructions

How to run the code:
1.   Hover the mouse over the "Show code" text to reveal the play button (if necessary).
2.   Click the play button for each of steps A, B, and C, in that order.
3.   After clicking the play button for step C, upload the PDF layout file and the CSV word list using the "Choose Files" button.

**Notes**

Download the CSV template [here](https://drive.google.com/file/d/1PH4qmrRmNpnErzLm_0JSKDbz0ArO_K_7/view?usp=sharing)

Depending on the size of the files, the process can take 15 minutes or longer to complete. Do not close the web browser tab or window until both output files -- the highlighted PDF and the summary text file -- have finished downloading.

In [None]:
#@title A. Click the play button to set up the script.{display-mode: "form"}
# Install the third-party packages into the notebook runtime. Lines starting
# with "!" are run as shell commands by the notebook, not as Python.
# PyMuPDF supplies the `fitz` module that step B imports.
!pip install pymupdf
# NOTE(review): nothing visible in this notebook imports `frontend` directly;
# presumably it works around an import error -- confirm it is still needed.
!pip install frontend

In [None]:
#@title B. Click the play button to initialize the script. Changing the comment name is optional.{display-mode: "form"}
def comment_pdf(input_file:str, list_filename_csv:str, pages:list=None):
    """Highlight every word-list match in a PDF and download the results.

    Each row of the CSV word list supplies a search word, the comment text to
    attach to its matches, and a highlight colour. Every match is highlighted
    and annotated, the annotated copy is saved next to the input as
    "<input name> comments.pdf", a match-count summary is written to
    "summary.txt", and both files are sent to the browser for download.

    Args:
        input_file: Path of the PDF to search.
        list_filename_csv: Path of the CSV word list (header row plus
            word, comment, color rows).
        pages: Optional collection of 1-based page numbers to restrict the
            search to. When empty or None, every page is searched.
    """
    import os

    import fitz
    from google.colab import files

    comment_name = "LCI-QA" #@param {type:"string"}
    search_list = read_csv(list_filename_csv)
    # create matches dictionary for output summary
    matches_record = {search[0]: 0 for search in search_list}
    # Strip only the final extension: splitting on the first "." would cut
    # names such as "my.report.pdf" (or dotted folder names) short.
    output_file = os.path.splitext(input_file)[0] + " comments.pdf"

    # open pdf
    pdfIn = fitz.open(input_file)
    try:
        # Iterate throughout pdf pages (page numbers are 1-based for the user)
        for pageID, page in enumerate(pdfIn, start=1):
            # If required to look in specific pages
            if pages and pageID not in pages:
                continue
            # Use the search_for function to find text
            for word, comment, color in search_list:
                # No hit_max argument: PyMuPDF dropped it from search_for,
                # and current releases reject unknown keyword arguments.
                matched_values = page.search_for(word)
                if matched_values:
                    # Update matches_record
                    matches_record[word] += len(matched_values)
                    highlight_text(matched_values, page, color, comment_name, comment)

        # Save to output file
        pdfIn.save(output_file, garbage=3, deflate=True)
    finally:
        # Release the document even when searching or saving fails.
        pdfIn.close()

    create_summary(input_file, output_file, comment_name, matches_record)

    files.download(output_file)
    files.download('summary.txt')

def read_csv(list_filename_csv):
    """Load the search rows from the CSV word list.

    The first row is treated as a header and discarded. Rows that are empty
    or contain only blank cells are skipped so that callers can rely on every
    returned row having real content.

    Args:
        list_filename_csv: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of the row's cell strings. An empty file
        (or one holding only a header) yields an empty list.
    """
    import csv
    # newline="" lets the csv module handle line endings itself, which is
    # required for quoted cells that contain line breaks.
    with open(list_filename_csv, 'r', newline='') as csv_data:
        csv_reader = csv.reader(csv_data)
        # Skip the header row; the default avoids StopIteration on an empty file.
        next(csv_reader, None)
        search_list = [
            row for row in csv_reader
            if any(cell.strip() for cell in row)
        ]
    return search_list

def highlight_text(matched_values, page, color, comment_name, comment):
    """Highlight each matched area on a page and attach a comment to it.

    Args:
        matched_values: Iterable of match areas, as returned by the page's
            text search; each one is passed to ``page.add_highlight_annot``.
        page: Page object the highlight annotations are added to.
        color: Name of the highlight colour ("red", "green" or "blue").
            Matching ignores case and surrounding spaces. A blank or
            unrecognised name leaves the annotation's default colour.
        comment_name: Text stored as the annotation's title.
        comment: Text stored as the annotation's content.
    """
    colors = {
        "red": [0.7, 0.35, 0.5],
        "green": [0.35, 0.7, 0.5],
        "blue": [0.35, 0.5, 0.7]
    }
    # Normalise once, outside the loop. An unknown name falls back to the
    # default highlight colour instead of raising KeyError for one CSV typo.
    stroke = colors.get((color or "").strip().lower())
    for item in matched_values:
        # Highlight found text
        annot = page.add_highlight_annot(item)
        if stroke is not None:
            annot.set_colors(stroke=stroke)
        # Add comment to the found match
        info = annot.info
        info["title"] = comment_name
        info["content"] = comment
        annot.set_info(info)
        annot.update(opacity=0.4)

def create_summary(input_file, output_file, comment_name, matches_record):
    """Write the run report to 'summary.txt' in the working directory.

    The report lists the input file, the output file and the comment title,
    followed by one "word: count" line for each entry of ``matches_record``.
    An existing 'summary.txt' is overwritten.

    Args:
        input_file: Value reported as the input file.
        output_file: Value reported as the output file.
        comment_name: Value reported as the comment title.
        matches_record: Mapping of search word to its number of matches.
    """
    # One "word: count" line per search word, in the mapping's own order.
    tallies = []
    for word, count in matches_record.items():
        tallies.append("{}: {}".format(word, count))

    # The leading newline pushes the tallies onto the lines below their label.
    fields = (
        ("Input File", input_file),
        ("Output File", output_file),
        ("Comment Title", comment_name),
        ("Matching Instances", "\n" + "\n".join(tallies)),
    )

    # Export Process Summary
    with open('summary.txt', 'w') as summary_txt:
        report_lines = ["{}: {}".format(label, value) for label, value in fields]
        summary_txt.write("\n".join(report_lines))


In [None]:
#@title C. Click the play button to upload the files.{display-mode: "form"}
# Upload the PDF and the CSV word list, then run comment_pdf once both are
# available. The two files may be uploaded together or in separate runs of
# this cell: the chosen names are kept in notebook variables between runs.
import os

from google.colab import files

uploaded = files.upload()
for filename in uploaded:
    # Compare the extension case-insensitively so "REPORT.PDF" is recognised.
    extension = os.path.splitext(filename)[1].lower()
    if extension == ".pdf":
        pdf_file = filename
    elif extension == ".csv":
        keyword_list_file = filename

# A name remembered from an earlier run only counts if its file still exists
# (the RESET cell deletes the files).
pdf_ready = "pdf_file" in globals() and os.path.exists("/content/" + pdf_file)
csv_ready = (
    "keyword_list_file" in globals()
    and os.path.exists("/content/" + keyword_list_file)
)

if pdf_ready and csv_ready:
    # Deliberately not wrapped in try/except: a failure while processing is a
    # real error and should be shown, not replaced by an upload prompt.
    comment_pdf(
        input_file="/content/" + pdf_file,
        list_filename_csv="/content/" + keyword_list_file,
    )
elif pdf_ready:
    print("PDF successfully uploaded!\nPush play again and upload the CSV.")
elif csv_ready:
    print("CSV successfully uploaded!\nPush play again and upload the PDF.")
else:
    print("No PDF or CSV file was found.\nPush play again and upload both files.")


In [None]:
#@title RESET: click the play button below to remove all files, then upload a new pair of files in step C.{display-mode: "form"}
!rm summary.txt
!rm *csv
!rm *.pdf
print('All files removed!\nReady to upload new files.')

All files removed!
Ready to upload new files.
