In [1]:
import tkinter as tk
from tkinter.filedialog import askopenfilename
import os 
import docx
import gensim.summarization
from IPython.display import display, Markdown, clear_output
import ipywidgets as widgets

In [2]:
def openfile():
    """Opens a file on the local storage via a File | Open dialog box

    A hidden, always-on-top Tk root window is created to own the dialog so that no
    empty extra window is shown. The root window is destroyed again as soon as the
    dialog has been closed, so repeated calls do not accumulate Tk instances.

    Returns:
        str: The full path to the file selected via the File | Open dialog or "" if no file was selected
    """
    # Create the root window that owns the dialog, hide it and keep it above other windows
    root = tk.Tk()
    root.withdraw()
    root.wm_attributes('-topmost', 1)

    try:
        # Show the file open dialog box and capture the selected filename
        filename = askopenfilename(
            parent=root,
            initialdir=os.getcwd(),
            title="Choose Input File",
            filetypes=(("Word Documents", "*.docx"), ("Text Files", "*.txt")),
        )
    finally:
        # Always release the hidden root window, even if the dialog raised
        root.destroy()

    # A cancelled dialog can yield an empty tuple instead of "" on some platforms;
    # normalise every "nothing selected" result to the documented empty string
    return filename or ""

In [3]:
def getText(filename, encoding=None):
    """Reads the text from a .txt or .docx file

    The file type is decided by the filename extension, compared case-insensitively
    (so "REPORT.TXT" and "Report.Docx" are accepted as well).

    Args:
        filename (str): The full path to the file to be read
        encoding (str, optional): Text encoding used when reading a .txt file. Defaults to None,
            which lets open() use the platform default. Ignored for .docx files.

    Returns:
        str: All of the text read from the file, or "" if the extension is neither .txt nor .docx
    """
    # Extract the extension without the leading dot and normalise its case
    extension = os.path.splitext(filename)[1][1:].lower()

    if extension == "txt":
        # The context manager guarantees the file handle is closed after reading
        with open(filename, "r", encoding=encoding) as f:
            return f.read()

    if extension == "docx":
        # Use the docx library to open the document and join its paragraphs with \n
        doc = docx.Document(filename)
        return '\n'.join(para.text for para in doc.paragraphs)

    # If the filename does not have a .txt or .docx extension, return an empty string
    return ""

In [4]:
def summarise(text, word_count=300):
    """Produces a shortened version of the supplied text using gensim's summarizer

    Args:
        text (str): The text to summarize
        word_count (int, optional): Passed to gensim as the word limit for the summary. Defaults to 300.

    Returns:
        str: The summary with a line break after every full stop, or "" if the text passed in is empty.
    """
    if text == "":
        return ""

    # Pre-processing substitutions, applied in order. This is only a simple example of the
    # kind of clean-up that can improve the summarization; many more combinations are possible.
    substitutions = (
        ('\n\n', ' '),        # double line break -> space
        ('\n', ' '),          # single line break -> space
        ('\t', ' '),          # tab -> space
        ('i.e.', '[[ie]]'),   # protect "i.e." so its full stops are not taken as sentence ends
    )
    cleaned = text
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)

    # Delegate the actual summarization to gensim.summarization.summarize
    condensed = gensim.summarization.summarize(cleaned, word_count=word_count)

    # Post-processing: start a new line after every full stop, then restore the
    # protected "i.e." occurrences to their original form
    with_line_breaks = condensed.replace(".", ".\n")
    return with_line_breaks.replace('[[ie]]', 'i.e.')

In [5]:
def printmd(string):
    """Renders a string as Markdown in the notebook output, so markup such as <b> is formatted.

    Args:
        string (str): The text to render; may contain Markdown or HTML markup.
    """
    # Wrap the text in an IPython Markdown object, then hand it to IPython's display
    rendered = Markdown(string)
    display(rendered)

In [6]:
def openLocalFileAndSummarize():
    """Displays a File | Open dialog for the user to select a file from local storage, then prints
    the title followed by a summarization of the text contained in the selected file.

    Nothing is printed if the dialog is cancelled.
    """
    file_name = openfile()

    # Treat every empty result as "cancelled". A plain comparison with "" is not enough because
    # the file dialog can hand back an empty tuple on some platforms, which would otherwise
    # reach os.path.basename and raise a TypeError.
    if not file_name:
        return

    # Heading rendered as Markdown, followed by the plain-text summary
    printmd(f"<b>Executive Summary for {os.path.basename(file_name)}\n</b>")
    print(summarise(getText(file_name)))

In [7]:
def on_button_clicked(_):
    """Click handler for the 'Choose a File' button

    Args:
        _ (N/A): Unused argument supplied by the click callback; it must be accepted or an error is raised
    """
    # Route everything produced below into the shared output widget
    with out:
        # Remove whatever the previous run left in the output
        clear_output()
        # Show the File | Open dialog, then print the summary of the chosen file
        openLocalFileAndSummarize()

# Output area that receives everything printed by the click handler
out = widgets.Output()
# Button that starts the choose-and-summarize flow
button = widgets.Button(description='Choose a File')
# Register the click handler on the button
button.on_click(on_button_clicked)

https://www.sustainability-reports.com/unleashing-the-power-of-online-reporting/

In [8]:
# Stack the button above the output area; as the last expression of the cell it is displayed
widgets.VBox(children=[button, out])

VBox(children=(Button(description='Choose a File', style=ButtonStyle()), Output()))