In [5]:
import docx
import re
import bs4
import os
import csv
from zipfile import ZipFile, ZIP_STORED, ZipInfo
import xml.etree.ElementTree
import shutil
import tempfile

# Load the norms database once, at program start.
# Every entry is the tuple (row[0], row[1], evaluated row[2]) built from the
# first three CSV columns. file_analysis compares a detected designation with
# element 1 for equality and tests membership in element 2.
db_main = []

with open("db.csv", "r", encoding="utf-8", newline="") as readdb:
    reader = csv.reader(readdb, delimiter=',')
    for row in reader:
        # csv.reader yields an empty list for a blank line (for example a
        # stray empty line at the end of the file); indexing it would raise
        # IndexError and abort the whole load.
        if not row:
            continue
        # SECURITY(review): eval() executes whatever expression is stored in
        # the third column, so this is only safe while db.csv is a trusted,
        # locally maintained file. If the column only ever holds literals,
        # ast.literal_eval would be the safer parser - TODO confirm the format.
        db_main.append((row[0], row[1], eval(row[2])))

def file_analysis(path):
    """Find PN standard designations in a .docx file and rate them against db_main.

    The raw text of ``word/document.xml`` is searched with two regular
    expressions: one for designations that carry a ``:YYYY`` year and one for
    the slash notation used before 1994. Because the search runs on the raw
    XML rather than on extracted text, a match may include markup characters.

    Args:
        path: Path to the .docx file (a ZIP archive) to analyse.

    Returns:
        A list of ``(designation, mark, state, newest)`` tuples, first all
        year-style matches in document order, then all pre-1994 matches.
        ``mark`` is ``"Znaleziono"`` or ``"Brak w bazie"``; ``state`` is
        ``"Aktualny"``, ``"Nieaktualny"``, ``"Nieznany"`` or
        ``"Notacja sprzed 1994"``; ``newest`` is element 1 of the matching
        database row for outdated designations and ``None`` otherwise.

    Raises:
        OSError: If the file cannot be opened.
        zipfile.BadZipFile: If the file is not a valid ZIP archive.
        KeyError: If the archive has no ``word/document.xml`` member.
    """
    # Open the archive by path inside a context manager so the ZipFile object
    # itself is closed, not just an underlying file handle.
    with ZipFile(path) as document:
        xml_str = document.read('word/document.xml').decode("utf-8")

    # Year-style designations: "PN", a space or hyphen, up to 30 characters
    # (lazy), ":" plus four digits, then optional "-dd", an optional
    # non-whitespace run ending in four digits, and another optional "-dd".
    regex = r"PN(?: |-).{1,30}?(?:(?::\d{4})(?:-\d\d|))(?:[\S]+?(?:\d{4})|)(?:-\d{2}|)"
    normy = re.findall(regex, xml_str)

    # Pre-1994 notation: "PN", a space or hyphen, two digits, "/", any single
    # character, then a non-whitespace run that ends in a digit.
    regex94 = r"PN(?: |-)\d{2}/.(?:[\S]+)(?:\d)"
    normy94 = re.findall(regex94, xml_str)

    # Compare every year-style match with the database; the first row that
    # mentions the designation decides the verdict.
    results = []
    for n in normy:
        # Defaults describe a designation that no database row mentions.
        mark, state, newest = "Brak w bazie", "Nieznany", None

        for d in db_main:
            if n == d[1]:
                mark, state = "Znaleziono", "Aktualny"
                break
            if n in d[2]:
                mark, state, newest = "Znaleziono", "Nieaktualny", d[1]
                break

        results.append((n, mark, state, newest))

    # Pre-1994 designations are never looked up; they are only reported.
    results.extend(
        (n94, "Brak w bazie", "Notacja sprzed 1994", None) for n94 in normy94
    )

    return results

# def final_docx(str)
#     # Podmiana stringow w xml_str

#     for positive_match in results:
#         if positive_match[3] != None:
#             xml_str = xml_str.replace(positive_match[0], positive_match[3], 1)

#     # with open('output_test.txt', 'w', encoding="utf-8") as f:
#     #     f.write(xml_str)

In [6]:
import tkinter as tk
import tkinter.ttk as ttk
import tkinter.messagebox as mb
from tkinter import filedialog as fd

In [16]:
class App(tk.Tk):
    """Main ffnorma window: choose a .docx file, then open the analysis report."""

    def __init__(self):
        """Create the fixed-size main window and lay out its widgets."""
        super().__init__()
        # Identical minimum and maximum size make the window non-resizable.
        self.minsize(360, 200)
        self.maxsize(360, 200)

        self.title("ffnorma")
        # Every widget names this window as its master instead of relying on
        # Tk's implicit default root, which may be a different window.
        self.heading = tk.Label(self, text="ffnorma", padx=15, pady=15,
                                font=("Arial Black", 24))
        self.desc = tk.Label(self,
                             text="Wyszukuje i aktualizuje numery norm w dokumentach docx",
                             padx=15, pady=25, font=("Arial", 9))
        self.findbtn = tk.Button(self, text="Znajdź plik",
                                 padx=5, pady=5, width=10,
                                 command=self.browse)
        # Analysis stays disabled until browse() has accepted a .docx file.
        self.updtbtn = tk.Button(self, text="Analizuj...",
                                 padx=5, pady=5, width=10,
                                 state=tk.DISABLED,
                                 command=self.open_window)

        self.filepath = tk.StringVar()
        # Attribute name (including its spelling) kept as is so that any code
        # reading it keeps working.
        self.initaldir = r"C:"
        self.filetypes = (("Word Documents", "*.docx"), ("All files", "*.*"))

        self.heading.grid(row=0, sticky=tk.NW, columnspan=2)
        self.desc.grid(row=1, columnspan=2)
        self.findbtn.grid(row=2, column=0, sticky=tk.E, padx=5)
        self.updtbtn.grid(row=2, column=1, sticky=tk.W, padx=5)
        self.grid_columnconfigure(0, minsize=240)

    def browse(self):
        """Ask for a file and enable analysis only when a .docx was chosen.

        Cancelling the dialog leaves the previously selected file and the
        button state untouched.
        """
        selected = fd.askopenfilename(initialdir=self.initaldir,
                                      filetypes=self.filetypes)
        # The dialog returns an empty value when it is cancelled; storing it
        # would wipe a valid earlier selection while the button stays enabled.
        if not selected:
            return

        self.filepath.set(selected)
        # Case-insensitive check so that "REPORT.DOCX" is accepted as well;
        # any other file type switches the analyse button back off.
        is_docx = selected.lower().endswith('.docx')
        self.updtbtn.config(state=tk.NORMAL if is_docx else tk.DISABLED)

    def open_window(self):
        """Analyse the selected file and show the result in a modal report window.

        Problems with reading the document are reported in an error dialog
        instead of being lost inside the Tk callback.
        """
        # Imported locally: the notebook's import cell does not bring
        # BadZipFile into scope.
        from zipfile import BadZipFile

        try:
            self.raport = file_analysis(self.filepath.get())
        except (OSError, BadZipFile, KeyError, UnicodeDecodeError) as exc:
            mb.showerror("Błąd", f"Nie udało się przeanalizować pliku:\n{exc}")
            return

        raport_window = Raport(self, self.raport)
        # Route all input to the report until it is closed.
        raport_window.grab_set()

        
class Raport(tk.Toplevel):
    """Report window that lists the analysis results in a sortable table."""

    def __init__(self, parent, raport):
        """Build the table, scrollbar and buttons.

        Args:
            parent: Window that owns this report.
            raport: Iterable of result rows, one value per table column.
        """
        super().__init__(parent)
        self.label = tk.Label(self, text="Raport", padx=15, pady=15, font=("Arial", 12))

        self.result_headers = ["Wykryta nazwa", "Status bazy", "Status aktualności", "Aktualna nazwa"]
        self.result_list = raport

        self.tree = ttk.Treeview(self, columns=self.result_headers, show="headings")

        for col in self.result_headers:
            # _col is bound as a default argument so that every heading sorts
            # its own column rather than the last one of the loop.
            self.tree.heading(col, text=col.title(),
                              command=lambda _col=col: self.treeview_sort_column(self.tree, _col, False))
        for item in self.result_list:
            self.tree.insert('', 'end', values=self._display_values(item))

        self.sb = ttk.Scrollbar(self, orient="vertical", command=self.tree.yview)
        self.acceptbtn = tk.Button(self, text="Podmień na aktualne", command=self.replace,
                                   padx=5, pady=5, width=20)
        self.cancelbtn = tk.Button(self, text="Anuluj", command=self.destroy,
                                   padx=5, pady=5, width=10)

        self.label.grid(row=0, columnspan=3)
        self.tree.grid(row=1, columnspan=2, padx=10)
        self.sb.grid(row=1, column=2, sticky=tk.NSEW)
        self.tree.configure(yscrollcommand=self.sb.set)
        self.grid_columnconfigure(0, minsize=700)

        self.acceptbtn.grid(row=2, column=0, padx=10, pady=20, sticky=tk.SE)
        self.cancelbtn.grid(row=2, column=1, padx=10, pady=20, sticky=tk.SE)

    @staticmethod
    def _display_values(item):
        """Return the row as a tuple with ``None`` replaced by an empty string.

        Tk converts cell values with ``str()``, so a missing value would
        otherwise appear in the table as the literal text "None".
        """
        return tuple("" if value is None else value for value in item)

    def replace(self):
        """Handler of the "Podmień na aktualne" button; not implemented yet."""
        pass

    def treeview_sort_column(self, tv, col, reverse):
        """Sort the rows of ``tv`` by the text shown in ``col``.

        Args:
            tv: Treeview whose top-level rows are reordered.
            col: Identifier of the column to sort by.
            reverse: ``True`` for descending order.

        After sorting, the column heading is rebound so that the next click
        sorts in the opposite direction.
        """
        # Pair each displayed value with its row id; ties fall back to the id.
        keyed = [(tv.set(row_id, col), row_id) for row_id in tv.get_children('')]
        keyed.sort(reverse=reverse)

        # Move every row to its position in the sorted sequence.
        for position, (_value, row_id) in enumerate(keyed):
            tv.move(row_id, '', position)

        # Flip the direction for the next click on this heading.
        tv.heading(col, command=lambda: self.treeview_sort_column(tv, col, not reverse))



In [17]:
# Create the main window and run the Tk event loop until it is closed.
app = App()
# The window icon is purely cosmetic. Tk raises TclError when the file is
# missing or the platform does not accept this bitmap format; that must not
# prevent the application from starting, so the default icon is kept instead.
try:
    app.iconbitmap('yellow-icon.ico')
except tk.TclError:
    pass
app.mainloop()

# ffnorma
##### Wyszukuje i aktualizuje numery norm w dokumentach docx
Szukaj... | Analizuj...

Plik źródłowy pozostanie bez zmian.
Program utworzy kopię dokumentu z zamienionymi numerami norm w tym samym katalogu.  