In [1]:
import torch
# Ask PyTorch to drop any CUDA memory it is still caching before the rest of
# the notebook runs.
# NOTE(review): on a freshly started kernel there is presumably nothing cached
# yet, so this likely only matters when the notebook is re-run in a live
# kernel -- confirm it is still needed.
torch.cuda.empty_cache()


In [2]:
import tkinter as tk
from tkinter import ttk, messagebox
import os
import threading
import joblib
from sentence_transformers import SentenceTransformer
import pandas as pd
import time
import csv
from fpdf import FPDF
import datetime
import random
import os

  from tqdm.autonotebook import tqdm, trange





In [3]:
# Resolve the base directory: a script exposes __file__ in its globals, while a
# Jupyter/interactive session does not, so fall back to the working directory.
if '__file__' in globals():
    script_dir = os.path.dirname(os.path.abspath(__file__))
else:
    script_dir = os.getcwd()

# Everything the tool reads or writes lives under "<script_dir>/Search report";
# the submitted application folders sit one level below that.
main_script_dir = os.path.join(script_dir, 'Search report')
applications_dir = os.path.join(main_script_dir, 'Application Forms')

In [4]:
class SearchReportPDF(FPDF):
    """FPDF document pre-styled as an international search report.

    Every page starts with a centred report title and the application number
    given at construction time (see ``header``). The remaining methods are
    small layout helpers for section headings, body paragraphs and tables.
    """

    def __init__(self, application_no, *args, **kwargs):
        """Create the document.

        Args:
            application_no: Text printed after "International Application No.:"
                in the page header.
            *args, **kwargs: Forwarded unchanged to ``FPDF.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.application_no = application_no  # Read by header() on every page

    def _content_width(self):
        """Return the horizontal space between the left and right margins."""
        return self.w - self.l_margin - self.r_margin

    def header(self):
        """Draw the report title and application number at the top of a page."""
        # Title outside the box, inside the top margin
        self.set_y(10)  # Position inside the top margin
        self.set_font("Arial", 'B', 14)
        self.cell(0, 10, "INTERNATIONAL SEARCH REPORT", 0, 1, 'C')
        self.set_font("Arial", '', 12)
        # Insert the dynamically provided application number
        self.cell(0, 10, f"International Application No.: {self.application_no}", 0, 1, 'C')
        self.ln(2)

    def section_title(self, title):
        """Write ``title`` as a bold, left-aligned heading on its own line."""
        self.set_font("Arial", 'B', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(1)

    def section_body(self, body):
        """Write ``body`` as a wrapped paragraph spanning the content width."""
        self.set_font("Arial", '', 9)
        self.multi_cell(self._content_width(), 10, body)
        self.ln(1)

    def table_with_title(self, title, headers, data, col_widths):
        """Render a titled table with a shaded header row and ruled data rows.

        Args:
            title: Heading printed above the table via ``section_title``.
            headers: Column captions, one per column.
            data: Iterable of rows; each row is an iterable of cell values.
                Values are converted with ``str`` before being drawn.
            col_widths: Requested column widths. If their sum exceeds the
                space between the margins they are scaled down proportionally.
        """
        # Ensure table respects margins
        available_width = self._content_width()
        total_width = sum(col_widths)
        if total_width > available_width:
            scale_factor = available_width / total_width
            col_widths = [width * scale_factor for width in col_widths]

        # Add table title
        self.section_title(title)

        # Add table headers
        self.set_font("Arial", 'B', 8)
        line_height = self.font_size * 2
        self.set_fill_color(200, 200, 200)  # Light gray background for headers
        for header, width in zip(headers, col_widths):
            self.cell(width, line_height, header, align='C', fill=True)
        self.ln(line_height)

        # Add a line below the header
        self.line(self.l_margin, self.get_y(), self.w - self.r_margin, self.get_y())

        # Add table data with horizontal lines between rows
        self.set_font("Arial", '', 8)
        cell_line_height = 5
        for row in data:
            cells = [(str(datum), width) for datum, width in zip(row, col_widths)]
            if not cells:
                continue  # Nothing to draw for an empty row

            # If the row is unlikely to fit, start it on a fresh page instead of
            # letting one of its cells trigger a page break half-way through,
            # which would leave the remaining cells positioned with a stale y.
            estimated_height = max(self.get_string_height(width, text) for text, width in cells)
            if self.auto_page_break and self.get_y() + estimated_height > self.page_break_trigger:
                self.add_page()

            row_top = self.get_y()
            row_bottom = row_top
            cell_x = self.l_margin
            for text, width in cells:
                # Every cell of the row starts at the same y; multi_cell wraps the text.
                self.set_xy(cell_x, row_top)
                self.multi_cell(width, cell_line_height, text, align='L')
                # Use the height that was actually rendered rather than an
                # estimate, so explicit newlines and wrapping are accounted for.
                row_bottom = max(row_bottom, self.get_y())
                cell_x += width

            # Continue below the tallest cell and rule off the row.
            self.set_xy(self.l_margin, row_bottom)
            self.line(self.l_margin, self.get_y(), self.w - self.r_margin, self.get_y())
        self.ln(5)

    def get_string_height(self, width, text):
        """Estimate the height ``text`` needs when wrapped to ``width``.

        This is only an approximation: it divides the unwrapped string width by
        the cell width, ignores explicit newlines and word boundaries, and adds
        one spare line. ``table_with_title`` uses it solely to decide whether a
        row should start on a new page.
        """
        num_lines = self.get_string_width(text) / width
        return (num_lines + 1) * self.font_size * 2



In [5]:
# Function to load submitted patent applications
def load_patent_applications():
    """Return the names of the application folders inside ``applications_dir``.

    Only sub-directories are returned; plain files are ignored. The names are
    sorted because ``os.listdir`` yields entries in arbitrary order, which would
    otherwise make the order shown in the listbox vary between runs.

    Returns:
        list[str]: Folder names, or an empty list when ``applications_dir`` is
        missing or is not a directory.
    """
    if not os.path.isdir(applications_dir):
        return []
    return sorted(
        entry
        for entry in os.listdir(applications_dir)
        if os.path.isdir(os.path.join(applications_dir, entry))
    )

In [6]:
# Function to load and display selected patent application details
def load_application_details(event):
    """Listbox selection handler: show the details of the selected application.

    Args:
        event: The Tk event object (unused; required by the ``bind`` callback
            signature).
    """
    selection = application_listbox.curselection()
    if not selection:
        # <<ListboxSelect>> is also delivered when the selection is cleared;
        # there is nothing to display in that case.
        return
    display_application_details(application_listbox.get(selection[0]))

In [7]:
# Function to display application details
def display_application_details(title):
    """Fill the details text box with the data of one application.

    Reads ``<applications_dir>/<title>/<title>.csv`` (first row = field names,
    second row = values) and lists the ``.pdf`` / ``.jpeg`` files found in the
    application folder.

    Args:
        title: Name of the application folder, which is also the CSV base name.
    """
    patent_dir = os.path.join(applications_dir, title)

    # Clear previous details
    details_textbox.delete("1.0", tk.END)

    # Load and display CSV data
    csv_file = os.path.join(patent_dir, f'{title}.csv')
    if os.path.exists(csv_file):
        # newline='' is what the csv module asks for so that line breaks inside
        # quoted fields are parsed correctly.
        with open(csv_file, 'r', newline='') as file:
            reader = csv.reader(file)
            # Default of None avoids StopIteration on an empty or header-only file.
            headers = next(reader, None)
            data = next(reader, None)
        details_textbox.insert(tk.END, f"Patent Title: {title}\n\n")
        if headers is not None and data is not None:
            for header, value in zip(headers, data):
                details_textbox.insert(tk.END, f"{header.replace('_', ' ').capitalize()}: {value}\n")

    # List uploaded files
    details_textbox.insert(tk.END, "\nUploaded Documents:\n")
    if os.path.isdir(patent_dir):
        # Sorted for a stable display order; extensions are matched
        # case-insensitively so that e.g. "FIGURE.PDF" is listed too.
        for file in sorted(os.listdir(patent_dir)):
            if file.lower().endswith(('.pdf', '.jpeg')):
                details_textbox.insert(tk.END, f"- {file}\n")


In [8]:
# Function to predict IPC code in a background thread
def predict_ipc_code():
    """Button handler: start the IPC prediction for the selected application.

    Shows a warning when nothing is selected; otherwise starts
    ``run_ipc_prediction`` on a new thread so the button callback returns
    immediately.
    """
    # Check the selection explicitly: an empty selection is not reported as an
    # IndexError, so relying on that exception would never reach the warning.
    selection = application_listbox.curselection()
    selected_app = application_listbox.get(selection[0]) if selection else ""
    if not selected_app:
        messagebox.showwarning("No Selection", "Please select a patent application to predict the IPC code.")
        return

    # Folder that holds the CSV file of the selected patent application
    application_folder = os.path.join(applications_dir, selected_app)

    # Create a new thread to run the IPC prediction and pass 'selected_app'
    thread = threading.Thread(target=run_ipc_prediction, args=(application_folder, selected_app))
    thread.start()

In [9]:
# Helper: compute the IPC predictions for one application (no GUI access)
def _predict_ipc_rows(application_folder):
    """Classify every non-empty ``claims`` entry of the application's CSV.

    Args:
        application_folder: Folder containing ``<folder name>.csv`` with at
            least the columns ``claims`` and ``title``.

    Returns:
        list[list]: One ``[publication_number, title, ipc_section]`` entry per
        row that has claims text.

    Raises:
        FileNotFoundError: The CSV file does not exist.
        ValueError: The CSV lacks a ``claims`` or ``title`` column.
    """
    # Validate the cheap inputs first so a bad CSV fails before the
    # (slow) model load.
    csv_file_path = os.path.join(application_folder, f'{os.path.basename(application_folder)}.csv')
    if not os.path.exists(csv_file_path):
        raise FileNotFoundError(f"CSV file not found at {csv_file_path}")

    df = pd.read_csv(csv_file_path)
    if 'claims' not in df.columns or 'title' not in df.columns:
        raise ValueError(f"'claims' or 'title' column not found in {csv_file_path}. Found columns: {df.columns.tolist()}")

    # Load classifier, label encoder, and SentenceTransformer model on CPU
    classifier_file_path = os.path.join(main_script_dir, 'ipc_section_classifier.pkl')
    label_encoder_file_path = os.path.join(main_script_dir, 'ipc_section_label_encoder.pkl')
    # device='cpu' actually enforces the CPU-only intent; without it the
    # library picks the device on its own.
    model = SentenceTransformer('AI-Growth-Lab/PatentSBERTa', device='cpu')

    # NOTE(review): joblib.load unpickles arbitrary objects -- only point this
    # at model files from a trusted source.
    loaded_classifier = joblib.load(classifier_file_path)
    loaded_label_encoder = joblib.load(label_encoder_file_path)

    # Generate a publication number based on the current time
    publication_number = int(time.time())

    predictions = []
    for _, row in df.iterrows():
        claims = row['claims']
        # pandas reads an empty CSV cell as NaN, which is truthy, so a plain
        # truthiness test would send NaN to the encoder.
        if pd.isna(claims) or not str(claims).strip():
            continue

        # Encode the claims, predict the encoded section, then decode it back
        # to the IPC label.
        claims_embedding = model.encode([str(claims)])
        predicted_encoded_section = loaded_classifier.predict(claims_embedding)
        predicted_section = loaded_label_encoder.inverse_transform(predicted_encoded_section)

        predictions.append([publication_number, row['title'], predicted_section[0]])
    return predictions


# Helper: show (or re-target) the single "Generate Search Report" button
def _show_generate_report_button(application_folder):
    """Ensure exactly one report button exists and make it use ``application_folder``."""
    button_text = "Generate Search Report"
    command = lambda: generate_search_report(application_folder)
    # Reuse the button from an earlier prediction instead of stacking a new
    # one under it on every run.
    for child in frame_right.winfo_children():
        if isinstance(child, ttk.Button) and str(child.cget('text')) == button_text:
            child.config(command=command)
            return
    generate_report_button = ttk.Button(frame_right, text=button_text, command=command)
    generate_report_button.pack(pady=10)


# Function to run the IPC prediction and show the progress bar
def run_ipc_prediction(application_folder, selected_app):
    """Predict the IPC section for an application and report the outcome.

    Shows an indeterminate progress window, writes the predictions to
    ``test_ipc_codes.csv`` inside ``application_folder``, updates
    ``title_ipc_label`` and offers the "Generate Search Report" button.
    Any failure is reported in an error dialog instead of being raised.

    Args:
        application_folder: Folder of the selected application.
        selected_app: Display name of the application, used in the success
            message.

    NOTE(review): predict_ipc_code runs this function on a worker thread, so
    every widget call below is made off the thread that runs the main loop.
    Whether that is safe depends on how Tcl/Tk was built -- confirm on the
    target platform, or hand results to the main thread via a queue polled
    with ``root.after``.
    """
    progress_window = None
    try:
        # Show progress bar
        progress_window = tk.Toplevel(root)
        progress_window.title("Predicting IPC Code...")
        progress_label = tk.Label(progress_window, text="Please wait, predicting IPC code...")
        progress_label.pack(pady=10)
        progress_bar = ttk.Progressbar(progress_window, orient="horizontal", length=300, mode="indeterminate")
        progress_bar.pack(pady=20)
        progress_bar.start()

        predictions = _predict_ipc_rows(application_folder)
        if not predictions:
            # Reporting success with nothing predicted would leave an empty
            # CSV that the report generator cannot use.
            raise ValueError("no non-empty 'claims' entries were found to classify")

        # Display the last prediction in the title_ipc_label in the desired format
        _, last_title, last_section = predictions[-1]
        title_ipc_label.config(text=f"{last_title} - Classification ({last_section})")

        # Convert predictions to a DataFrame and save to CSV
        predictions_df = pd.DataFrame(predictions, columns=['publication_number', 'title', 'ipc'])
        output_csv_file_path = os.path.join(application_folder, 'test_ipc_codes.csv')
        predictions_df.to_csv(output_csv_file_path, mode='w', header=True, index=False)

        # Stop the progress bar and close the window when done
        progress_bar.stop()
        progress_window.destroy()
        progress_window = None  # Already closed; the error handler must not touch it

        # Display the "Generate Search Report" button after prediction
        _show_generate_report_button(application_folder)

        messagebox.showinfo("Success", f"IPC code predicted and saved for {selected_app}")

    except Exception as e:
        # progress_window is None when creating it failed or it was already closed.
        if progress_window is not None:
            progress_window.destroy()
        messagebox.showerror("Error", f"Failed to predict IPC code: {str(e)}")


In [10]:
# Helper: make CSV-derived text safe for the PDF's built-in font
def _latin1_safe(value):
    """Return ``value`` as text restricted to Latin-1, with other characters replaced by '?'.

    The report is drawn with the built-in "Arial" core font, which is expected
    to handle Latin-1 text only; characters outside that range would otherwise
    abort PDF generation.
    """
    return str(value).encode('latin-1', 'replace').decode('latin-1')


# Generate_search_report function to handle dynamic folder
def generate_search_report(application_folder):
    """Build ``international_search_report.pdf`` for one application.

    Reads the predictions from ``test_ipc_codes.csv`` in ``application_folder``
    and writes the PDF into the same folder. Success or failure is reported in
    a message box; no exception is propagated to the caller.

    Args:
        application_folder: Folder that contains ``test_ipc_codes.csv`` and
            receives the generated PDF.
    """
    try:
        # Load the predicted IPC code data from the generated CSV
        csv_file_path = os.path.join(application_folder, 'test_ipc_codes.csv')
        if not os.path.exists(csv_file_path):
            raise FileNotFoundError(f"CSV file not found at {csv_file_path}")

        # Load the CSV data into a DataFrame
        df = pd.read_csv(csv_file_path)
        if 'ipc' not in df.columns:
            raise ValueError(f"'ipc' column not found in {csv_file_path}. Found columns: {df.columns.tolist()}")

        # Missing values would otherwise reach str.join as float NaN.
        ipc_codes = [_latin1_safe(code) for code in df['ipc'].dropna().astype(str)]
        if not ipc_codes:
            raise ValueError(f"No IPC codes found in {csv_file_path}")

        # Generate the current year and a random 6-digit number
        current_year = datetime.datetime.now().year
        unique_number = random.randint(100000, 999999)

        # Create the dynamically generated International Application Number
        application_no = f"PCT/IB{current_year}/{unique_number}"

        # Create instance of FPDF with dynamic application number
        pdf = SearchReportPDF(application_no, 'P', 'mm', 'A4')

        # Set 3 cm margins
        pdf.set_margins(30, 30, 30)

        # Add a page
        pdf.add_page()

        # Keep duplicates for CLASSIFICATION OF SUBJECT MATTER
        ipc_codes_classification = "; ".join(ipc_codes)

        # Remove duplicate IPC codes for FIELDS SEARCHED
        ipc_codes_fields_searched = "; ".join(sorted(set(ipc_codes)))

        # Section title and body for CLASSIFICATION OF SUBJECT MATTER
        pdf.section_title("CLASSIFICATION OF SUBJECT MATTER")
        pdf.section_body(ipc_codes_classification)

        # Add the Patent Title after "CLASSIFICATION OF SUBJECT MATTER"
        if 'title' in df.columns:
            titles = df['title'].dropna()
            if not titles.empty:
                # First available title (assuming all rows have the same title)
                pdf.section_title("Patent Title")
                pdf.section_body(_latin1_safe(titles.iloc[0]))

        # "FIELDS SEARCHED" section, listing each IPC code once
        pdf.section_title("FIELDS SEARCHED")
        pdf.section_body(f"Minimum documentation searched (classification symbols): {ipc_codes_fields_searched}\n"
                        "Documentation searched other than minimum documentation to the extent that such documents are included in the fields searched.\n"
                        "Electronic database consulted during the international search: EPAB")

        # DOCUMENTS CONSIDERED TO BE RELEVANT
        # NOTE(review): these citations are hard-coded and do not depend on the
        # application being reported -- presumably placeholder data; confirm.
        header = ["Category", "Citation of Document (with relevant passages)", "Relevant to Claim No."]
        col_widths = [20, 120, 40]

        rows = [
            ["X", "EP 3072376 A2 (BASF SE) - 28 September 2016\nParagraphs: [0020], [0023]-[0024], [0028], [0055], [0058]-[0059], [0078]", "1-15"],
            ["A", "CN 101014914 A (Agrium Polymer Coatings Corp) - 08 August 2007\nClaims: 1-58", "1-15"],
            ["A", "CN 103442547 A (Univ. Muenchen Tech) - 11 December 2013\nClaims: 1-10", "1-15"],
            ["A", "CN 204807546 U (Beijing Shennongyuan Biotechnology Dev. Co. Ltd) - 25 November 2015", "1-15"],
            ["A", "CN 1243112 A (LI Fengjie) - 02 February 2000\nClaims: 1-9", "1-15"]
        ]

        pdf.table_with_title("DOCUMENTS CONSIDERED TO BE RELEVANT", header, rows, col_widths)

        # Special Categories of Cited Documents as a Table
        header_special = ["Category", "Description"]
        col_widths_special = [20, 150]

        rows_special = [
            ["A", "Document defining the general state of the art, not considered to be of particular relevance"],
            ["T", "Later document published after the international filing date or priority date, cited to understand the principle or theory underlying the invention"],
            ["X", "Document of particular relevance; the claimed invention cannot be considered novel or involves no inventive step when taken alone"],
            ["E", "Document published on or after the international filing date"],
            ["O", "Document referring to an oral disclosure, use, exhibition, or other means"]
        ]

        pdf.table_with_title("Special Categories of Cited Documents", header_special, rows_special, col_widths_special)

        # Get the current date in the format "DD Month YYYY"
        today = datetime.datetime.now().strftime("%d %B %Y")

        # Dates
        pdf.section_title("Dates")
        dates = f"- Date of actual completion of the international search: {today}\n" \
                f"- Date of mailing of the international search report: {today}"
        pdf.section_body(dates)

        # Name and Mailing Address of the ISA/CN with Indentation
        pdf.section_title("Name and Mailing Address of the ISA/CN")
        address = "Confused Electrons:\n        Arnab Saha\n        Tran Le Phuong Lan\n        Mauricio Rodriguez Alas\n        Ralph Ryan Hebrio"
        pdf.section_body(address)

        # Authorized Officers section
        pdf.section_title("Authorized Officers")
        authorized_officers = (
            "David Horat - Director of Patent Knowledge, Directorate-General 5 Legal and International Affairs\n"
            "Åsa Ribbe - Principal Director Operations, Directorate-General 1 Patent Granting Process\n"
            "Sylvia Kok-de Vries - Director of Prospects and Studies, Directorate-General 4 Corporate Services\n"
            "Franco Mascia - Data Scientist, Directorate-General 1 Patent Granting Process and Business Information Technology\n"
            "Diego Eguidazu Alonso - Chief Information Officer, Business Information Technology"
        )
        pdf.section_body(authorized_officers)

        # TODO: an "INFORMATION ON PATENT FAMILY MEMBERS" table was prepared
        # here (columns "International Application No." / "Application Number")
        # but never added to the document; the unused table data was removed.

        # Output the PDF
        output_pdf_path = os.path.join(application_folder, 'international_search_report.pdf')
        pdf.output(output_pdf_path)

        messagebox.showinfo("Success", f"Search report generated and saved as {output_pdf_path}")

    except Exception as e:
        messagebox.showerror("Error", f"Failed to generate search report: {str(e)}")


In [11]:


# Create the main window for the patent examiner interface.
# The widgets created here (root, application_listbox, details_textbox,
# frame_right, title_ipc_label) are module-level names that the callback
# functions defined in the earlier cells read directly.
root = tk.Tk()
root.title("Patent Examiner Dashboard")
root.geometry("900x700")

# Header Label
header_label = tk.Label(root, text="European Patent Office", font=("Helvetica", 20, "bold"), bg='#007acc', fg='white')
header_label.pack(fill='x', pady=10)

# Frame for the list of patent applications
frame_left = tk.Frame(root, bg='white')
frame_left.pack(side='left', fill='y', padx=20, pady=20)

# Label for the list of applications
application_label = tk.Label(frame_left, text="Submitted Patent Applications", font=("Helvetica", 12, "bold"), bg='white')
application_label.pack(pady=10)

# Listbox to display the patent applications.
# exportselection=False keeps the highlighted application selected when the
# user selects text in the details box; with the default setting the listbox
# gives up its selection and "Predict IPC Code" then reports that nothing is
# selected.
application_listbox = tk.Listbox(frame_left, height=20, width=40, exportselection=False)
application_listbox.pack(pady=10)

# Load submitted applications into the listbox
applications = load_patent_applications()
for app in applications:
    application_listbox.insert(tk.END, app)

# Bind selection event to load details
application_listbox.bind("<<ListboxSelect>>", load_application_details)

# Frame for displaying details of the selected application
frame_right = tk.Frame(root, bg='white')
frame_right.pack(side='right', fill='both', expand=True, padx=20, pady=20)

# Label for application details
details_label = tk.Label(frame_right, text="Application Details", font=("Helvetica", 12, "bold"), bg='white')
details_label.pack(pady=10)

# Textbox to display the details of the selected application
details_textbox = tk.Text(frame_right, wrap='word', height=20, width=60)
details_textbox.pack(pady=10)

# Button to predict IPC code
predict_ipc_button = ttk.Button(frame_right, text="Predict IPC Code", command=predict_ipc_code)
predict_ipc_button.pack(pady=10)

# Label to display "Patent Title - Classification (IPC)"; filled in by
# run_ipc_prediction once a prediction is available
title_ipc_label = tk.Label(frame_right, text="", font=("Helvetica", 12, "bold"), bg='white', fg='blue')
title_ipc_label.pack(pady=10)

# Start the main loop (blocks until the window is closed)
root.mainloop()