In [1]:
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
from collections import Counter
nltk.download('punkt')
nltk.download('maxent_ne_chunker')
nltk.download('words')

class NERApp:
    def __init__(self, master):
        self.master = master
        master.title("Named Entity Recognition")
        self.create_widgets()

    def create_widgets(self):
        # Text input area
        self.text_area = ttk.Entry(self.master, width=100)
        self.text_area.grid(row=0, column=0, columnspan=3, padx=10, pady=10)

        # Buttons
        self.browse_button = tk.Button(self.master, text="Browse", command=self.load_file, bg="red")
        self.browse_button.grid(row=1, column=0, padx=10, pady=5)

        self.process_button = tk.Button(self.master, text="Process", command=self.process_text, bg="blue")
        self.process_button.grid(row=1, column=1, padx=10, pady=5)

        # Status bar
        self.status_bar = tk.Label(self.master, text="", bd=1, relief=tk.SUNKEN, anchor=tk.W)
        self.status_bar.grid(row=2, column=0, columnspan=3, sticky=tk.W + tk.E, padx=10, pady=5)

        # Result area
        self.result_notebook = ttk.Notebook(self.master)
        self.result_notebook.grid(row=3, column=0, columnspan=3, padx=10, pady=10)

        # Named Entities Table
        self.named_entities_tab = ttk.Frame(self.result_notebook)
        self.result_notebook.add(self.named_entities_tab, text='Named Entities')
        self.named_entities_tree = ttk.Treeview(self.named_entities_tab, columns=('Entity', 'Occurrences'), show='headings')
        self.named_entities_tree.heading('Entity', text='Entity')
        self.named_entities_tree.heading('Occurrences', text='Occurrences')
        self.named_entities_tree.pack(fill='both', expand=True)

        # Word Count Table
        self.word_count_tab = ttk.Frame(self.result_notebook)
        self.result_notebook.add(self.word_count_tab, text='Word Count')
        self.word_count_tree = ttk.Treeview(self.word_count_tab, columns=('Word', 'Occurrences'), show='headings')
        self.word_count_tree.heading('Word', text='Word')
        self.word_count_tree.heading('Occurrences', text='Occurrences')
        self.word_count_tree.pack(fill='both', expand=True)

        # POS Tags Table
        self.pos_tags_tab = ttk.Frame(self.result_notebook)
        self.result_notebook.add(self.pos_tags_tab, text='POS Tags')
        self.pos_tags_tree = ttk.Treeview(self.pos_tags_tab, columns=('Token', 'POS Tag'), show='headings')
        self.pos_tags_tree.heading('Token', text='Token')
        self.pos_tags_tree.heading('POS Tag', text='POS Tag')
        self.pos_tags_tree.pack(fill='both', expand=True)

    def load_file(self):
        try:
            file_path = filedialog.askopenfilename(filetypes=[("Text files", "*.txt"), ("All files", "*.*")])
            if file_path:
                with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
                    text = file.read()
                    self.text_area.delete(0, tk.END)
                    self.text_area.insert(0, text)
                    self.update_status("File loaded successfully.")
        except Exception as e:
            messagebox.showerror("Error", f"Error loading file: {str(e)}")

    def process_text(self):
        try:
            text = self.text_area.get()
            messagebox.showinfo("Processing", "Processing text. This may take a moment...")
            entity_count, word_count, pos_tags = self.analyze_text(text)
            # Display the result in tables
            self.display_named_entities(entity_count)
            self.display_word_count(word_count)
            self.display_pos_tags(pos_tags)
            self.update_status("Text processed successfully.")
        except Exception as e:
            messagebox.showerror("Error", f"Error processing text: {str(e)}")

    def analyze_text(self, text):
        words = word_tokenize(text)
        pos_tags = pos_tag(words)
        named_entities = ne_chunk(pos_tags)
        entity_count = Counter()
        word_count = Counter()
        for entity in named_entities:
            if isinstance(entity, tuple):
                # Counting individual words
                word_count[entity[0]] += 1
            else:
                entity_name = ' '.join([word[0] for word in entity.leaves()])
                entity_count[entity_name] += 1
        return entity_count, word_count, pos_tags

    def display_named_entities(self, entity_count):
        self.named_entities_tree.delete(*self.named_entities_tree.get_children())
        for entity, count in entity_count.items():
            self.named_entities_tree.insert('', 'end', values=(entity, count))

    def display_word_count(self, word_count):
        self.word_count_tree.delete(*self.word_count_tree.get_children())
        for word, count in word_count.items():
            self.word_count_tree.insert('', 'end', values=(word, count))

    def display_pos_tags(self, pos_tags):
        self.pos_tags_tree.delete(*self.pos_tags_tree.get_children())
        for token, pos_tag in pos_tags:
            self.pos_tags_tree.insert('', 'end', values=(token, pos_tag))

    def update_status(self, message, error=False):
        if error:
            self.status_bar.config(fg='red')
        else:
            self.status_bar.config(fg='black')
        self.status_bar.config(text=message)

# Create the Tkinter application
root = tk.Tk()
app = NERApp(root)

# Run the application
root.mainloop()


[nltk_data] Error loading punkt: <urlopen error [WinError 10060] A
[nltk_data]     connection attempt failed because the connected party
[nltk_data]     did not properly respond after a period of time, or
[nltk_data]     established connection failed because connected host
[nltk_data]     has failed to respond>
[nltk_data] Error loading maxent_ne_chunker: <urlopen error [WinError
[nltk_data]     10060] A connection attempt failed because the
[nltk_data]     connected party did not properly respond after a
[nltk_data]     period of time, or established connection failed
[nltk_data]     because connected host has failed to respond>
[nltk_data] Error loading words: <urlopen error [WinError 10054] An
[nltk_data]     existing connection was forcibly closed by the remote
[nltk_data]     host>
