In [7]:
from PIL import Image
import cv2
import pytesseract
import numpy as np
import tkinter as tk
from tkinter import filedialog
from tkinter import scrolledtext

In [8]:
def preprocess_image(image_path, threshold=128, denoise_strength=30):
    """Load an image and turn it into a binarized, denoised array for OCR.

    Parameters
    ----------
    image_path
        Location of the image; handed to ``PIL.Image.open`` unchanged.
    threshold : int, optional
        Grey level used as the binarization cut-off (default 128).
    denoise_strength : int, optional
        Filter strength ``h`` passed to ``cv2.fastNlMeansDenoising``
        (default 30).

    Returns
    -------
    numpy.ndarray
        Single-channel 8-bit image: inverse-thresholded, then denoised.

    Raises
    ------
    Whatever ``PIL.Image.open`` raises for a missing or unreadable file.
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been copied into the numpy array.
    with Image.open(image_path) as image:
        # Normalizing to 8-bit RGB lets greyscale, palette and RGBA inputs go
        # through the same path instead of failing in cvtColor.
        rgb_image = np.array(image.convert("RGB"))

    # PIL stores channels as R, G, B, so the RGB conversion code is required;
    # COLOR_BGR2GRAY would swap the red and blue luma weights.
    gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)

    # Inverse binary threshold: pixels above `threshold` become 0, all others
    # become 255.
    # NOTE(review): this yields light text on a dark background for typical
    # scans; confirm that polarity is what the OCR step expects.
    _, thresh_image = cv2.threshold(
        gray_image, threshold, 255, cv2.THRESH_BINARY_INV
    )

    # Non-local-means denoising: template window 7, search window 21.
    return cv2.fastNlMeansDenoising(thresh_image, None, denoise_strength, 7, 21)

def extract_text_from_image(preprocessed_image):
    """Return the text pytesseract recognizes in ``preprocessed_image``.

    The image is forwarded to ``pytesseract.image_to_string`` unchanged and
    that call's result is returned as-is.
    """
    return pytesseract.image_to_string(preprocessed_image)

def process_image(image_path):
    """Run the full pipeline on one file: preprocess it, then OCR the result.

    ``image_path`` is passed to ``preprocess_image``; the returned value is
    whatever ``extract_text_from_image`` produces for the preprocessed image.
    """
    return extract_text_from_image(preprocess_image(image_path))

def open_file():
    """Button callback: pick an image, OCR it, and show the text.

    Opens a file-selection dialog restricted to JPEG/PNG files. If the user
    picks a file, it is run through ``process_image`` and the result replaces
    the contents of the module-level ``text_area`` widget. Cancelling the
    dialog does nothing. If processing fails, an error dialog is shown and
    the text area is left untouched.
    """
    # Imported locally so this function does not depend on an extra
    # file-level import.
    from tkinter import messagebox

    # Patterns are given as a tuple: Tk expects a list of glob patterns, and a
    # single "a;b;c" string is not split on every platform.
    file_path = filedialog.askopenfilename(
        filetypes=[("Image Files", ("*.jpg", "*.jpeg", "*.png"))]
    )

    # An empty result means the dialog was cancelled.
    if not file_path:
        return

    try:
        extracted_text = process_image(file_path)
    except Exception as exc:
        # Without this, a failure inside a Tk callback only prints a traceback
        # to stderr and the user sees no feedback in the window.
        messagebox.showerror(
            "OCR failed",
            f"Could not extract text from:\n{file_path}\n\n{exc}",
        )
        return

    # Replace any previously displayed text with the new result.
    text_area.delete("1.0", tk.END)
    text_area.insert(tk.END, extracted_text)

In [9]:
# --- Application window -----------------------------------------------------
root = tk.Tk()
root.title("OCR Text Extractor")

# --- Widgets ----------------------------------------------------------------
# Button that triggers the file picker and OCR run.
open_button = tk.Button(master=root, text="Open Image File", command=open_file)
open_button.pack(pady=10)

# Scrollable, word-wrapped output pane; open_file() writes the OCR result here.
text_area = scrolledtext.ScrolledText(
    master=root,
    wrap=tk.WORD,
    width=80,
    height=20,
)
text_area.pack(padx=10, pady=10)

# --- Event loop -------------------------------------------------------------
# Hands control to Tk; this call does not return until the window is closed.
root.mainloop()