In [None]:
import os

import cv2
import numpy as np
import pytesseract
from PIL import Image, UnidentifiedImageError

# Directory containing the number plate images
input_dir = 'number_plates'
output_dir = 'extracted_texts'
os.makedirs(output_dir, exist_ok=True)

# Ensure Tesseract is installed on your system
# You may need to set the tesseract_cmd if it's not in your PATH
# pytesseract.pytesseract.tesseract_cmd = r'/usr/local/bin/tesseract'  # Example path for Unix
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # Example path for Windows

# List all files in the directory
files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))]

# Function to perform OCR on an image
def extract_text_from_image(image_path):
    """Run Tesseract OCR on one image file and return the recognised text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The recognised text with leading and trailing whitespace removed.

    Raises:
        Whatever ``Image.open`` raises for a missing or unreadable file, and
        whatever ``pytesseract.image_to_string`` raises when OCR fails.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been copied into the array.
    with Image.open(image_path) as image:
        # Pillow hands out RGB data (not OpenCV's BGR). convert('RGB') also
        # normalises grayscale, palette and RGBA images to three channels, so
        # the colour conversion below always receives the layout it expects.
        rgb_pixels = np.array(image.convert('RGB'))

    # Grayscale input often improves OCR results. The conversion code must
    # match the RGB channel order, otherwise red and blue are weighted wrongly.
    gray_image = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)

    # --psm 8 tells Tesseract to treat the image as a single word.
    text = pytesseract.image_to_string(gray_image, config='--psm 8')

    return text.strip()


In [None]:

# Run OCR on every file listed in `files` and store each result as
# <file name without extension>.txt inside output_dir.
skipped_files = []
for file_name in files:
    file_path = os.path.join(input_dir, file_name)

    # Extract text from the image
    try:
        text = extract_text_from_image(file_path)
    except UnidentifiedImageError:
        # The folder may contain stray non-image files (e.g. .DS_Store,
        # Thumbs.db). Report and skip them instead of aborting the whole batch.
        print(f'Skipping {file_name}: not a readable image')
        skipped_files.append(file_name)
        continue

    # Print the extracted text
    print(f'Text from {file_name}: {text}')

    # Save the extracted text to a file.
    # NOTE: images that share a base name (a.jpg, a.png) map to the same
    # output file; the one processed last overwrites the earlier result.
    output_file_path = os.path.join(output_dir, f'{os.path.splitext(file_name)[0]}.txt')
    # OCR output can contain non-ASCII characters, so write UTF-8 explicitly
    # rather than relying on the platform's default encoding.
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.write(text)

print('Text extraction complete.')
if skipped_files:
    print(f'Skipped {len(skipped_files)} file(s) that are not images.')
