In [1]:
# OpenCV for image processing
import cv2                 
# Tesseract for text extraction
import pytesseract         
# Pandas for data manipulation
import pandas as pd        
# Regular expressions for text pattern matching
import re                  

# Class to process license plates
class LicensePlateProcessor:
    """Extract license plate text from images and classify it by state.

    The processor reads a list of image paths from one Excel workbook
    (column ``path``), runs contour detection plus Tesseract OCR on each
    image, labels the recognised text with an Australian state based on
    its format, and can compare the recognised plates against a second
    Excel workbook of known plates (column ``License_Plate``).
    """

    # (state label, compiled pattern) pairs, tested in order; the first match
    # wins.  Order matters: the ACT and Northern Territory formats are special
    # cases of the two NSW formats (same shape, fixed leading letter), so they
    # must be tested BEFORE NSW or they can never be reported.
    _STATE_PATTERNS = (
        # ACT: Y - two letters - two numbers - one letter
        ('ACT', re.compile(r'^Y[A-Za-z]{2}\d{2}[A-Za-z]$')),
        # Northern Territory: C - one letter - two numbers - two letters
        ('Northern Territory', re.compile(r'^C[A-Za-z]\d{2}[A-Za-z]{2}$')),
        # NSW: two letters - two numbers - two letters
        ('NSW', re.compile(r'^[A-Za-z]{2}\d{2}[A-Za-z]{2}$')),
        # NSW: three letters - two numbers - one letter
        ('NSW', re.compile(r'^[A-Za-z]{3}\d{2}[A-Za-z]$')),
        # Queensland: three numbers - three letters
        ('Queensland', re.compile(r'^\d{3}[A-Za-z]{3}$')),
        # Victoria: three letters - three numbers
        ('Victoria', re.compile(r'^[A-Za-z]{3}\d{3}$')),
        # Victoria: 1 - two letters - one number - two letters
        ('Victoria', re.compile(r'^1[A-Za-z]{2}\d[A-Za-z]{2}$')),
        # South Australia: S - three numbers - three letters
        ('South Australia', re.compile(r'^S\d{3}[A-Za-z]{3}$')),
        # Western Australia: one number - three letters - three numbers
        ('Western Australia', re.compile(r'^\d[A-Za-z]{3}\d{3}$')),
        # Tasmania: one letter - two numbers - two letters
        ('Tasmania', re.compile(r'^[A-Za-z]\d{2}[A-Za-z]{2}$')),
    )

    def __init__(self, excel_file, license_data_file):
        """Store the input paths and initialise the result containers.

        Args:
            excel_file: Path to the Excel file listing image paths
                (read from its ``path`` column by ``process_images``).
            license_data_file: Path to the Excel file of known license
                plates (its ``License_Plate`` column is used for lookups).
        """
        self.excel_file = excel_file
        # One dict per successfully read plate:
        # {'image_path', 'license_plate_text', 'state'}
        self.license_plate_details = []
        # Store the path to the license data Excel file
        self.license_data_file = license_data_file
        # Paths of images from which no plate text could be extracted
        self.unprocessed_images = []

    # Read license data from an Excel file
    def read_license_data(self):
        """Load the known-license workbook.

        Returns:
            The DataFrame read from ``self.license_data_file``, or ``None``
            if reading failed (the error is printed, not raised).
        """
        try:
            return pd.read_excel(self.license_data_file)
        except Exception as e:
            # Best-effort: report the problem and let the caller skip the step
            print(f"Error reading license data from '{self.license_data_file}': {str(e)}")
            return None

    # Process an image and extract the license plate text
    def process_image(self, image_path):
        """Locate a plate-like quadrilateral in an image and OCR it.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            The OCR result with every non-alphanumeric character removed
            (possibly an empty string), or ``None`` when the image cannot
            be loaded, no four-cornered contour is found, or any step
            raises (the error is printed, not raised).
        """
        try:
            # Load the image
            image = cv2.imread(image_path)

            # cv2.imread signals failure by returning None rather than raising
            if image is None:
                print(f"Error: Unable to load image from '{image_path}'")
                return None

            # Convert the image to grayscale
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Apply bilateral filtering for noise reduction
            gray = cv2.bilateralFilter(gray, 11, 17, 17)

            # Detect edges in the image using Canny edge detection
            edged = cv2.Canny(gray, 170, 200)

            # Find contours in the edged image.  OpenCV 3.x returns
            # (image, contours, hierarchy) whereas 2.x/4.x return
            # (contours, hierarchy); the contour list is always second to last.
            contours = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
            # Only the ten largest contours are considered as plate candidates
            contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

            # The first (largest) contour that simplifies to four corners is
            # taken to be the license plate
            license_plate = None
            for contour in contours:
                perimeter = cv2.arcLength(contour, True)
                approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
                if len(approx) == 4:
                    license_plate = approx
                    break

            if license_plate is None:
                # No rectangle-like region: nothing to OCR
                return None

            # Crop the axis-aligned bounding box of the candidate region
            x, y, w, h = cv2.boundingRect(license_plate)
            license_plate_image = gray[y:y + h, x:x + w]

            # Use Tesseract to extract text from the license plate image
            license_plate_text = pytesseract.image_to_string(license_plate_image, config='--psm 6')

            # Clean the extracted text by removing non-alphanumeric characters
            return re.sub(r'[^A-Za-z0-9]', '', license_plate_text)

        except Exception as e:
            # Best-effort: a single bad image must not abort the whole batch
            print(f"Error processing image '{image_path}': {str(e)}")
            return None

    # Process a batch of images
    def process_images(self):
        """Run ``process_image`` on every path listed in ``self.excel_file``.

        Successful reads are appended to ``self.license_plate_details``;
        paths that yield no text (``None`` or empty string) are appended
        to ``self.unprocessed_images``.  If any image was left unprocessed,
        the list is written to ``unprocessedImage.xlsx``.  Errors are
        printed, not raised.
        """
        try:
            # Load the image file paths from an Excel file
            read_file = pd.read_excel(self.excel_file)

            # Iterate through the rows of the Excel file
            for _, row in read_file.iterrows():
                image_path = row['path']

                # Process the image and extract the license plate text
                license_plate_text = self.process_image(image_path)

                if not license_plate_text:
                    # Nothing readable: remember the image for the report
                    self.unprocessed_images.append(image_path)
                    continue

                # Store the image details together with the inferred state
                self.license_plate_details.append({
                    'image_path': image_path,
                    'license_plate_text': license_plate_text,
                    'state': self.get_license_plate_state(license_plate_text),
                })

            # After processing all images, export the unprocessed ones (if any)
            if self.unprocessed_images:
                unprocessed_df = pd.DataFrame({'Unprocessed_Images': self.unprocessed_images})
                unprocessed_df.to_excel('unprocessedImage.xlsx', index=False)

        except Exception as e:
            # Handle exceptions and print an error message
            print(f"Error processing images from Excel: {str(e)}")

    # Determine the state of a license plate
    def get_license_plate_state(self, license_plate_text):
        """Classify cleaned plate text by the plate format it matches.

        Args:
            license_plate_text: Alphanumeric plate text (as produced by
                ``process_image``).

        Returns:
            One of 'ACT', 'Northern Territory', 'NSW', 'Queensland',
            'Victoria', 'South Australia', 'Western Australia', 'Tasmania',
            or 'Not from the state of australia' when no format matches.
        """
        for state, pattern in LicensePlateProcessor._STATE_PATTERNS:
            if pattern.match(license_plate_text):
                return state
        return 'Not from the state of australia'

    # Display license plate details
    def display_license_plate_details(self):
        """Print every entry of ``self.license_plate_details`` to stdout."""
        # ANSI escape codes for bold red text
        bold_red_text = "\033[1;31mLicense plate extracted from image\033[0m"

        # Print a header
        print('========================================')
        print(bold_red_text)
        print('========================================')
        print()

        # Iterate through license plate details and display them
        for details in self.license_plate_details:
            print('Image Path:', details['image_path'])
            print('License Plate Text:', '\033[1;32m'+details['license_plate_text']+'\033[0m')  # Bold and green text for license plate text
            print('State:', '\033[1;31m'+details['state']+'\033[0m')  # Bold and red text for state
            print()

    # Display existed data and count
    def display_existed_non_existed_data(self):
        """Split the recognised plates into known and unknown ones.

        A plate is "existing" when its text equals a value in the
        ``License_Plate`` column of the license data workbook.  Both groups
        are written to ``existed_licenses.xlsx`` and
        ``non_existed_licenses.xlsx`` and their sizes are printed.  If the
        license data cannot be read, or lacks the ``License_Plate`` column,
        nothing is written.
        """
        bold_red_text = "\033[1;31mLicense plate extracted from image\033[0m"

        # Print a header
        print('========================================')
        print(bold_red_text)
        print('========================================')

        # Read license data from the Excel file
        license_data = self.read_license_data()
        if license_data is None:
            # read_license_data already reported the error
            return

        if 'License_Plate' not in license_data.columns:
            # Report in the same print-and-continue style as the other methods
            # instead of failing with a bare KeyError
            print(f"Error: column 'License_Plate' not found in '{self.license_data_file}'")
            return

        # Build the lookup once so each plate check is O(1) instead of a full
        # column scan per detected plate
        known_plates = set(license_data['License_Plate'])

        # Partition the processed license plate details
        existing_licenses = [
            details for details in self.license_plate_details
            if details['license_plate_text'] in known_plates
        ]
        non_existing_licenses = [
            details for details in self.license_plate_details
            if details['license_plate_text'] not in known_plates
        ]

        # Save the existing and non-existing licenses to separate Excel files
        pd.DataFrame(existing_licenses).to_excel('existed_licenses.xlsx', index=False)
        pd.DataFrame(non_existing_licenses).to_excel('non_existed_licenses.xlsx', index=False)

        # Print the counts
        print(f'Number of Existed Licenses: {len(existing_licenses)}')
        print(f'Number of Non-Existed Licenses: {len(non_existing_licenses)}')
        print('\033[3;31mNote: The respective files have been created with names existed_licenses and non_existed_licenses.\033[0m')

# Entry point of the program
if __name__ == "__main__":
    # Create an instance of LicensePlateProcessor and provide paths to both Excel files:
    # the list of image paths and the known license plate data
    processor = LicensePlateProcessor('image.xlsx', 'license_details.xlsx')

    # Process images, display license plate details, and check for existed data
    processor.process_images()
    processor.display_license_plate_details()
    processor.display_existed_non_existed_data()

    # Print the count of unprocessed images
    print('======================================================')
    unprocessed_count = len(processor.unprocessed_images)
    print(f'Number of Unprocessed Images: {unprocessed_count}')
    # process_images only writes unprocessedImage.xlsx when at least one image
    # was left unprocessed, so only announce the export in that case
    if unprocessed_count:
        print('The data are exported in excel file named as unprocessedImage.')

[1;31mLicense plate extracted from image[0m

Image Path: car_pictures/sa.jpeg
License Plate Text: [1;32m9891AST[0m
State: [1;31mNot from the state of australia[0m

Image Path: car_pictures/qld.jpeg
License Plate Text: [1;32m560FAKQUEENSLANDSUNSHINESTATE[0m
State: [1;31mNot from the state of australia[0m

Image Path: car_pictures/nsw1.png
License Plate Text: [1;32mABC123[0m
State: [1;31mVictoria[0m

Image Path: car_pictures/victoria2.png
License Plate Text: [1;32meVICTORIATHEEDUCATIONSTATE[0m
State: [1;31mNot from the state of australia[0m

Image Path: car_pictures/wa1.jpeg
License Plate Text: [1;32mWESTERNAUSTRALIAq[0m
State: [1;31mNot from the state of australia[0m

Image Path: car_pictures/southaus1.jpeg
License Plate Text: [1;32mS107AWD[0m
State: [1;31mSouth Australia[0m

Image Path: car_pictures/nsw3.png
License Plate Text: [1;32mo7SePNEWSOUTHWALES[0m
State: [1;31mNot from the state of australia[0m

Image Path: car_pictures/nsw4.png
License Plate Text: