In [1]:
import cv2
import pytesseract
import re
import os

def detect_products_and_prices(image_path, horizontal_tolerance=30, vertical_tolerance=10):
    # Load the image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Unable to load image {image_path}")
        return [], []

    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Extract text with bounding boxes using Tesseract
    d = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)

    products = []
    prices = []

    # Regex pattern for detecting products at the start of the line and price with 2 decimals at the end
    product_price_regex = r"^\S+.*\d+\.\d{2}$"

    # Loop through detected text and find products and prices
    for i in range(len(d['text'])):
        text = d['text'][i].strip()
        if not text:
            continue

        x, y, w, h = d['left'][i], d['top'][i], d['width'][i], d['height'][i]

        # Detect product and price pair using the regex
        if re.match(product_price_regex, text):
            # Check if it matches the product-price pattern (start of line with product and price at the end)
            parts = text.rsplit(' ', 1)  # Split the last word (price) from the rest (product)
            if len(parts) == 2:
                product = {'text': parts[0], 'bbox': (x, y, w, h)}
                price = {'text': parts[1], 'bbox': (x + w, y, w, h)}  # Adjust price bbox as per its location

                # Add detected product and price to respective lists
                products.append(product)
                prices.append(price)

    # Find corresponding product and price pairs based on their horizontal positions
    product_price_pairs = []

    for product in products:
        for price in prices:
            product_x = product['bbox'][0]
            price_x = price['bbox'][0] + price['bbox'][2]  # Price is the rightmost part of its bounding box

            # Check if the product is to the left of the price and both are in the same vertical range (i.e., within tolerance)
            horizontal_distance = abs(product_x - price_x)
            vertical_distance = abs(product['bbox'][1] - price['bbox'][1])

            # If they are within tolerance for horizontal and vertical distance, we consider them a pair
            if horizontal_distance < horizontal_tolerance and vertical_distance < vertical_tolerance:
                product_price_pairs.append({
                    'product': product['text'],
                    'price': price['text'],
                    'horizontal_distance': horizontal_distance,
                    'vertical_distance': vertical_distance
                })

    return product_price_pairs

# Example usage for multiple images in a folder
img_folder = 'images/'
image_files = [f for f in os.listdir(img_folder) if f.endswith((".jpg", ".png", ".jpeg"))]

# Loop through each image and detect products and prices
for file_name in image_files:
    img_path = os.path.join(img_folder, file_name)
    product_price_pairs = detect_products_and_prices(img_path)

    # Print detected product-price pairs for each image
    print(f"Detected pairs in {file_name}:")
    if not product_price_pairs:
        print("No product-price pairs detected.")
    else:
        for pair in product_price_pairs:
            print(f"Product: {pair['product']} | Price: {pair['price']} | "
                  f"Horizontal Distance: {pair['horizontal_distance']} | "
                  f"Vertical Distance: {pair['vertical_distance']}")


Detected pairs in 0.jpg:
No product-price pairs detected.
Detected pairs in 1.jpg:
No product-price pairs detected.
Detected pairs in 10.jpg:
No product-price pairs detected.
Detected pairs in 11.jpg:
No product-price pairs detected.
Detected pairs in 12.jpg:
No product-price pairs detected.
Detected pairs in 13.jpg:
No product-price pairs detected.
Detected pairs in 14.jpg:
No product-price pairs detected.
Detected pairs in 15.jpg:
No product-price pairs detected.
Detected pairs in 16.jpg:
No product-price pairs detected.
Detected pairs in 17.jpg:
No product-price pairs detected.
Detected pairs in 18.jpg:
No product-price pairs detected.
Detected pairs in 19.jpg:
No product-price pairs detected.
Detected pairs in 2.jpg:
No product-price pairs detected.
Detected pairs in 3.jpg:
No product-price pairs detected.
Detected pairs in 4.jpg:
No product-price pairs detected.
Detected pairs in 5.jpg:
No product-price pairs detected.


KeyboardInterrupt: 

In [6]:
import cv2
import pytesseract
import os

def print_word_bboxes(image_path):
    # Load the image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Unable to load image at {image_path}")
        return

    # Extract bounding box data using Tesseract
    data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)

    print(f"Bounding boxes and text for {image_path}:")
    print(f"{'Text':<20} {'X':<5} {'Y':<5} {'Width':<6} {'Height':<6}")

    # Loop through each word detected by Tesseract
    for i in range(len(data['text'])):
        text = data['text'][i].strip()
        if not text:
            continue

        # Bounding box coordinates
        x, y, width, height = data['left'][i], data['top'][i], data['width'][i], data['height'][i]
        print(f"{text:<20} {x:<5} {y:<5} {width:<6} {height:<6}")
img_folder = 'images/'  # Path to your images folder
image_files = [f for f in os.listdir(img_folder) if f.endswith((".jpg", ".png", ".jpeg"))]

for file_name in image_files:
    img_path = os.path.join(img_folder, file_name)
    print_word_bboxes(img_path)
    print("-" * 50)


Bounding boxes and text for images/0.jpg:
Text                 X     Y     Width  Height
WAL*MART             154   193   340    54    
ALWAYS               192   258   93     17    
LOW                  299   256   49     17    
PRICES.              362   256   96     16    
woe                  264   277   123    76    
SUPERCENTER          255   366   132    20    
OPEN                 244   389   48     19    
24                   304   389   23     19    
HOURS                340   388   59     20    
MANAGER              244   411   84     19    
.                    344   425   4      4     
TBA                  364   410   35     19    
(515                 225   433   56     18    
)                    295   433   6      18    
986                  317   432   35     19    
-                    365   440   9      3     
1783                 389   432   46     19    
S1#                  102   456   36     18    
9748                 150   454   48     18    
OP#               

PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\adars\\AppData\\Local\\Temp\\tess_2l57qptb_input.PNG'

In [5]:


def test_text_detection(image_path):
    # Load the image
    img = cv2.imread(image_path)
    
    # Extract all text from the image
    detected_text = pytesseract.image_to_string(img)
    
    # Print the detected text
    print("Detected Text:")
    print(detected_text)

# Example Usage
img_folder = 'images/'  # Path to your images folder
image_files = [f for f in os.listdir(img_folder) if f.endswith((".jpg", ".png", ".jpeg"))]

for file_name in image_files[:1]:
    img_path = os.path.join(img_folder, file_name)
    print(f"Testing text detection for {file_name}...")
    test_text_detection(img_path)


Testing text detection for 0.jpg...
Detected Text:
WAL*MART

ALWAYS LOW PRICES.

woe

SUPERCENTER
OPEN 24 HOURS
MANAGER . TBA
(515 ) 986 - 1783
S1# 9748 OP# 00000158 TE# 14 TRH 03178
BANANAS 00000000401 1kF
0.41 Ib @ 1 Ib /0.49 0.20 N
FRAP 001200010451 F «5.48 N
DISCOUNT GIVEN 0.57
J SII. = 11
5
y CASH TEND 1100
CHANGE DUE “5 “gg


