In [6]:
import easyocr
import cv2
import re
import csv

# Build one shared EasyOCR reader at module level so every call to
# extract_numbers reuses it: English only, GPU disabled.
reader = easyocr.Reader(
    lang_list=['en'],
    gpu=False,
)

# Preprocess the image for OCR (blur, then grayscale)
def preprocess_image(image_path):
    """Load an image from disk and return a blurred, grayscale version.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        The image after a 5x5 Gaussian blur followed by BGR-to-gray
        conversion.

    Raises:
        OSError: If ``cv2.imread`` could not load the file.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; without
    # this check the error surfaces later as an opaque cv2.error in the blur.
    if img is None:
        raise OSError(
            f"Unable to read image (missing file or unsupported format): "
            f"{image_path}"
        )

    blurred = cv2.GaussianBlur(img, (5, 5), 0)  # Smooth noise before OCR
    return cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

# Extract handwritten numbers (including decimals) using EasyOCR
def extract_numbers(image_path):
    """Run OCR on an image and return the numbers found in the text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A list of strings, each an integer or decimal literal such as
        ``"12"`` or ``"3.5"``, in the order they occur in the OCR output.
    """
    img = preprocess_image(image_path)
    # detail=0 asks readtext for the recognized strings only (no boxes).
    fragments = reader.readtext(img, detail=0)

    # Join with a space, not '', so digits belonging to separate text
    # fragments are not fused into a single bogus number ("12" + "34").
    text = ' '.join(fragments)

    # Digits after the point are required, so a sentence-ending period
    # ("12.") is not captured as part of the number.
    return re.findall(r'\d+(?:\.\d+)?', text)

# Save the results to a CSV file
def save_to_csv(output_file, results):
    """Write a header row, then one row per image, to ``output_file``.

    Args:
        output_file: Path of the CSV file; it is opened in write mode, so
            existing content is replaced.
        results: Mapping of image name to a list of number strings. The
            list is stored in a single cell, joined with ", ".
    """
    header = ['Image Name', 'Extracted Numbers']
    with open(output_file, mode='w', newline='') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        # One row per image: its name plus all of its numbers in one cell.
        out.writerows(
            [name, ', '.join(values)] for name, values in results.items()
        )

# Main function
def process_images(image_files, output_file):
    """Extract numbers from every image and save them all to one CSV file.

    Args:
        image_files: Iterable of image file paths.
        output_file: Path of the CSV file to write.
    """
    import os  # Local import keeps this function self-contained.

    results = {}
    for image_file in image_files:
        # Key each result by the bare file name. os.path.basename honours the
        # platform's separators, unlike splitting on '/' by hand.
        # NOTE: images that share a file name in different directories
        # overwrite each other here.
        results[os.path.basename(image_file)] = extract_numbers(image_file)

    # Save results to CSV
    save_to_csv(output_file, results)
    print(f"Results saved to {output_file}")

# File paths (relative to the current working directory)
image_files = [
    'VW_DTN01_A_00002_101000000000000000002051033000000.front.JPG',
    'VW_DTN01_L_00007_101000000000000000002038853600000.front.JPG',
]
output_csv = 'extracted_numbers2.csv'  # Output CSV file

# Run the processing only when executed directly (script or notebook cell),
# so importing this module does not trigger OCR or write any file.
if __name__ == "__main__":
    process_images(image_files, output_csv)


Using CPU. Note: This module is much faster with a GPU.


Results saved to extracted_numbers2.csv
