# Libraries

In [8]:
import cv2
import numpy as np
import math
import os
from PIL import Image, ImageEnhance, ImageFilter, ImageOps
import pytesseract
from deskew import determine_skew
import imutils
import re

import pytesseract
import shutil

# Location of the Tesseract executable used by pytesseract.
# The conda-installed binary below stays the preferred choice; when it does not
# exist on the current machine, fall back to whatever `tesseract` is found on
# PATH so the notebook is not tied to a single user's home directory.
_TESSERACT_CMD = '/home/jan/miniconda3/bin/tesseract'
if not os.path.isfile(_TESSERACT_CMD):
    _TESSERACT_CMD = shutil.which('tesseract') or _TESSERACT_CMD
pytesseract.pytesseract.tesseract_cmd = _TESSERACT_CMD


# Picture preprocessing

## v2

In [None]:
import cv2
import numpy as np
import pytesseract
from PIL import Image, ImageEnhance
import imutils
import os
import math

def find_receipt_in_image(image_path):
    """
    Locate the receipt in a photo and return it cropped to its bounding box.

    The image is Otsu-thresholded so that light regions become foreground,
    the result is morphologically closed, and the external contour with the
    largest area is taken to be the receipt. Pixels outside that contour are
    blacked out before cropping.

    Args:
        image_path: Path to the input image

    Returns:
        The masked and cropped receipt as a numpy array, or an unmodified
        copy of the input image when no contour is found or the bounding box
        covers less than 20% of the image width or height.

    Raises:
        ValueError: If the image cannot be read from image_path.
    """
    source = cv2.imread(image_path)
    if source is None:
        raise ValueError(f"Could not read image at {image_path}")

    untouched = source.copy()
    grayscale = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)

    # Otsu picks the threshold automatically; light areas end up white.
    _, light_regions = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Closing fills small dark gaps (e.g. printed text) inside the light area.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    closed = cv2.morphologyEx(
        light_regions, cv2.MORPH_CLOSE, closing_kernel, iterations=3
    )

    contours, _ = cv2.findContours(
        closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        return untouched

    # The largest light blob is assumed to be the receipt.
    receipt_contour = max(contours, key=cv2.contourArea)

    # Filled mask of the receipt contour, used to black out the background.
    receipt_mask = np.zeros(grayscale.shape, dtype=np.uint8)
    cv2.drawContours(receipt_mask, [receipt_contour], -1, 255, -1)
    masked = cv2.bitwise_and(untouched, untouched, mask=receipt_mask)

    x, y, w, h = cv2.boundingRect(receipt_contour)
    cropped = masked[y:y + h, x:x + w]

    image_height, image_width = source.shape[:2]
    too_narrow = w < image_width * 0.2
    too_short = h < image_height * 0.2
    if cropped.size == 0 or too_narrow or too_short:
        # A tiny crop most likely means detection failed; keep the full image.
        return untouched

    return cropped

def determine_skew(image):
    """
    Estimate the skew angle of an image from its long horizontal structures.

    The image is inverse-Otsu-thresholded, opened with a 30x1 kernel so that
    only wide horizontal runs survive, and the minimum-area rectangle angle
    of each remaining contour is averaged, weighted by contour perimeter.

    Note: this definition shadows the `determine_skew` imported from the
    `deskew` package at the top of the file.

    Args:
        image: Grayscale input image

    Returns:
        The weighted average angle in degrees, or 0 when no contour with an
        area of at least 100 is found.
    """
    _, binary = cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # A wide, one-pixel-tall kernel keeps only long horizontal runs.
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 1))
    opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, line_kernel)

    contours, _ = cv2.findContours(
        opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    def _folded_angle(contour):
        # Angles beyond +/-45 degrees are shifted by 90 degrees.
        theta = cv2.minAreaRect(contour)[2]
        if theta < -45:
            return 90 + theta
        if theta > 45:
            return theta - 90
        return theta

    # Ignore very small contours.
    significant = [c for c in contours if not cv2.contourArea(c) < 100]
    if not significant:
        return 0

    angles = [_folded_angle(c) for c in significant]
    weights = [cv2.arcLength(c, True) for c in significant]
    return np.average(angles, weights=weights)

def deskew_image(image):
    """
    Rotate an image about its centre to compensate for the detected skew.

    Args:
        image: The input image as numpy array (colour or grayscale)

    Returns:
        The rotated image, or the input image itself when the absolute skew
        angle reported by determine_skew is not greater than 1 degree.
    """
    is_colour = len(image.shape) == 3
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_colour else image.copy()

    angle = determine_skew(gray)

    # Leave near-straight images untouched to avoid needless resampling.
    if not abs(angle) > 1:
        return image

    height, width = image.shape[:2]
    rotation = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)

    # Cubic interpolation; borders are filled by replicating edge pixels.
    return cv2.warpAffine(
        image,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

def correct_orientation(image):
    """
    Makes sure text in the receipt is right-side up by comparing OCR results
    for the image as given (0°) and rotated by 180°.

    Each candidate is Otsu-binarized and passed to Tesseract. Its score is the
    average of the positive word confidences multiplied by the number of
    words with confidence above 30; the higher-scoring candidate wins.

    Args:
        image: The input image (colour or grayscale numpy array)

    Returns:
        The input image itself when the upright candidate wins, when OCR
        fails for both candidates, or when the best score is below 10;
        otherwise the input image rotated by 180°.
    """
    # Convert to grayscale if not already
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    # Only test upright vs upside-down (0° and 180°)
    candidates = [
        gray,                              # Original (0°)
        cv2.rotate(gray, cv2.ROTATE_180)   # Upside down (180°)
    ]

    best_orientation = 0
    best_score = -1

    for index, candidate in enumerate(candidates):
        _, binary = cv2.threshold(candidate, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Best-effort: any OCR failure just disqualifies this candidate.
        try:
            ocr_data = pytesseract.image_to_data(
                binary, config='--psm 11', output_type=pytesseract.Output.DICT
            )
        except Exception as e:
            print(f"OCR error during orientation detection: {e}")
            continue

        # Confidence entries may arrive as ints, floats or strings; coerce
        # them to float so the comparisons below cannot raise TypeError
        # (which previously disabled orientation detection silently).
        confidences = []
        for raw_conf in ocr_data.get('conf', []):
            try:
                confidences.append(float(raw_conf))
            except (TypeError, ValueError):
                continue

        # Average confidence, ignoring non-positive entries
        positive = [conf for conf in confidences if conf > 0]
        avg_confidence = sum(positive) / len(positive) if positive else 0

        # Count words with reasonable confidence
        valid_words = sum(1 for conf in confidences if conf > 30)

        # Combined score: average confidence * number of valid words
        score = avg_confidence * valid_words

        if score > best_score:
            best_score = score
            best_orientation = index

    # If OCR-based detection failed or gave a low score overall,
    # or the upright candidate won, keep the original orientation
    if best_score < 10 or best_orientation == 0:
        return image

    return cv2.rotate(image, cv2.ROTATE_180)

def enhance_for_ocr(image, binarize=False):
    """
    Produce a contrast-enhanced, sharpened grayscale (or binary) image for OCR.

    Pipeline: PIL contrast boost (factor 1.3) -> grayscale -> CLAHE ->
    3x3 Gaussian blur -> unsharp mask -> optional Otsu binarization.

    Args:
        image: The input image; a numpy array (3-dimensional arrays are
            treated as BGR) or an object usable by PIL's ImageEnhance
        binarize: Whether to return a binary or grayscale image (default: False)

    Returns:
        Enhanced image as numpy array
    """
    # Bring the input into PIL form for the contrast enhancer.
    if not isinstance(image, np.ndarray):
        pil_img = image
    elif len(image.shape) == 3:
        pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    else:
        pil_img = Image.fromarray(image)

    # Moderate global contrast boost, then back to numpy.
    contrasted = np.array(ImageEnhance.Contrast(pil_img).enhance(1.3))

    if len(contrasted.shape) == 3:
        gray = cv2.cvtColor(contrasted, cv2.COLOR_RGB2GRAY)
    else:
        gray = contrasted

    # Local contrast via CLAHE (Contrast Limited Adaptive Histogram Equalization).
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(gray)

    # Light smoothing to suppress noise before sharpening.
    smoothed = cv2.GaussianBlur(equalized, (3, 3), 0)

    # Unsharp mask: 1.5 * smoothed - 0.5 * heavily blurred copy.
    blurred = cv2.GaussianBlur(smoothed, (0, 0), 3.0)
    sharpened = cv2.addWeighted(smoothed, 1.5, blurred, -0.5, 0)

    if not binarize:
        return sharpened

    _, binary = cv2.threshold(sharpened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary

def process_receipt_for_gemini(image_path, output_path=None, binarize=False):
    """
    Process a receipt image for optimal OCR with improved enhancement.

    Runs the full pipeline: receipt extraction, deskewing, orientation
    correction and enhancement, then optionally writes the result to disk.

    Args:
        image_path: Path to the input image
        output_path: Path to save the processed image; nothing is written
            when this is None or empty
        binarize: Whether to binarize the final image (default: False)

    Returns:
        The processed image ready for API submission. If any pipeline step
        fails, the error is printed and the unprocessed image read from
        image_path is returned instead (None if that path does not exist).
        A failure while saving is reported but does not discard the
        processed image.
    """
    try:
        # Step 1: Extract receipt from image
        receipt_only = find_receipt_in_image(image_path)

        # Step 2: Correct skew with improved angle detection
        deskewed = deskew_image(receipt_only)

        # Step 3: Fix orientation with improved confidence-based detection
        oriented = correct_orientation(deskewed)

        # Step 4: Enhance for OCR with improved detail preservation
        enhanced = enhance_for_ocr(oriented, binarize=binarize)
    except Exception as e:
        # Best-effort fallback: hand back the raw image so callers still
        # have something to submit.
        print(f"Error processing receipt: {e}")
        return cv2.imread(image_path) if os.path.exists(image_path) else None

    # Save result if output path provided. Saving is handled separately from
    # processing so that a write problem cannot throw away the result.
    if output_path:
        try:
            # dirname is '' for a bare filename; os.makedirs('') would raise.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            # imwrite signals some failures by returning False, not raising.
            if cv2.imwrite(output_path, enhanced, [cv2.IMWRITE_PNG_COMPRESSION, 0]):
                print(f"Processed receipt saved to {output_path}")
            else:
                print(f"Error saving processed receipt: could not write {output_path}")
        except (OSError, cv2.error) as e:
            print(f"Error saving processed receipt: {e}")

    return enhanced

def main():
    """Run the preprocessing pipeline on the bundled test receipt."""
    process_receipt_for_gemini(
        "../data/receipts/test.png",
        "../data/processed_receipts/test_processed.png",
    )
    
# Run the demo pipeline only when this code is executed directly
# (as a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

Processed receipt saved to ../data/processed_receipts/test_processed.png


# Load model

# Fill database