# Transaction OCR with PaddleOCR

This notebook uses PaddleOCR for optical character recognition on transaction images, then parses the recognized text to extract transaction details.

## Install Required Packages

First, let's install the necessary packages if they're not already installed.

In [None]:
# Install PaddleOCR and dependencies
!pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
!pip install "paddleocr>=2.0.1" # Recommended to use version 2.0.1+
!pip install opencv-python

## Import Libraries

In [None]:
import os
import re
import json
import uuid
import cv2
from paddleocr import PaddleOCR
import numpy as np
import matplotlib.pyplot as plt

## Initialize PaddleOCR

In [None]:
# Build the OCR engine once at module level; process_image_with_paddleocr reads this `ocr` name.
# Set `lang` to match the text in your images ('en' for English, 'ch' for Chinese, etc.).
ocr = PaddleOCR(
    use_angle_cls=True,
    lang='en',
    use_gpu=False,
)

## Define OCR Function

In [None]:
def process_image_with_paddleocr(image_path):
    """
    Run PaddleOCR on an image file and return the detections in a simple format.

    Uses the module-level ``ocr`` engine. Every failure (missing file,
    unreadable image, exception raised during OCR or formatting) is reported
    with ``print`` and results in an empty list rather than an exception.

    Args:
        image_path (str): Path to the image file

    Returns:
        list: One dict per detected text line with the keys
            ``"text"`` (recognized string), ``"confidence"`` (float) and
            ``"box"`` (list of four ``{"x": int, "y": int}`` corner points).
            Empty when nothing was detected or an error occurred.
    """
    # Check if file exists
    if not os.path.exists(image_path):
        print(f"Error: File {image_path} does not exist.")
        return []

    # Read image (cv2.imread signals failure by returning None)
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not read image {image_path}.")
        return []

    # Run OCR
    try:
        result = ocr.ocr(img, cls=True)

        # An image without any detectable text can come back as None / [None];
        # that is a normal "nothing found" outcome, not an OCR error.
        if not result or result[0] is None:
            return []

        formatted_result = []

        # result[0] holds the detections for the (single) input image; each
        # detection is [corner_coordinates, (text, confidence)].
        for line in result[0]:
            if len(line) < 2:  # Skip entries without the expected structure
                continue

            coordinates, text_confidence = line[0], line[1]
            formatted_result.append({
                "text": text_confidence[0],  # The detected text
                "confidence": float(text_confidence[1]),  # The confidence score
                "box": [
                    {"x": int(coordinates[j][0]), "y": int(coordinates[j][1])}
                    for j in range(4)  # Four corners of the bounding box
                ],
            })

        return formatted_result
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and carry on.
        print(f"OCR Error: {e}")
        return []

## Define Transaction Parsing Function

In [None]:
# Compiled once so repeated calls (e.g. when processing a folder) reuse it.
_TRANSACTION_PATTERN = re.compile(
    r'(?P<tanggal>[\dO]{1,2}[\/\-][\dO]{1,2})\s+'                    # Dates like 3/03 or 03-03; "O" tolerated as a misread "0"
    r'(?P<name>[A-Za-z\s]+?)\-(?P<from_bank>[A-Za-z\s]+?)\s+'         # Match names and bank (non-greedy)
    r'IDR\s*(?P<amount>[\d,\.]+)\s+'                                 # Match "IDR" and amount with optional spaces
    r'.*?(?P<no_rek>\d{3}\-\d{3}\-[0-9A-Za-z]{4}\-TAHAPAN)'          # Relax account number to allow digits or letters
)


def _parse_amount(raw_amount):
    """Convert an amount string such as '1,000,000.00' to whole currency units.

    Returns None when the string contains no digits at all.
    """
    # A trailing separator (e.g. a sentence period glued to the amount) carries no value.
    cleaned = raw_amount.rstrip('.,')
    digits = cleaned.replace('.', '').replace(',', '')
    if not digits:
        return None

    # Exactly two digits after the last separator are treated as minor units
    # (cents) and dropped; otherwise every separator is a thousands separator.
    has_minor_units = len(cleaned) >= 3 and cleaned[-3] in '.,'
    value = int(digits)
    return value // 100 if has_minor_units else value


def parse_transactions(ocr_text):
    """
    Parse transaction details from OCR text.

    All ``'text'`` values are joined with single spaces and scanned for
    records of the form
    ``<date> <name>-<bank> IDR <amount> ... <ddd-ddd-xxxx-TAHAPAN>``.

    Args:
        ocr_text (list): List of OCR results with 'text' keys

    Returns:
        list: List of parsed transaction dictionaries with the keys
            ``tanggal`` (date string, "O" normalized to "0"), ``name``,
            ``from_bank``, ``amount`` (int, whole currency units) and
            ``no_rek`` (account number string). Matches whose amount
            contains no digits are skipped.
    """
    # Combine all OCR text into a single string
    full_text = " ".join(entry['text'].strip() for entry in ocr_text)
    print(f"Full OCR Text: {full_text}")  # Debugging output

    transactions = []
    for match in _TRANSACTION_PATTERN.finditer(full_text):
        amount = _parse_amount(match.group("amount"))
        if amount is None:
            # Only separators were captured (OCR noise); there is no usable amount.
            continue

        transactions.append({
            # OCR commonly reads the digit zero as the letter "O"
            "tanggal": match.group("tanggal").replace("O", "0"),
            "name": match.group("name").strip(),
            "from_bank": match.group("from_bank").strip(),
            "amount": amount,
            "no_rek": match.group("no_rek"),
        })

    return transactions

## Visualization Helper Function

In [None]:
def visualize_ocr_results(image_path, ocr_results):
    """
    Visualize OCR results on the image.

    Draws a green outline around every entry that has a four-point ``'box'``
    and writes the first 20 characters of its text just above the first
    corner, then shows the annotated image with matplotlib. Entries without
    a valid box are ignored.

    Args:
        image_path (str): Path to the original image
        ocr_results (list): OCR results with text and bounding box information
            (dicts with ``'text'`` and ``'box'`` = four ``{'x', 'y'}`` points)

    Returns:
        None. If the image cannot be read, an error is printed and nothing is shown.
    """
    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not read image {image_path} for visualization.")
        return

    # OpenCV loads BGR; matplotlib expects RGB. cvtColor returns a new array,
    # so drawing on it leaves the loaded image untouched.
    img_with_boxes = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Draw bounding boxes and text
    for entry in ocr_results:
        if 'box' not in entry or len(entry['box']) != 4:
            continue

        # Convert box points to numpy array of points
        box_points = np.array([[p['x'], p['y']] for p in entry['box']], dtype=np.int32)

        # Draw closed polygon
        cv2.polylines(img_with_boxes, [box_points], True, (0, 255, 0), 2)

        # Label origin as plain Python ints, so it does not depend on how the
        # installed OpenCV build parses NumPy scalars. The y coordinate is
        # clamped so labels of boxes at the top edge stay on the canvas.
        anchor = entry['box'][0]
        text_position = (int(anchor['x']), max(int(anchor['y']) - 10, 12))
        cv2.putText(img_with_boxes, entry['text'][:20], text_position,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

    # Display the image
    plt.figure(figsize=(15, 15))
    plt.imshow(img_with_boxes)
    plt.title("OCR Results Visualization")
    plt.axis('off')
    plt.show()

## Main Processing Function

In [None]:
def process_transaction_image(image_path, visualize=True):
    """
    Run the full pipeline on one transaction image.

    The image is OCR'd, the detections are optionally drawn on the image,
    the recognized text is parsed into transactions, and a summary of the
    parsed transactions is printed.

    Args:
        image_path (str): Path to the transaction image
        visualize (bool): Whether to visualize the OCR results

    Returns:
        list: List of parsed transactions (empty when OCR produced nothing)
    """
    # OCR first; without any detections there is nothing to draw or parse
    detections = process_image_with_paddleocr(image_path)
    if not detections:
        print("No OCR results found.")
        return []

    # Optional visual check of what the OCR engine found
    if visualize:
        visualize_ocr_results(image_path, detections)

    # Turn the recognized text into structured transaction records
    parsed = parse_transactions(detections)

    # Print a numbered, field-by-field summary
    print(f"\nParsed {len(parsed)} transactions:")
    for number, record in enumerate(parsed, start=1):
        print(f"Transaction {number}:")
        for field in record:
            print(f"  {field}: {record[field]}")
        print()

    return parsed

## Process Sample Images

Replace 'your_transaction_image.jpg' with your actual transaction image path.

In [None]:
# Location of the transaction image to analyse
image_path = 'your_transaction_image.jpg'

# Run OCR and parsing on it (the annotated image is shown by default)
transactions = process_transaction_image(image_path)

# If anything was parsed, persist it to a uniquely named JSON file
if transactions:
    output_file = f"transactions_{uuid.uuid4()}.json"
    with open(output_file, 'w') as f:
        f.write(json.dumps(transactions, indent=2))
    print(f"Saved transactions to {output_file}")

## Process Multiple Images (Optional)

If you have multiple transaction images to process, you can use the following code.

In [None]:
def process_transaction_folder(folder_path, extensions=('jpg', 'jpeg', 'png'), visualize=True):
    """
    Process all transaction images in a folder.

    Files are handled in sorted filename order. When at least one image was
    processed, the combined results are also written to
    ``all_transactions_<uuid>.json`` in the current working directory.

    Args:
        folder_path (str): Path to the folder containing transaction images
        extensions (iterable of str): Valid image extensions to process,
            without the leading dot; matched case-insensitively
        visualize (bool): Whether to show the OCR visualization for each image

    Returns:
        dict: Dictionary mapping image filenames to their transactions
            (empty if the folder is missing or holds no matching files)
    """
    all_transactions = {}

    # isdir (rather than exists) also rejects a path that points at a file,
    # which os.listdir below could not handle.
    if not os.path.isdir(folder_path):
        print(f"Error: Folder {folder_path} does not exist.")
        return all_transactions

    # str.endswith accepts a tuple, so build the dotted, lowercased suffixes once
    suffixes = tuple(f'.{ext.lower()}' for ext in extensions)

    # os.listdir order is arbitrary; sort so runs are reproducible
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(suffixes):
            continue

        image_path = os.path.join(folder_path, filename)
        print(f"\nProcessing {filename}...")

        # Process the image and record its transactions under the filename
        all_transactions[filename] = process_transaction_image(image_path, visualize=visualize)

    # Save all results to a single JSON file
    if all_transactions:
        output_file = f"all_transactions_{uuid.uuid4()}.json"
        with open(output_file, 'w') as f:
            json.dump(all_transactions, f, indent=2)
        print(f"\nSaved all transactions to {output_file}")

    return all_transactions

# Uncomment and modify the following line to process a folder of images
# folder_transactions = process_transaction_folder('your_transaction_images_folder')

## Improving OCR Results (Optional)

If you're having issues with OCR accuracy, you can try preprocessing the images before running OCR.

In [None]:
def preprocess_image(image_path):
    """
    Preprocess an image to improve OCR results.

    Pipeline: grayscale -> Otsu binarization -> non-local-means denoising ->
    2x2 dilation. The intermediate stages are displayed with matplotlib and
    the final image is written to ``preprocessed_<original filename>`` in the
    current working directory.

    Args:
        image_path (str): Path to the image file

    Returns:
        str or None: Path of the saved preprocessed image, or None if the
            input could not be read or the output could not be written.
    """
    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not read image {image_path}.")
        return None

    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply thresholding to get binary image (Otsu picks the threshold, so the 0 is ignored)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Optional: Apply noise reduction
    denoised = cv2.fastNlMeansDenoising(binary, None, 10, 7, 21)

    # Optional: Apply dilation to make text thicker
    kernel = np.ones((2, 2), np.uint8)
    dilated = cv2.dilate(denoised, kernel, iterations=1)

    # Display preprocessing stages in a 2x2 grid
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    stages = [
        (axes[0, 0], cv2.cvtColor(img, cv2.COLOR_BGR2RGB), 'Original', None),
        (axes[0, 1], gray, 'Grayscale', 'gray'),
        (axes[1, 0], binary, 'Binary (Thresholded)', 'gray'),
        (axes[1, 1], dilated, 'Dilated', 'gray'),
    ]
    for axis, stage_image, title, cmap in stages:
        axis.imshow(stage_image, cmap=cmap)
        axis.set_title(title)
        axis.axis('off')

    plt.tight_layout()
    plt.show()

    # Save the preprocessed image; imwrite reports failure through its return
    # value, so check it instead of announcing a file that was never written.
    preprocessed_path = f"preprocessed_{os.path.basename(image_path)}"
    if not cv2.imwrite(preprocessed_path, dilated):
        print(f"Error: Could not write preprocessed image to {preprocessed_path}.")
        return None
    print(f"Saved preprocessed image to {preprocessed_path}")

    return preprocessed_path

# Example usage:
# preprocessed_image_path = preprocess_image('your_transaction_image.jpg')
# if preprocessed_image_path:
#     transactions = process_transaction_image(preprocessed_image_path)