# Image Processing Pipeline Notebook

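This notebook walks through the image processing and OCR pipeline on a single page: it segments a preprocessed page image into blocks (and, depending on the configured method, into lines), then runs OCR on each segment.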

In [None]:
# Setup and imports
%reload_ext autoreload
%autoreload 1
%aimport core.ocr
%aimport core.image
%aimport core.text

import cv2
import core
import conf
from pathlib import Path
from PIL import Image

# Initialize pipeline. Its `params.METHOD` value is read in the execution
# cell further down to choose between the "BLOCK" and "LINE" OCR paths.
pipeline = core.Pipeline()

## Segmentation and OCR Helpers

In [None]:
def process_blocks(preprocess_path: Path, output_dir: str, year: str, page: str):
    """Run block segmentation on a preprocessed page image.

    Wraps ``core.Image(preprocess_path, output_dir)`` and returns the result
    of its ``block_segmentation(year, page)`` call unchanged.
    """
    page_image = core.Image(preprocess_path, output_dir)
    segmented_blocks = page_image.block_segmentation(year, page)
    return segmented_blocks

def process_lines(block_path: Path, output_dir: str, year: str, page: str, block_num: int):
    """Run line segmentation on a single block image.

    Wraps ``core.Image(block_path, output_dir)`` and returns the result of its
    ``line_segmentation(year, page, block_num)`` call unchanged.
    """
    block_image = core.Image(block_path, output_dir)
    segmented_lines = block_image.line_segmentation(year, page, block_num)
    return segmented_lines

def process_block_text(block_path: Path, year: str, page: str, block_num: int):
    """Run OCR over a whole block image.

    Wraps ``core.OCR(block_path)`` and returns the result of its
    ``block_to_string(year, page, block_num)`` call unchanged.
    """
    block_reader = core.OCR(block_path)
    block_text = block_reader.block_to_string(year, page, block_num)
    return block_text

def process_line_text(line_path: Path, year: str, page: str, block_num: int, line_num: int, height: int):
    """Run OCR over a single line image.

    Wraps ``core.OCR(line_path)`` and returns the result of its
    ``line_to_string(year, page, block_num, line_num, height)`` call unchanged.
    """
    line_reader = core.OCR(line_path)
    line_text = line_reader.line_to_string(year, page, block_num, line_num, height)
    return line_text

## Pipeline Execution

In [None]:
# Page under test. Defined once so the input path and every segmentation/OCR
# call below stay in sync when a different page is tried.
YEAR = '1922'
PAGE = '028'

# Validate the configured method before doing any segmentation work, so an
# unsupported value fails immediately instead of after the blocks are built.
method = pipeline.params.METHOD
if method not in ("BLOCK", "LINE"):
    raise ValueError(f"Unsupported method: {method}")

# Process blocks
blocks = process_blocks(
    preprocess_path=Path(f'data/output/preprocess/{YEAR}-{PAGE}.png'),
    output_dir='data/output/block/',
    year=YEAR,
    page=PAGE,
)

# Process method-specific segmentation; `strings` collects one OCR result per
# block ("BLOCK") or per line ("LINE"), in iteration order.
strings = []

if method == "BLOCK":
    for block_path, block_num in blocks:
        string = process_block_text(block_path, YEAR, PAGE, block_num)
        strings.append(string)

elif method == "LINE":
    for block_path, block_num in blocks:
        lines = process_lines(block_path, 'data/output/line/', YEAR, PAGE, block_num)
        for line_path, line_num, height in lines:
            string = process_line_text(line_path, YEAR, PAGE, block_num, line_num, height)
            strings.append(string)