<a href="https://colab.research.google.com/github/javier-fraga-garcia/ocr-library-db/blob/main/notebooks/notebook-prototype.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prototipo OCR Library
---

# Configuración del entorno

In [None]:
%%capture
# Install the third-party packages imported by the later cells (detection: ultralytics;
# OCR: paddlepaddle/paddleocr, pytesseract, easyocr). %%capture hides pip's output.
# NOTE(review): versions are unpinned, so a fresh run may pull different releases — consider pinning.
# NOTE(review): pytesseract presumably also needs the tesseract binary on the machine — confirm.
!pip install ultralytics
!pip install paddlepaddle
!pip install paddleocr
!pip install pytesseract
!pip install easyocr

In [None]:
from tqdm.auto import tqdm
from uuid import uuid4 as uuid
from pathlib import Path
import json
from ultralytics import YOLO
import cv2
from google.colab.patches import cv2_imshow

# Detección de libros

In [None]:
# Load the YOLO detector from the 'yolov8l.pt' weights; the processing cell below
# calls it on every source image.
# NOTE(review): presumably ultralytics fetches these weights when they are not present locally — confirm.
model = YOLO('yolov8l.pt')

In [None]:
## Configure input and output paths (everything is relative to the working directory)
root_dir = Path.cwd()
image_dir = root_dir / 'book-images'  # source photos scanned by the detection cell
output_dir = root_dir / 'output_images'
# The 'original' / 'gray' / 'threshold' folder names must match the version keys
# used when the crops are saved (output_dir / version / filename).
output_dir_original = output_dir / 'original'
output_dir_gray = output_dir / 'gray'
output_dir_thres = output_dir / 'threshold'
output_dir_meta = output_dir / 'metadata'
# Misspelled name kept as an alias so any code still referring to it keeps working.
ouptut_dir_original = output_dir_original

# exist_ok=True makes the cell safe to re-run; parents=True creates missing ancestors.
for _output_path in (
    output_dir,
    output_dir_original,
    output_dir_gray,
    output_dir_thres,
    output_dir_meta,
):
  _output_path.mkdir(exist_ok=True, parents=True)

In [None]:
def process_image(image):
  """Build the grayscale and Otsu-binarized versions of an image.

  Args:
    image: Image array as handled by OpenCV. A 3-dimensional input is treated
      as BGR; a 2-dimensional input is taken to be grayscale already.

  Returns:
    Tuple ``(gray, threshold)``: the single-channel image and the output of
    ``cv2.threshold`` applied to it with ``THRESH_BINARY + THRESH_OTSU``.
  """
  if len(image.shape) == 3:
    # Convert straight from BGR to gray; no intermediate RGB copy is needed.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  else:
    # A 2-D image is already single-channel, so it is used as is.
    gray = image

  # With THRESH_OTSU the threshold value is chosen by OpenCV; the 0 is a placeholder.
  _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

  return gray, threshold

In [None]:
## Detect books in every source image and save each crop in three versions
# Class index the detections are filtered on.
# NOTE(review): presumably the COCO 'book' class — confirm against model.names.
BOOK_CLASS_ID = 73

for original_image in image_dir.glob('*.jpg'):
  # cv2.imread reports an unreadable file by returning None instead of raising;
  # the path is passed as str because older OpenCV builds reject Path objects.
  img = cv2.imread(str(original_image))
  if img is None:
    print(f'No se pudo leer la imagen: {original_image}')
    continue

  results = model(original_image)

  metadata = []
  for result in results:
    for box in result.boxes:
      if int(box.cls[0]) != BOOK_CLASS_ID:
        continue

      x1, y1, x2, y2 = map(int, box.xyxy[0])
      conf = float(box.conf[0])

      cropped = img[y1:y2, x1:x2]
      if cropped.size == 0:
        # int() truncation can collapse a very thin box to zero pixels, and
        # OpenCV's colour conversion rejects empty images.
        continue

      # NOTE(review): the clockwise rotation presumably turns vertical spines
      # into horizontal text for OCR — confirm.
      cropped = cv2.rotate(cropped, cv2.ROTATE_90_CLOCKWISE)
      gray, threshold = process_image(cropped)

      versions = {
          'original': cropped,
          'gray': gray,
          'threshold': threshold
      }

      # One id is shared by the three versions of the same detected book.
      book_id = str(uuid())

      for version, image in versions.items():
        filename = f'book_{version}_{book_id}.jpg'
        # The version key doubles as the name of the output sub-folder.
        filepath = output_dir / version / filename
        cv2.imwrite(str(filepath), image)

        # One metadata record per written file (three per detected book).
        metadata.append({
            'filename': filename,
            'coords': [x1, y1, x2, y2],
            'confidence': conf,
            'original_image': original_image.name
        })

  # One metadata file per source image, written even when nothing was detected.
  with open(output_dir_meta / f'metadata_{original_image.name}.json', 'w') as f:
    json.dump(metadata, f, indent=2)

# OCR

## Inicializar modelos

In [None]:
from paddleocr import PaddleOCR
import pytesseract
import easyocr

In [None]:
# PaddleOCR engine configured for Spanish (lang='es') with use_angle_cls enabled.
# NOTE(review): use_angle_cls presumably turns on text-orientation classification —
# confirm against the installed PaddleOCR version.
ocr = PaddleOCR(lang='es', use_angle_cls=True)

In [None]:
# EasyOCR reader created with the language list ['es', 'en'].
reader = easyocr.Reader(['es', 'en'])

## Prueba en blanco y negro

In [None]:
## Try all three OCR engines on the first ten grayscale crops
books = list(output_dir_gray.glob('*.jpg'))
results = []
for book in tqdm(books[:10]):
  book_path = str(book)

  # Engines run in the order PaddleOCR, Tesseract, EasyOCR.
  paddle = ocr.predict(book_path)
  tesseract = pytesseract.image_to_string(book_path)
  easy = ' '.join(reader.readtext(book_path, detail=0))

  # The recognized texts of the first PaddleOCR result are joined into one string.
  results.append({
      'book': book_path,
      'paddle': ' '.join(paddle[0].get('rec_texts')),
      'tesseract': tesseract,
      'easy': easy,
  })

## Lanzar proceso

In [None]:
# Run PaddleOCR over every grayscale crop and collect one record per file.
results = []
for book in tqdm(list(output_dir_gray.glob('*.jpg'))):
  book_path = str(book)
  recognized = ocr.predict(book_path)[0].get('rec_texts')
  results.append({'book': book_path, 'text': ' '.join(recognized)})

# Persist everything under a single 'data' key; ensure_ascii=False keeps
# accented characters readable in the file.
results_path = root_dir / 'results.json'
with open(results_path, 'w+', encoding='utf-8') as f:
  json.dump({'data': results}, f, indent=2, ensure_ascii=False)

print('Proceso finalizado')