# Conversión de script .py a notebook .ipynb
Este notebook contiene el código del cargador de documentos PDF.

In [None]:
from dataclasses import dataclass
import logging
from pathlib import Path
from typing import List, Dict, Any

from PyPDF2 import PdfReader

In [None]:
@dataclass
class Doc:
    """Record holding the text extracted from one file plus its metadata.

    Instances are built by ``DocLoader.load``; one ``Doc`` is produced per
    file it attempts to read, whether or not reading succeeded.
    """

    # Path of the source file, stored as a plain string.
    path: str

    # Extracted text, one string per page ("" for a page with no text).
    pages: List[str]

    # Loader bookkeeping. ``DocLoader.load`` stores "name" plus either
    # "n_pages" (file read) or "error" (file could not be read).
    meta: Dict[str, Any]

In [None]:
class DocLoader:
    """Load PDF files from disk into ``Doc`` records.

    Loading is best-effort: a file that cannot be read never aborts the
    batch. It is reported as a ``Doc`` whose ``meta`` carries the error
    message, and a warning is logged.
    """

    _logger = logging.getLogger(__name__)

    def load(self, input_path: str) -> List[Doc]:
        """Read a single file, or every PDF directly inside a directory.

        Args:
            input_path: Path to a directory or to a single file. A directory
                is scanned non-recursively for entries whose suffix is
                ``.pdf`` (case-insensitive); they are processed in sorted
                order. Any other path is treated as one file to read,
                whatever its suffix and whether or not it exists.

        Returns:
            One ``Doc`` per candidate file, in processing order. When the
            file was read, ``pages`` holds one string per page and ``meta``
            has the keys ``"n_pages"`` and ``"name"``. When reading failed,
            ``pages`` is ``[""]`` and ``meta`` has the keys ``"error"`` (the
            exception message) and ``"name"`` -- there is no ``"n_pages"``
            key in that case. A directory without PDF entries yields ``[]``.
        """
        root = Path(input_path)
        if root.is_dir():
            # Skip sub-directories that merely happen to be named "*.pdf";
            # everything else with the suffix is still attempted, so an
            # unreadable entry is reported as an error Doc instead of
            # silently disappearing.
            files = sorted(
                entry
                for entry in root.iterdir()
                if entry.suffix.lower() == ".pdf" and not entry.is_dir()
            )
        else:
            files = [root]

        docs = []
        for file in files:
            try:
                pages = self._extract_pages(file)
            except Exception as e:
                # Deliberately broad: one broken file must not stop the batch.
                self._logger.warning("Could not read %s: %s", file, e)
                docs.append(
                    Doc(
                        path=str(file),
                        pages=[""],
                        meta={"error": str(e), "name": file.name},
                    )
                )
            else:
                meta = {"n_pages": len(pages), "name": file.name}
                docs.append(Doc(path=str(file), pages=pages, meta=meta))
        return docs

    def _extract_pages(self, file: Path) -> List[str]:
        """Return the text of each page of *file*, in page order.

        A page whose extraction raises, or yields a falsy result, contributes
        an empty string so the list stays aligned with the page numbering.
        Errors raised while opening the file or iterating its pages propagate
        to the caller.
        """
        reader = PdfReader(str(file))
        pages = []
        for index, page in enumerate(reader.pages):
            try:
                txt = page.extract_text() or ""
            except Exception as e:
                # Keep going, but leave a trace of which page was lost.
                self._logger.warning(
                    "Could not extract text from page %d of %s: %s",
                    index,
                    file,
                    e,
                )
                txt = ""
            pages.append(txt)
        return pages

## Ejemplo de uso
Descomenta las líneas siguientes y ajusta la ruta: puede apuntar a un único archivo PDF o a una carpeta que contenga archivos PDF.

In [None]:
# loader = DocLoader()
# documentos = loader.load("/ruta/a/tu/pdf")
# documentos[0].meta