Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions examples/docling_picture_description.ipynb

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions langchain_docling/_plugins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Register Docling plugins."""


def picture_description():
    """Return the picture description models provided by this package.

    The model class is imported inside the function body rather than at module
    level, so importing this plugin module alone does not import
    ``langchain_docling.picture_description``.

    Returns:
        A dict with the single key ``"picture_description"`` mapped to a list
        holding ``PictureDescriptionLangChainModel``.
    """
    from langchain_docling.picture_description import PictureDescriptionLangChainModel

    model_classes = [PictureDescriptionLangChainModel]
    return {"picture_description": model_classes}
88 changes: 88 additions & 0 deletions langchain_docling/picture_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Picture description model using LangChain primitives."""

import base64
import io
from collections.abc import Iterable
from pathlib import Path
from typing import ClassVar, Literal, Optional, Type, Union

from docling.datamodel.accelerator_options import AcceleratorOptions
from docling.datamodel.pipeline_options import PictureDescriptionBaseOptions
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
from docling.models.utils.hf_model_download import HuggingFaceModelDownloadMixin
from langchain_core.language_models.chat_models import BaseChatModel
from PIL import Image


class PictureDescriptionLangChainOptions(PictureDescriptionBaseOptions):
    """Options for the PictureDescriptionLangChainModel.

    Carries the LangChain chat model to call, the prompt sent with every
    picture, and an optional provenance suffix.
    """

    # Fixed identifier of this options type.
    # NOTE(review): presumably used by Docling's factory to pick the matching
    # model class -- confirm against the base options class.
    kind: ClassVar[Literal["langchain"]] = "langchain"
    # Chat model that the description model invokes (required, no default).
    llm: BaseChatModel
    # Text instruction sent alongside each picture.
    prompt: str = "Describe this document picture in a few sentences."
    # Optional suffix; when set, the model's provenance becomes
    # "langchain-<provenance>" instead of plain "langchain".
    provenance: Optional[str] = None


class PictureDescriptionLangChainModel(
    PictureDescriptionBaseModel, HuggingFaceModelDownloadMixin
):
    """Implementation of a PictureDescription model using LangChain.

    Every picture is encoded as PNG, embedded as a base64 data URL in a single
    user message next to the configured prompt, and sent to the chat model
    given in the options. The text of each response is the description.
    """

    @classmethod
    def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
        """Define the option type for the factory."""
        return PictureDescriptionLangChainOptions

    def __init__(
        self,
        enabled: bool,
        enable_remote_services: bool,
        artifacts_path: Optional[Union[Path, str]],
        options: PictureDescriptionLangChainOptions,
        accelerator_options: AcceleratorOptions,
    ):
        """Initialize PictureDescriptionLangChainModel.

        All arguments are forwarded unchanged to the base class initializer.

        Args:
            enabled: When true, the chat model and provenance label are set up.
            enable_remote_services: Forwarded to the base class; not otherwise
                used here.
            artifacts_path: Forwarded to the base class; not otherwise used here.
            options: LangChain-specific options (chat model, prompt, provenance).
            accelerator_options: Forwarded to the base class; not otherwise used
                here.
        """
        super().__init__(
            enabled=enabled,
            enable_remote_services=enable_remote_services,
            artifacts_path=artifacts_path,
            options=options,
            accelerator_options=accelerator_options,
        )
        # Narrow the attribute type for type checkers; no runtime effect.
        self.options: PictureDescriptionLangChainOptions

        if self.enabled:
            self.llm = self.options.llm
            self.provenance = "langchain"
            if self.options.provenance:
                self.provenance += f"-{self.options.provenance}"

    @staticmethod
    def _image_to_data_url(image: Image.Image) -> str:
        """Encode *image* as PNG and return it as a base64 ``data:`` URL."""
        # The context manager releases the in-memory buffer once it is encoded.
        with io.BytesIO() as buffer:
            image.save(buffer, format="PNG")
            encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
        return f"data:image/png;base64,{encoded}"

    def _build_messages(self, image: Image.Image) -> list:
        """Build the single-turn conversation asking the model about *image*."""
        return [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": self.options.prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": self._image_to_data_url(image)},
                    },
                ],
            }
        ]

    @staticmethod
    def _response_text(response) -> str:
        """Return the text of a chat model response.

        ``text`` is a method in some langchain-core releases and a string-valued
        property in others; both forms are accepted so that the property form
        is not called (which is deprecated where it is supported at all).
        """
        text = response.text
        if isinstance(text, str):
            return str(text)
        return text()

    def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
        """Annotate the images with the LangChain model.

        Args:
            images: Pictures to describe.

        Yields:
            One description per input picture, in the order returned by the
            chat model's ``batch`` call.
        """
        batch_messages = [self._build_messages(image) for image in images]
        if not batch_messages:
            # Nothing to describe: skip the model call entirely.
            return

        for response in self.llm.batch(batch_messages):
            yield self._response_text(response)
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ dev = [
"pytest~=8.3",
"pytest-cov>=6.1.1",
"python-semantic-release~=7.32",
"langchain-openai>=0.2.12",
]

[tool.uv]
Expand All @@ -72,6 +73,9 @@ default-groups = "all"
[tool.setuptools.packages.find]
include = ["langchain_docling*"]

[project.entry-points."docling"]
langchain_docling = "langchain_docling._plugins"

[tool.black]
line-length = 88
target-version = ["py39", "py310"]
Expand Down
Loading
Loading