From e5c9f6f4fb10c0a42dcd90c8d25d7706e53d98e2 Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Sat, 7 Mar 2026 13:43:50 +0100 Subject: [PATCH] Accept file paths, bytes, and file objects in ImageToTextTask ImageToTextTask now accepts three input types instead of only file objects: file path (str/Path), raw bytes, or file-like object (existing behavior). The image is base64-encoded eagerly in __init__ instead of lazily in serialize(). Co-Authored-By: Claude Opus 4.6 --- python_anticaptcha/tasks.py | 17 ++++++++++++----- tests/test_tasks.py | 28 +++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/python_anticaptcha/tasks.py b/python_anticaptcha/tasks.py index 0464233..3eaf9bd 100644 --- a/python_anticaptcha/tasks.py +++ b/python_anticaptcha/tasks.py @@ -1,7 +1,8 @@ from __future__ import annotations import base64 -from typing import Any, BinaryIO +from pathlib import Path +from typing import Any, BinaryIO, Union class BaseTask: @@ -139,7 +140,7 @@ class FunCaptchaTask(ProxyMixin, UserAgentMixin, CookieMixin, FunCaptchaProxyles class ImageToTextTask(BaseTask): type = "ImageToTextTask" - fp = None + _body = None phrase = None case = None numeric = None @@ -151,7 +152,7 @@ class ImageToTextTask(BaseTask): def __init__( self, - fp: BinaryIO, + image: Union[str, Path, bytes, BinaryIO], phrase: bool | None = None, case: bool | None = None, numeric: int | None = None, @@ -163,7 +164,13 @@ def __init__( *args: Any, **kwargs: Any, ) -> None: - self.fp = fp + if isinstance(image, (str, Path)): + with open(image, "rb") as f: + self._body = base64.b64encode(f.read()).decode("utf-8") + elif isinstance(image, bytes): + self._body = base64.b64encode(image).decode("utf-8") + else: + self._body = base64.b64encode(image.read()).decode("utf-8") self.phrase = phrase self.case = case self.numeric = numeric @@ -176,7 +183,7 @@ def __init__( def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) - data["body"] = base64.b64encode(self.fp.read()).decode("utf-8") + data["body"] = self._body if self.phrase is not None: data["phrase"] = self.phrase if self.case is not None: diff --git a/tests/test_tasks.py b/tests/test_tasks.py index 9ee0e96..c26540b 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -138,22 +138,40 @@ def test_type(self): class TestImageToTextTask: def test_serialize_base64(self): fp = io.BytesIO(b"fake image data") - task = ImageToTextTask(fp=fp) + task = ImageToTextTask(fp) data = task.serialize() assert data["type"] == "ImageToTextTask" assert data["body"] == "ZmFrZSBpbWFnZSBkYXRh" + def test_from_bytes(self): + task = ImageToTextTask(b"fake image data") + data = task.serialize() + assert data["type"] == "ImageToTextTask" + assert data["body"] == "ZmFrZSBpbWFnZSBkYXRh" + + def test_from_file_path(self, tmp_path): + img = tmp_path / "captcha.jpeg" + img.write_bytes(b"fake image data") + task = ImageToTextTask(str(img)) + data = task.serialize() + assert data["body"] == "ZmFrZSBpbWFnZSBkYXRh" + + def test_from_pathlib_path(self, tmp_path): + img = tmp_path / "captcha.jpeg" + img.write_bytes(b"fake image data") + task = ImageToTextTask(img) + data = task.serialize() + assert data["body"] == "ZmFrZSBpbWFnZSBkYXRh" + def test_optional_fields_omitted(self): - fp = io.BytesIO(b"data") - task = ImageToTextTask(fp=fp) + task = ImageToTextTask(b"data") data = task.serialize() for key in ["phrase", "case", "numeric", "math", "minLength", "maxLength", "comment", "websiteUrl"]: assert key not in data def test_optional_fields_included(self): - fp = io.BytesIO(b"data") task = ImageToTextTask( - fp=fp, phrase=True, case=True, numeric=1, + b"data", phrase=True, case=True, numeric=1, math=False, min_length=3, max_length=10, comment="solve this", website_url="https://example.com", )