-
Notifications
You must be signed in to change notification settings - Fork 38
/
task_model.py
111 lines (86 loc) · 2.99 KB
/
task_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from abc import abstractmethod
from functools import cached_property
from typing import Callable, Generator, Iterable
from uuid import uuid4
from .._cachable import _Cachable
# Default number of texts processed per batch (used as the default for the
# `batch_size` argument of `TaskModel.run`).
DEFAULT_BATCH_SIZE = 10
def _check_texts_length(
self: "TaskModel", max_length_func: Callable[[list[str]], int], texts: list[str]
):
# Get max text length
max_text_length = max_length_func(texts)
# Check max_new_tokens
if max_text_length > self.model_max_length:
raise ValueError(
"The length of your texts exceeds the max length of the model, use"
" `truncate=True` if you wish to truncate inputs."
)
class TaskModel(_Cachable):
    def __init__(self, cache_folder_path: None | str = None):
        """Base class for all task models.

        Args:
            cache_folder_path: The path to the cache folder. If ``None``, the default
                cache folder for the DataDreamer session will be used.
        """
        super().__init__(cache_folder_path=cache_folder_path)

    @abstractmethod
    def count_tokens(self, value: str) -> int:
        """Counts the number of tokens in a string.

        Args:
            value: The string to count tokens for.

        Returns:
            The number of tokens in the string.
        """
        pass

    @property
    @abstractmethod
    def model_max_length(self) -> int:
        """The maximum input length supported by the model (used by
        ``_check_texts_length`` to reject over-long texts)."""
        pass

    @abstractmethod
    def run(
        self,
        texts: Iterable[str],
        truncate: bool = False,
        batch_size: int = DEFAULT_BATCH_SIZE,
        batch_scheduler_buffer_size: None | int = None,
        adaptive_batch_size: bool = True,
        progress_interval: None | int = 60,
        force: bool = False,
        cache_only: bool = False,
        verbose: None | bool = None,
        log_level: None | int = None,
        total_num_texts: None | int = None,
        return_generator: bool = False,
        **kwargs,
    ) -> Generator[dict[str, float], None, None] | list[dict[str, float]]:
        """Runs the task model over ``texts``.

        Concrete subclasses implement the actual model execution and define the
        precise semantics of each option; the names suggest batching, caching,
        and progress-reporting controls, but confirm against the subclass.

        Args:
            texts: The texts to run the model on.
            truncate: Whether to truncate inputs that exceed the model's
                maximum length (see ``_check_texts_length``).
            batch_size: Number of texts per batch.
            **kwargs: Additional subclass-specific options.

        Returns:
            One ``dict[str, float]`` result per text, as a list — or as a
            generator (presumably when ``return_generator`` is ``True``;
            verify in the subclass).
        """
        pass

    @cached_property
    def model_card(self) -> None | str:  # pragma: no cover
        # Default: no model card is associated with this model.
        return None

    @cached_property
    def license(self) -> None | str:  # pragma: no cover
        # Default: no license information is associated with this model.
        return None

    @cached_property
    def citation(self) -> None | list[str]:  # pragma: no cover
        # Default: no citations are associated with this model.
        return None

    @property
    def version(self) -> float:  # pragma: no cover
        # Default version number (semantics defined by the _Cachable contract).
        return 1.0

    @cached_property
    def display_icon(self) -> str:  # pragma: no cover
        # Default: no display icon.
        return ""

    @cached_property
    def display_name(self) -> str:  # pragma: no cover
        # Defer to _Cachable's display name by default.
        return super().display_name

    @cached_property
    def _cache_name(self) -> None | str:  # pragma: no cover
        # Default: no custom cache name — presumably subclasses or _Cachable
        # derive one; confirm against _Cachable.
        return None

    @property
    def _input_type(self) -> str:  # pragma: no cover
        # Task models take text inputs.
        return "text"

    def __ring_key__(self) -> int:  # pragma: no cover
        # A fresh random key on every call, so ring-style cache keying never
        # treats two objects as identical. NOTE(review): this looks intentional
        # (disable key-based sharing) — confirm against _Cachable's usage.
        return uuid4().int

    def unload_model(self):  # pragma: no cover # noqa: B027
        """Unloads resources required to run the model from memory."""
        pass
# Only the abstract base class is part of this module's public API.
__all__ = ["TaskModel"]