-
Notifications
You must be signed in to change notification settings - Fork 454
/
document_question_answering.py
85 lines (72 loc) · 3.19 KB
/
document_question_answering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from dataclasses import dataclass
from typing import Any, List, Optional, Union
from .base import BaseInferenceType
@dataclass
class DocumentQuestionAnsweringInputData(BaseInferenceType):
    """A single (document, question) pair submitted for answering."""

    image: Any
    """The document image the question refers to"""
    question: str
    """The question asked about the document"""
@dataclass
class DocumentQuestionAnsweringParameters(BaseInferenceType):
    """Optional settings controlling a Document Question Answering
    inference call.
    """

    doc_stride: Optional[int] = None
    """Overlap size (in words) between successive chunks when the document
    plus the question is too long to fit the model and must be split.
    """
    handle_impossible_answer: Optional[bool] = None
    """Whether "impossible" is accepted as an answer"""
    lang: Optional[str] = None
    """Language used while running OCR; English when unset."""
    max_answer_len: Optional[int] = None
    """Upper bound on predicted answer length; only shorter candidates are
    considered.
    """
    max_question_len: Optional[int] = None
    """Maximum length of the tokenized question; truncated when exceeded."""
    max_seq_len: Optional[int] = None
    """Maximum total length (context + question) in tokens for each chunk
    fed to the model; the context is split into several chunks (with
    doc_stride as overlap) when it does not fit.
    """
    top_k: Optional[int] = None
    """How many answers to return, ordered by likelihood; fewer may come
    back when the context does not offer enough options.
    """
    word_boxes: Optional[List[Union[List[float], str]]] = None
    """Words with bounding boxes (normalized 0->1000); when supplied, the
    OCR step is skipped and these boxes are used instead.
    """
@dataclass
class DocumentQuestionAnsweringInput(BaseInferenceType):
    """Request payload for Document Question Answering inference."""

    inputs: DocumentQuestionAnsweringInputData
    """The (document, question) pair to answer"""
    parameters: Optional[DocumentQuestionAnsweringParameters] = None
    """Extra inference parameters, if any"""
@dataclass
class DocumentQuestionAnsweringOutputElement(BaseInferenceType):
    """One answer produced by the Document Question Answering task."""

    answer: str
    """Text of the predicted answer."""
    end: int
    """Index of the last word of the answer (within the OCR'd words of the
    input, or the provided word boxes).
    """
    score: float
    """Probability assigned to this answer."""
    start: int
    """Index of the first word of the answer (within the OCR'd words of the
    input, or the provided word boxes).
    """
    words: List[int]
    """Indices of every word/box pair contained in the answer"""