/
visual_question_answering.py
53 lines (40 loc) · 1.63 KB
/
visual_question_answering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from dataclasses import dataclass
from typing import Any, Optional
from .base import BaseInferenceType
@dataclass
class VisualQuestionAnsweringInputData(BaseInferenceType):
    """A single image/question pair submitted for answering."""

    image: Any
    """Image the question refers to."""
    question: Any
    """Question to be answered from the image content."""
@dataclass
class VisualQuestionAnsweringParameters(BaseInferenceType):
    """Optional additional parameters for Visual Question Answering inference."""

    top_k: Optional[int] = None
    """Number of answers to return, chosen in order of likelihood. Fewer than
    ``top_k`` answers are returned when the context does not offer enough
    options.
    """
@dataclass
class VisualQuestionAnsweringInput(BaseInferenceType):
    """Request payload for Visual Question Answering inference."""

    inputs: VisualQuestionAnsweringInputData
    """The (image, question) pair to answer."""
    parameters: Optional[VisualQuestionAnsweringParameters] = None
    """Additional inference parameters; defaults to ``None`` when omitted."""
@dataclass
class VisualQuestionAnsweringOutputElement(BaseInferenceType):
    """One element of the Visual Question Answering inference output."""

    label: Any
    """Label field of this element; left untyped (``Any``) in this definition."""
    score: float
    """Score / probability associated with this element."""
    answer: Optional[str] = None
    """Answer to the question; defaults to ``None`` when not provided."""